pax_global_header00006660000000000000000000000064145644447620014532gustar00rootroot0000000000000052 comment=b545dba48639ac5ecb691f49fb2748c847cf5be9 LucenePlusPlus-rel_3.0.9/000077500000000000000000000000001456444476200153305ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/.gitignore000066400000000000000000000031741456444476200173250ustar00rootroot00000000000000*~ *.o *.aps *.tar.gz *.rar *.cmd *.suo *.ncb *.idb *.obj *.opt *.pch *.pyc *.log *.exe *.exp *.lib *.idb *.pdb *.ilk *.manifest *.user *.bak *.orig *.lock* *.waf* .DS_Store bin/* src/msvc/Debug DLL src/msvc/Debug Static src/msvc/Release DLL src/msvc/Release Static src/core/msvc/Debug DLL src/core/msvc/Debug Static src/core/msvc/Release DLL src/core/msvc/Release Static src/contrib/msvc/Debug DLL src/contrib/msvc/Debug Static src/contrib/msvc/Release DLL src/contrib/msvc/Release Static src/test/msvc/Debug DLL src/test/msvc/Debug Static src/test/msvc/Release DLL src/test/msvc/Release Static src/test/testfiles/temp src/demo/deletefiles/msvc/Release DLL src/demo/deletefiles/msvc/Release Static src/demo/deletefiles/msvc/Debug DLL src/demo/deletefiles/msvc/Debug Static src/demo/indexfiles/msvc/Release DLL src/demo/indexfiles/msvc/Release Static src/demo/indexfiles/msvc/Debug DLL src/demo/indexfiles/msvc/Debug Static src/demo/searchfiles/msvc/Release DLL src/demo/searchfiles/msvc/Release Static src/demo/searchfiles/msvc/Debug DLL src/demo/searchfiles/msvc/Debug Static CMakeCache.txt CMakeFiles/ build/ CTestTestfile.cmake Makefile cmake_install.cmake cmake_uninstall.cmake include/Config.h install_manifest.txt liblucene++-contrib.pc liblucene++.pc src/contrib/CMakeFiles/ src/contrib/CTestTestfile.cmake src/contrib/Makefile src/contrib/cmake_install.cmake src/core/CMakeFiles/ src/core/CTestTestfile.cmake src/core/Makefile src/core/cmake_install.cmake src/demo/CMakeFiles/ src/demo/CTestTestfile.cmake src/demo/Makefile src/demo/cmake_install.cmake src/test/CMakeFiles/ src/test/CTestTestfile.cmake src/test/Makefile 
src/test/cmake_install.cmake LucenePlusPlus-rel_3.0.9/APACHE.license000066400000000000000000000261361456444476200176250ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
LucenePlusPlus-rel_3.0.9/AUTHORS000066400000000000000000000000571456444476200164020ustar00rootroot00000000000000Alan Wright Ben van Klinken Jamie Kirkpatrick LucenePlusPlus-rel_3.0.9/CMakeLists.txt000066400000000000000000000065471456444476200201040ustar00rootroot00000000000000#################################### # init #################################### cmake_minimum_required(VERSION 3.5) project(lucene++) set(lucene++_VERSION_MAJOR 3) set(lucene++_VERSION_MINOR 0) set(lucene++_VERSION_PATCH 9) set(lucene++_SOVERSION "0") set(lucene++_VERSION "${lucene++_VERSION_MAJOR}.${lucene++_VERSION_MINOR}.${lucene++_VERSION_PATCH}") # set default build type as release if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release") endif() set(LIB_DESTINATION "${CMAKE_INSTALL_LIBDIR}" CACHE STRING "Define lib output directory name") #################################### # CMake Modules #################################### # include specific modules set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") include(options.cmake) # pre-compiled headers support include(cotire) # if setup using the Toolchain-llvm.cmake file, then use llvm... 
if(ENABLE_LLVM) include(Toolchain-llvm) endif() # fetch dependencies include(dependencies) # build docs include(Lucene++Docs) # Enable C++11 set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) #################################### # platform specific options #################################### if(WIN32 OR WIN64) set(CMAKE_DEBUG_POSTFIX "d") endif() if(MSVC) # Disable automatic boost linking on Windows as libraries are added to the linker explicitly add_definitions(-DBOOST_ALL_NO_LIB) # enable exceptions, see http://msdn.microsoft.com/en-us/library/1deeycx5.aspx add_definitions(-EHsc) # Disable including too many Windows headers add_definitions(-DWIN32_LEAN_AND_MEAN) # Disable the min/max macros that conflict with std::min/std::max add_definitions(-DNOMINMAX) endif() if(NOT WIN32 AND NOT CMAKE_SYSTEM MATCHES "SunOS-5*.") set(CMAKE_POSITION_INDEPENDENT_CODE ON) endif() if(CYGWIN) add_definitions(-D__LARGE64_FILES) endif() if(APPLE) set(CMAKE_MACOSX_RPATH ON) set(CMAKE_SKIP_BUILD_RPATH FALSE) set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) if("${isSystemDir}" STREQUAL "-1") set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") endif() endif() #################################### # custom targets #################################### configure_file( "${CMAKE_MODULE_PATH}/cmake_uninstall.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" IMMEDIATE @ONLY ) add_custom_target( uninstall "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" VERBATIM ) if(ENABLE_PACKAGING) include(CreateLucene++Packages) endif() #################################### # bootstrap #################################### include(TestCXXAcceptsFlag) include(GNUInstallDirs) add_subdirectory(include) add_subdirectory(src) message("\n\n** Build Summary **") message(" Version: 
${lucene++_VERSION}") message(" Prefix: ${CMAKE_INSTALL_PREFIX}") message(" Build Type: ${CMAKE_BUILD_TYPE}") message(" Architecture: ${CMAKE_SYSTEM_PROCESSOR}") message(" System: ${CMAKE_SYSTEM_NAME}") message(" Boost Include: ${Boost_INCLUDE_DIRS}") message(" Boost Libraries: ${Boost_LIBRARY_DIRS}") message(" Zlib Include: ${ZLIB_INCLUDE_DIRS}") message(" Zlib Library: ${ZLIB_LIBRARY_RELEASE}") LucenePlusPlus-rel_3.0.9/COPYING000066400000000000000000000070631456444476200163710ustar00rootroot00000000000000This source code is dual-licensed. ================================== LGPL: This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. See the file LGPL.licence Apache 2.0: Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. See the file APACHE.licence Notes regarding licensing glib code. ==================================== Files: src/core/util/unicode/* Some portions of glib code have been used in Lucene++, a project spawned from CLucene, with special permission from the author(s). This is the email exchange that took place in 2006 between Ben van Klinken, Owen Taylor and Tom Tromey: ---------- Forwarded message ---------- From: Owen Taylor Date: 11 February 2006 04:48 Subject: Re: Fwd: glib licensing To: Ben van Klinken Cc: tromey@redhat.com On Fri, 2006-02-10 at 18:34 +0100, Ben van Klinken wrote: > > Hi Owen, > > I am the author and maintainer of CLucene (clucene.sourceforge.net). I > was forwarded to you by Matthias Classen. We have an enquiry about > licensing of glib: > > CLucene was licensed LGPL, but recently we changed our license to > allow licensing under apache or LGPL. 
During the audit of the change, > we made a mistake with some of the code (code in the > gunichartables.cpp - various utf8<>wchar conversion functions) to be > licensed apache, since some of the functions are from the glib > library. The file in question contains various functions from the > http://cvs.gnome.org/viewcvs/glib/glib/ directory. > > We are working on fixing this and are exploring several options. When > discussing the issue on our mailing list, one suggestion was to > enquire with you what the chances of re-licensing some of the glib > unicode functions under > the apache license would be? I believe you were the author of glib > unicode support? I'd have to know more specifically what particular portions of the GLib code are involved; can you be more specific about particular functions and code portions? while I did much of the work, there have been additions made later on by other people, and a good portion of the code derives originally from libunicode by Tom Tromey. (Cc'ed) For the portions that are actually my code I don't have any objection to them also being licensed under the Apache license ... it's pretty much straightforward implementations of algorithms from the Unicode standard, and other implementations are widely available in many forms. Regards, Owen ---------- Forwarded message ---------- From: Tom Tromey Date: 25 April 2006 02:42 Subject: Re: Fwd: glib licensing To: Ben van Klinken Cc: Owen Taylor Ben> All the code has Tom Tromey's name at the top. Please let me know if Ben> you need any other information. I was wondering ... if other people made substantial contributions after this code was written, wouldn't they also be copyright holders? You'd have to use the cvs history to see this. Ben> I don't think we need to do anything other for you to agree with this Ben> licensing in this email correspondence. 
So unless you can shed any Ben> more light on this process and if you agree to this licensing, i will Ben> append the apache license to the top of our files with a note that it Ben> has been licensed with your approval. It is fine by me. Tom LucenePlusPlus-rel_3.0.9/GPL.license000066400000000000000000001045131456444476200173220ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. 
For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. 
Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. 
"Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. 
Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. 
When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. 
This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: Copyright (C) This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . LucenePlusPlus-rel_3.0.9/LGPL.license000066400000000000000000000167411456444476200174430ustar00rootroot00000000000000 GNU LESSER GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. This version of the GNU Lesser General Public License incorporates the terms and conditions of version 3 of the GNU General Public License, supplemented by the additional permissions listed below. 0. Additional Definitions. As used herein, "this License" refers to version 3 of the GNU Lesser General Public License, and the "GNU GPL" refers to version 3 of the GNU General Public License. "The Library" refers to a covered work governed by this License, other than an Application or a Combined Work as defined below. An "Application" is any work that makes use of an interface provided by the Library, but which is not otherwise based on the Library. Defining a subclass of a class defined by the Library is deemed a mode of using an interface provided by the Library. 
A "Combined Work" is a work produced by combining or linking an Application with the Library. The particular version of the Library with which the Combined Work was made is also called the "Linked Version". The "Minimal Corresponding Source" for a Combined Work means the Corresponding Source for the Combined Work, excluding any source code for portions of the Combined Work that, considered in isolation, are based on the Application, and not on the Linked Version. The "Corresponding Application Code" for a Combined Work means the object code and/or source code for the Application, including any data and utility programs needed for reproducing the Combined Work from the Application, but excluding the System Libraries of the Combined Work. 1. Exception to Section 3 of the GNU GPL. You may convey a covered work under sections 3 and 4 of this License without being bound by section 3 of the GNU GPL. 2. Conveying Modified Versions. If you modify a copy of the Library, and, in your modifications, a facility refers to a function or data to be supplied by an Application that uses the facility (other than as an argument passed when the facility is invoked), then you may convey a copy of the modified version: a) under this License, provided that you make a good faith effort to ensure that, in the event an Application does not supply the function or data, the facility still operates, and performs whatever part of its purpose remains meaningful, or b) under the GNU GPL, with none of the additional permissions of this License applicable to that copy. 3. Object Code Incorporating Material from Library Header Files. The object code form of an Application may incorporate material from a header file that is part of the Library. 
You may convey such object code under terms of your choice, provided that, if the incorporated material is not limited to numerical parameters, data structure layouts and accessors, or small macros, inline functions and templates (ten or fewer lines in length), you do both of the following: a) Give prominent notice with each copy of the object code that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the object code with a copy of the GNU GPL and this license document. 4. Combined Works. You may convey a Combined Work under terms of your choice that, taken together, effectively do not restrict modification of the portions of the Library contained in the Combined Work and reverse engineering for debugging such modifications, if you also do each of the following: a) Give prominent notice with each copy of the Combined Work that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the Combined Work with a copy of the GNU GPL and this license document. c) For a Combined Work that displays copyright notices during execution, include the copyright notice for the Library among these notices, as well as a reference directing the user to the copies of the GNU GPL and this license document. d) Do one of the following: 0) Convey the Minimal Corresponding Source under the terms of this License, and the Corresponding Application Code in a form suitable for, and under terms that permit, the user to recombine or relink the Application with a modified version of the Linked Version to produce a modified Combined Work, in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source. 1) Use a suitable shared library mechanism for linking with the Library. 
A suitable mechanism is one that (a) uses at run time a copy of the Library already present on the user's computer system, and (b) will operate properly with a modified version of the Library that is interface-compatible with the Linked Version. e) Provide Installation Information, but only if you would otherwise be required to provide such information under section 6 of the GNU GPL, and only to the extent that such information is necessary to install and execute a modified version of the Combined Work produced by recombining or relinking the Application with a modified version of the Linked Version. (If you use option 4d0, the Installation Information must accompany the Minimal Corresponding Source and Corresponding Application Code. If you use option 4d1, you must provide the Installation Information in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.) 5. Combined Libraries. You may place library facilities that are a work based on the Library side by side in a single library together with other library facilities that are not Applications and are not covered by this License, and convey such a combined library under terms of your choice, if you do both of the following: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities, conveyed under the terms of this License. b) Give prominent notice with the combined library that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 6. Revised Versions of the GNU Lesser General Public License. The Free Software Foundation may publish revised and/or new versions of the GNU Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. 
If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation. If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library. LucenePlusPlus-rel_3.0.9/README.PACKAGE000066400000000000000000000001621456444476200172410ustar00rootroot00000000000000An up to date C++ port of the popular Java Lucene library, a high-performance, full-featured text search engine. LucenePlusPlus-rel_3.0.9/README.md000066400000000000000000000043031456444476200166070ustar00rootroot00000000000000Lucene++ ========== Welcome to lucene++ version **3.0.9**. Lucene++ is a C++ port of the popular Java [Lucene](http://lucene.apache.org/) library, a high-performance, full-featured text search engine. Lucene++ Components ---------------- - liblucene++ library - liblucene++-contrib library - lucene++-tester (unit tester) - deletefiles (demo) - indexfiles (demo) - searchfiles (demo) For information on building the Lucene++ suite, please read doc/BUILDING.md Useful Resources ---------------- Official [Java Lucene](http://lucene.apache.org/java/docs/index.html) - useful links and documentation relevant to Lucene and lucene++. [Lucene in Action](https://www.amazon.com/dp/1932394281/) by Otis Gospodnetic and Erik Hatcher. 
To run unit test suite ---------------------- lucene_tester is built using the [Google Testing Framework](https://code.google.com/p/googletest/). you can run the test suite on unix with the following command run from the repository root:: ``` $ build/src/test/lucene++-tester ``` the test suite can also be run from the repository root on NT systems, but the required DLL files must manually be copied into the test binary path before executing, otherwise you will recieve errors telling you that required libraries cannot be found. ``` $ build/src/test/lucene++-tester ``` Command options can be discovered by supplying `--help`. To run the demos ---------------- Start by indexing a directory of files - open a command prompt and run ``` ./indexfiles ``` Once the indexer has finished, you can query the index using searchfiles ``` ./searchfiles -index ``` This uses an interactive command for you to enter queries, type a query to search the index press enter and you'll see the results. Acknowledgements ---------------- - Ben van Klinken and contributors to the CLucene project for inspiring this project. - md5 Copyright (C) 1999, 2000, 2002 Aladdin Enterprises - `Unicode character properties (guniprop)[http://library.gnome.org/devel/glib/] Copyright (C) 1999 Tom Tromey, Copyright (C) 2000 Red Hat, Inc. - `Cotire (compile time reducer)[https://github.com/sakra/cotire] by Sascha Kratky. 
LucenePlusPlus-rel_3.0.9/REQUESTS000066400000000000000000000000751456444476200165300ustar00rootroot00000000000000See https://github.com/luceneplusplus/LucenePlusPlus/issues LucenePlusPlus-rel_3.0.9/cmake/000077500000000000000000000000001456444476200164105ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/cmake/CreateLucene++Packages.cmake000066400000000000000000000064431456444476200235450ustar00rootroot00000000000000#Creates all the relevant packages set(CPACK_PACKAGE_VERSION_MAJOR ${lucene++_VERSION_MAJOR}) set(CPACK_PACKAGE_VERSION_MINOR ${lucene++_VERSION_MINOR}) set(CPACK_PACKAGE_VERSION_PATCH ${lucene++_VERSION_MAJOR}) set(CPACK_PACKAGE_VERSION ${lucene++_VERSION}) set(CPACK_PACKAGE_SOVERSION ${lucene++_SOVERSION}) set(CPACK_PACKAGE_VENDOR "Alan Wright") set(CPACK_PACKAGE_CONTACT "alanwright.home@googlemail.com") set(CPACK_PACKAGE_NAME "liblucene++") set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.PACKAGE") set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Lucene++ is an up to date C++ port of the popular Java Lucene library, a high-performance, full-featured text search engine") set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.PACKAGE") set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/COPYING") #so, what are we going to install? 
set(CPACK_INSTALL_CMAKE_PROJECTS "${CMAKE_BINARY_DIR};lucene++;ALL;/") set(CPACK_COMPONENTS_ALL development runtime) set(CPACK_GENERATOR "TGZ") set(CPACK_PACKAGE_FILE_NAME "lucene++-${CPACK_PACKAGE_VERSION}-${CMAKE_SYSTEM_NAME}") if((WIN32 OR WIN64) AND NOT UNIX) set(CPACK_SOURCE_GENERATOR "ZIP") else() set(CPACK_SOURCE_GENERATOR "TBZ2;TGZ") endif() set(CPACK_SOURCE_PACKAGE_FILE_NAME "lucene++-${CPACK_PACKAGE_VERSION}-Source") #specific packaging requirements:, set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>= 2.4), libgcc1 (>= 1:4.1.1-21), libstdc++6 (>= 4.1.1-21), libboost-date-time1.42.0, libboost-filesystem1.42.0, libboost-regex1.42.0, libboost-thread1.42.0, libboost-iostreams1.42.0") set(CPACK_DEBIAN_PACKAGE_SECTION "libs") set(CPACK_RPM_PACKAGE_LICENSE "Apache 2.0") set(CPACK_RPM_PACKAGE_GROUP "libs") set(CPACK_RPM_PACKAGE_REQUIRES "libboost-date-time1.42.0, libboost-filesystem1.42.0, libboost-regex1.42.0, libboost-thread1.42.0, libboost-iostreams1.42.0") #don't include the current binary dir. get_filename_component(lucene++_BINARY_DIR_name "${lucene++_BINARY_DIR}" NAME) set(CPACK_SOURCE_IGNORE_FILES "/\\\\.svn/" "/\\\\.git/" "\\\\.swp$" "\\\\.#;/#" ".*~" ".*\\\\.tmp" ".*\\\\.save" "/${lucene++_BINARY_DIR_name}/" ) if((WIN32 OR WIN64) AND NOT UNIX) # There is a bug in NSI that does not handle full unix paths properly. Make # sure there is at least one set of four (4) backlasshes. 
set(CPACK_GENERATOR "${CPACK_GENERATOR};NSIS") #set(CPACK_PACKAGE_ICON "${CMake_SOURCE_DIR}/Utilities/Release\\\\InstallIcon.bmp") #set(CPACK_NSIS_INSTALLED_ICON_NAME "bin\\\\MyExecutable.exe") set(CPACK_NSIS_DISPLAY_NAME "${CPACK_PACKAGE_INSTALL_DIRECTORY} Lucene++ Library") set(CPACK_NSIS_HELP_LINK "http:\\\\\\\\lucene++.sourceforge.net") set(CPACK_NSIS_URL_INFO_ABOUT "http:\\\\\\\\lucene++.sourceforge.net") set(CPACK_NSIS_CONTACT "lucene++-developers@lists.sourceforge.net") #set(CPACK_NSIS_MODIFY_PATH ON) else() # set(CPACK_STRIP_FILES "bin/xxx") set(CPACK_SOURCE_STRIP_FILES "") endif() #set(CPACK_PACKAGE_EXECUTABLES "MyExecutable" "My Executable") add_custom_target(dist-package COMMAND rsync -avP -e ssh "${CPACK_PACKAGE_FILE_NAME}.*" ustramooner@frs.sourceforge.net:uploads/ # DEPENDS package ) add_custom_target(dist-package_source COMMAND rsync -avP -e ssh "${CPACK_SOURCE_PACKAGE_FILE_NAME}.*" ustramooner@frs.sourceforge.net:uploads/ # DEPENDS package_source ) #this must be last include(CPack) LucenePlusPlus-rel_3.0.9/cmake/Lucene++Docs.cmake000066400000000000000000000125751456444476200215760ustar00rootroot00000000000000# - Lucene++Docs.cmake # This file provides support for building the Lucene++ Documentation. # To build the documention, you will have to enable it # and then do the equivalent of "make doc". MACRO(SET_YESNO) FOREACH(param ${ARGV}) IF ( ${param} ) SET(${param} "YES") ELSE ( ${param} ) SET(${param} "NO") ENDIF ( ${param} ) ENDFOREACH(param) ENDMACRO(SET_YESNO) MACRO(SET_BLANK) FOREACH(param ${ARGV}) IF ( NOT ${param} ) SET(${param} "") ENDIF ( NOT ${param} ) ENDFOREACH(param) ENDMACRO(SET_BLANK) IF (ENABLE_DOCS) OPTION(DOCS_HTML_HELP "Doxygen should compile HTML into a Help file (CHM)." NO) OPTION(DOCS_HTML "Doxygen should build HTML documentation." YES) OPTION(DOCS_XML "Doxygen should build XML documentation." NO) OPTION(DOCS_RTF "Doxygen should build RTF documentation." NO) OPTION(DOCS_MAN "Doxygen should build man documentation." 
NO) OPTION(DOCS_TAGFILE "Doxygen should build a tagfile." NO) OPTION(DOCS_LATEX "Doxygen should build Latex documentation." NO ) MARK_AS_ADVANCED( DOCS_HTML_HELP DOCS_LATEX DOCS_XML DOCS_HTML DOCS_RTF DOCS_MAN DOCS_TAGFILE ) # # Check for the tools # FIND_PACKAGE(Doxygen) IF ( DOXYGEN_FOUND ) # This creates a new target to build documentation. # It runs ${DOXYGEN_EXECUTABLE} which is the full path and executable to # Doxygen on your system, set by the FindDoxygen.cmake module # (called by FindDocumentation.cmake). # It runs the final generated Doxyfile against it. # The DOT_PATH is substituted into the Doxyfile. ADD_CUSTOM_TARGET(doc "${DOXYGEN_EXECUTABLE}" "${PROJECT_BINARY_DIR}/doc/doxyfile" VERBATIM ) IF ( DOCS_HTML_HELP ) IF ( NOT DOCS_HTML ) MESSAGE ( FATAL_ERROR "DOCS_HTML is required to buidl DOCS_HTML_HELP" ) ENDIF ( NOT DOCS_HTML ) FIND_PACKAGE(HTMLHelp) IF ( NOT HTML_HELP_COMPILER ) MESSAGE(FATAL_ERROR "HTML Help compiler not found, turn DOCS_HTML_HELP off to proceed") ENDIF ( NOT HTML_HELP_COMPILER ) #make cygwin work with hhc... 
IF ( CYGWIN ) EXECUTE_PROCESS ( COMMAND cygpath "${HTML_HELP_COMPILER}" OUTPUT_VARIABLE HTML_HELP_COMPILER_EX ) STRING ( REPLACE "\n" "" HTML_HELP_COMPILER_EX "${HTML_HELP_COMPILER_EX}" ) STRING ( REPLACE "\r" "" HTML_HELP_COMPILER_EX "${HTML_HELP_COMPILER_EX}" ) SET ( HTML_HELP_COMPILER_EX "\"${HTML_HELP_COMPILER_EX}\"" ) ELSE ( CYGWIN ) SET ( HTML_HELP_COMPILER_EX "${HTML_HELP_COMPILER}" ) ENDIF ( CYGWIN ) ENDIF ( DOCS_HTML_HELP ) IF ( DOCS_LATEX ) FIND_PACKAGE(LATEX) IF ( NOT LATEX_COMPILER ) MESSAGE(FATAL_ERROR "Latex compiler not found, turn DOCS_LATEX off to proceed") ENDIF ( NOT LATEX_COMPILER ) ENDIF ( DOCS_LATEX ) FIND_PACKAGE(Perl) IF ( DOXYGEN_DOT_EXECUTABLE ) SET ( HAVE_DOT "YES" ) ELSE ( DOXYGEN_DOT_EXECUTABLE ) SET ( HAVE_DOT "NO" ) ENDIF ( DOXYGEN_DOT_EXECUTABLE ) #doxygen expects YES/NO parameters SET_YESNO( DOCS_HTML_HELP DOCS_LATEX DOCS_XML DOCS_HTML DOCS_RTF DOCS_MAN ) #empty out paths if not found SET_BLANK( PERL_EXECUTABLE DOXYGEN_DOT_EXECUTABLE HTML_HELP_COMPILER LATEX_COMPILER ) IF ( DOCS_TAGFILE ) SET ( DOCS_TAGFILE_LOCATION "${PROJECT_BINARY_DIR}/doc/tag/lucene++.tag" ) ENDIF ( DOCS_TAGFILE ) # This processes our Doxyfile.cmake and substitutes paths to generate a final Doxyfile CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/doc/doxygen/Doxyfile.cmake" "${PROJECT_BINARY_DIR}/doc/doxyfile") CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/doc/doxygen/helpheader.htm.cmake" "${PROJECT_BINARY_DIR}/doc/helpheader.htm") CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/doc/doxygen/helpfooter.htm.cmake" "${PROJECT_BINARY_DIR}/doc/helpfooter.htm") CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/doc/doxygen/doxygen.css.cmake" "${PROJECT_BINARY_DIR}/doc/html/doxygen.css") #create a target for tar.gz html help FIND_PACKAGE(UnixCommands) IF ( TAR AND GZIP ) ADD_CUSTOM_TARGET(doc-tarz COMMAND "${TAR}" "-czf" "${PROJECT_BINARY_DIR}/doc/lucene++-doc.tar.gz" ./ WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/doc/html/" #DEPENDS doc VERBATIM ) ENDIF ( TAR AND GZIP ) #install HTML pages if they were built 
IF ( DOCS_HTML AND NOT WIN32 ) INSTALL(DIRECTORY "${PROJECT_BINARY_DIR}/doc/html/" DESTINATION share/doc/lucene++-doc/html) ENDIF ( DOCS_HTML AND NOT WIN32 ) #install man pages if they were built IF ( DOCS_MAN ) INSTALL(DIRECTORY "${PROJECT_BINARY_DIR}/doc/man/" DESTINATION man) ENDIF ( DOCS_MAN ) ELSE ( DOXYGEN_FOUND ) MESSAGE(FATAL_ERROR "Doxygen not found, turn ENABLE_DOCS off to proceed") ENDIF ( DOXYGEN_FOUND ) ENDIF (ENABLE_DOCS) LucenePlusPlus-rel_3.0.9/cmake/MacroEnsureVersion.cmake000066400000000000000000000066561456444476200232200ustar00rootroot00000000000000# This macro compares version numbers of the form "x.y.z" # MACRO_ENSURE_VERSION( FOO_MIN_VERSION FOO_VERSION_FOUND FOO_VERSION_OK) # will set FOO_VERSIN_OK to true if FOO_VERSION_FOUND >= FOO_MIN_VERSION # where both have to be in a 3-part-version format, leading and trailing # text is ok, e.g. # MACRO_ENSURE_VERSION( "2.5.31" "flex 2.5.4a" VERSION_OK) # which means 2.5.31 is required and "flex 2.5.4a" is what was found on the system # Copyright (c) 2006, David Faure, # # Redistribution and use is allowed according to the terms of the BSD license. # For details see the accompanying COPYING-CMAKE-SCRIPTS file. 
MACRO(MACRO_ENSURE_VERSION requested_version found_version var_too_old) # parse the parts of the version string STRING(REGEX REPLACE "([0-9]+)\\.[0-9]+\\.[0-9]+" "\\1" req_major_vers "${requested_version}") STRING(REGEX REPLACE "[0-9]+\\.([0-9]+)\\.[0-9]+" "\\1" req_minor_vers "${requested_version}") STRING(REGEX REPLACE "[0-9]+\\.[0-9]+\\.([0-9]+)" "\\1" req_patch_vers "${requested_version}") STRING(REGEX REPLACE "[^0-9]*([0-9]+)\\.[0-9]+\\.[0-9]+.*" "\\1" found_major_vers "${found_version}") STRING(REGEX REPLACE "[^0-9]*[0-9]+\\.([0-9]+)\\.[0-9]+.*" "\\1" found_minor_vers "${found_version}") STRING(REGEX REPLACE "[^0-9]*[0-9]+\\.[0-9]+\\.([0-9]+).*" "\\1" found_patch_vers "${found_version}") # compute an overall version number which can be compared at once MATH(EXPR req_vers_num "${req_major_vers}*10000 + ${req_minor_vers}*100 + ${req_patch_vers}") MATH(EXPR found_vers_num "${found_major_vers}*10000 + ${found_minor_vers}*100 + ${found_patch_vers}") if (found_vers_num LESS req_vers_num) set( ${var_too_old} FALSE ) else (found_vers_num LESS req_vers_num) set( ${var_too_old} TRUE ) endif (found_vers_num LESS req_vers_num) ENDMACRO(MACRO_ENSURE_VERSION) # This macro compares version numbers of the form "x.y" # MACRO_ENSURE_VERSION( FOO_MIN_VERSION FOO_VERSION_FOUND FOO_VERSION_OK) # will set FOO_VERSIN_OK to true if FOO_VERSION_FOUND >= FOO_MIN_VERSION # where both have to be in a 2-part-version format, leading and trailing # text is ok, e.g. # MACRO_ENSURE_VERSION( "0.5" "foo 0.6" VERSION_OK) # which means 0.5 is required and "foo 0.6" is what was found on the system # Copyright (c) 2006, David Faure, # Copyright (c) 2007, Pino Toscano, # # Redistribution and use is allowed according to the terms of the BSD license. # For details see the accompanying COPYING-CMAKE-SCRIPTS file. 
MACRO(MACRO_ENSURE_VERSION2 requested_version found_version var_too_old) # parse the parts of the version string STRING(REGEX REPLACE "([0-9]+)\\.[0-9]+" "\\1" req_major_vers "${requested_version}") STRING(REGEX REPLACE "[0-9]+\\.([0-9]+)" "\\1" req_minor_vers "${requested_version}") STRING(REGEX REPLACE "[^0-9]*([0-9]+)\\.[0-9]+.*" "\\1" found_major_vers "${found_version}") STRING(REGEX REPLACE "[^0-9]*[0-9]+\\.([0-9]+).*" "\\1" found_minor_vers "${found_version}") # compute an overall version number which can be compared at once MATH(EXPR req_vers_num "${req_major_vers}*100 + ${req_minor_vers}") MATH(EXPR found_vers_num "${found_major_vers}*100 + ${found_minor_vers}") if (found_vers_num LESS req_vers_num) set( ${var_too_old} FALSE ) else (found_vers_num LESS req_vers_num) set( ${var_too_old} TRUE ) endif (found_vers_num LESS req_vers_num) ENDMACRO(MACRO_ENSURE_VERSION2) LucenePlusPlus-rel_3.0.9/cmake/Toolchain-g++32.cmake000066400000000000000000000012661456444476200220560ustar00rootroot00000000000000# Cross compiling from linux using g++-multilib to create 32 bit output # On ubuntu, you'll need to install the packages: g++-multilib gcc-multilib # # Use of this file: # cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchain-g++32.cmake .. SET(CMAKE_CXX_FLAGS "-m32") SET(CMAKE_C_FLAGS "-m32") SET(CMAKE_EXE_LINKER_FLAGS "-m32") SET(CMAKE_MODULE_LINKER_FLAGS "-m32") # here is the target environment located SET(CMAKE_FIND_ROOT_PATH /usr/lib32 ) # adjust the default behaviour of the FIND_XXX() commands: # search headers and libraries in the target environment, search # programs in the host environment set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) LucenePlusPlus-rel_3.0.9/cmake/Toolchain-llvm.cmake000066400000000000000000000007411456444476200223040ustar00rootroot00000000000000# Use of this file: # cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchain-llvm.cmake .. 
# which compilers to use for C and C++ SET(CMAKE_C_COMPILER clang) SET(CMAKE_CXX_COMPILER clang++) SET(ENABLE_LLVM CACHE BOOL TRUE) SET(ENABLE_LLVM_BC CACHE BOOL FALSE) IF ( ENABLE_LLVM_BC ) #TODO: make this work... #this only crates the llvm objects, it can't link them together currently SET(CMAKE_C_FLAGS "-emit-llvm") SET(CMAKE_CXX_FLAGS "-emit-llvm") ENDIF ( ENABLE_LLVM_BC ) LucenePlusPlus-rel_3.0.9/cmake/Toolchain-mingw32.cmake000066400000000000000000000023021456444476200226130ustar00rootroot00000000000000# Cross compiling from linux using mingw32 tools # On ubuntu, you'll need to install the packages: mingw32, mingw32-binutils, mingw32-runtime # # Use of this file: # cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchain-mingw32.cmake -C ../cmake/Toolchain-mingw32.cmake .. # the name of the target operating system set(CMAKE_SYSTEM_NAME Windows) # which compilers to use for C and C++ set(CMAKE_C_COMPILER i586-mingw32msvc-gcc) set(CMAKE_CXX_COMPILER i586-mingw32msvc-g++) # here is the target environment located set(CMAKE_FIND_ROOT_PATH /usr/i586-mingw32msvc /home/alex/mingw-install ) include_directories(/usr/lib/gcc/i586-mingw32msvc/4.2.1-sjlj/include/c++) # adjust the default behaviour of the FIND_XXX() commands: # search headers and libraries in the target environment, search # programs in the host environment set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) set(_CL_HAVE_GCCVISIBILITYPATCH 0) set(_CL_HAVE_NAMESPACES_EXITCODE 0) set(_CL_HAVE_NO_SNPRINTF_BUG_EXITCODE 0) set(_CL_HAVE_NO_SNWPRINTF_BUG_EXITCODE 0) set(LUCENE_STATIC_CONSTANT_SYNTAX_EXITCODE 1) set(_CL_HAVE_TRY_BLOCKS_EXITCODE 0) set(ENABLE_ANSI_MODE OFF) LucenePlusPlus-rel_3.0.9/cmake/cmake_uninstall.cmake.in000066400000000000000000000015631456444476200231750ustar00rootroot00000000000000IF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") MESSAGE(FATAL_ERROR "Cannot find install manifest: 
\"@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt\"") ENDIF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") FILE(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) STRING(REGEX REPLACE "\n" ";" files "${files}") FOREACH(file ${files}) MESSAGE(STATUS "Uninstalling \"${file}\"") IF(EXISTS "${file}") EXEC_PROGRAM( "@CMAKE_COMMAND@" ARGS "-E remove \"${file}\"" OUTPUT_VARIABLE rm_out RETURN_VALUE rm_retval ) IF("${rm_retval}" STREQUAL 0) ELSE("${rm_retval}" STREQUAL 0) MESSAGE(FATAL_ERROR "Problem when removing \"${file}\"") ENDIF("${rm_retval}" STREQUAL 0) #ELSE(EXISTS "${file}") # MESSAGE(STATUS "File \"${file}\" does not exist.") ENDIF(EXISTS "${file}") ENDFOREACH(file) LucenePlusPlus-rel_3.0.9/cmake/cotire.cmake000066400000000000000000005243631456444476200207140ustar00rootroot00000000000000# - cotire (compile time reducer) # # See the cotire manual for usage hints. # #============================================================================= # Copyright 2012-2018 Sascha Kratky # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation # files (the "Software"), to deal in the Software without # restriction, including without limitation the rights to use, # copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the # Software is furnished to do so, subject to the following # conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # OTHER DEALINGS IN THE SOFTWARE. #============================================================================= if(__COTIRE_INCLUDED) return() endif() set(__COTIRE_INCLUDED TRUE) # call cmake_minimum_required, but prevent modification of the CMake policy stack in include mode # cmake_minimum_required also sets the policy version as a side effect, which we have to avoid if (NOT CMAKE_SCRIPT_MODE_FILE) cmake_policy(PUSH) endif() cmake_minimum_required(VERSION 2.8.12) if (NOT CMAKE_SCRIPT_MODE_FILE) cmake_policy(POP) endif() set (COTIRE_CMAKE_MODULE_FILE "${CMAKE_CURRENT_LIST_FILE}") set (COTIRE_CMAKE_MODULE_VERSION "1.8.0") # activate select policies if (POLICY CMP0025) # Compiler id for Apple Clang is now AppleClang cmake_policy(SET CMP0025 NEW) endif() if (POLICY CMP0026) # disallow use of the LOCATION target property cmake_policy(SET CMP0026 NEW) endif() if (POLICY CMP0038) # targets may not link directly to themselves cmake_policy(SET CMP0038 NEW) endif() if (POLICY CMP0039) # utility targets may not have link dependencies cmake_policy(SET CMP0039 NEW) endif() if (POLICY CMP0040) # target in the TARGET signature of add_custom_command() must exist cmake_policy(SET CMP0040 NEW) endif() if (POLICY CMP0045) # error on non-existent target in get_target_property cmake_policy(SET CMP0045 NEW) endif() if (POLICY CMP0046) # error on non-existent dependency in add_dependencies cmake_policy(SET CMP0046 NEW) endif() if (POLICY CMP0049) # do not expand variables in target source entries cmake_policy(SET CMP0049 NEW) endif() if (POLICY CMP0050) # disallow add_custom_command SOURCE signatures cmake_policy(SET CMP0050 NEW) endif() if (POLICY CMP0051) # include TARGET_OBJECTS expressions in a target's SOURCES property cmake_policy(SET CMP0051 NEW) 
endif() if (POLICY CMP0053) # simplify variable reference and escape sequence evaluation cmake_policy(SET CMP0053 NEW) endif() if (POLICY CMP0054) # only interpret if() arguments as variables or keywords when unquoted cmake_policy(SET CMP0054 NEW) endif() if (POLICY CMP0055) # strict checking for break() command cmake_policy(SET CMP0055 NEW) endif() include(CMakeParseArguments) include(ProcessorCount) function (cotire_get_configuration_types _configsVar) set (_configs "") if (CMAKE_CONFIGURATION_TYPES) list (APPEND _configs ${CMAKE_CONFIGURATION_TYPES}) endif() if (CMAKE_BUILD_TYPE) list (APPEND _configs "${CMAKE_BUILD_TYPE}") endif() if (_configs) list (REMOVE_DUPLICATES _configs) set (${_configsVar} ${_configs} PARENT_SCOPE) else() set (${_configsVar} "None" PARENT_SCOPE) endif() endfunction() function (cotire_get_source_file_extension _sourceFile _extVar) # get_filename_component returns extension from first occurrence of . in file name # this function computes the extension from last occurrence of . in file name string (FIND "${_sourceFile}" "." 
_index REVERSE) if (_index GREATER -1) math (EXPR _index "${_index} + 1") string (SUBSTRING "${_sourceFile}" ${_index} -1 _sourceExt) else() set (_sourceExt "") endif() set (${_extVar} "${_sourceExt}" PARENT_SCOPE) endfunction() macro (cotire_check_is_path_relative_to _path _isRelativeVar) set (${_isRelativeVar} FALSE) if (IS_ABSOLUTE "${_path}") foreach (_dir ${ARGN}) file (RELATIVE_PATH _relPath "${_dir}" "${_path}") if (NOT _relPath OR (NOT IS_ABSOLUTE "${_relPath}" AND NOT "${_relPath}" MATCHES "^\\.\\.")) set (${_isRelativeVar} TRUE) break() endif() endforeach() endif() endmacro() function (cotire_filter_language_source_files _language _target _sourceFilesVar _excludedSourceFilesVar _cotiredSourceFilesVar) if (CMAKE_${_language}_SOURCE_FILE_EXTENSIONS) set (_languageExtensions "${CMAKE_${_language}_SOURCE_FILE_EXTENSIONS}") else() set (_languageExtensions "") endif() if (CMAKE_${_language}_IGNORE_EXTENSIONS) set (_ignoreExtensions "${CMAKE_${_language}_IGNORE_EXTENSIONS}") else() set (_ignoreExtensions "") endif() if (COTIRE_UNITY_SOURCE_EXCLUDE_EXTENSIONS) set (_excludeExtensions "${COTIRE_UNITY_SOURCE_EXCLUDE_EXTENSIONS}") else() set (_excludeExtensions "") endif() if (COTIRE_DEBUG AND _languageExtensions) message (STATUS "${_language} source file extensions: ${_languageExtensions}") endif() if (COTIRE_DEBUG AND _ignoreExtensions) message (STATUS "${_language} ignore extensions: ${_ignoreExtensions}") endif() if (COTIRE_DEBUG AND _excludeExtensions) message (STATUS "${_language} exclude extensions: ${_excludeExtensions}") endif() if (CMAKE_VERSION VERSION_LESS "3.1.0") set (_allSourceFiles ${ARGN}) else() # as of CMake 3.1 target sources may contain generator expressions # since we cannot obtain required property information about source files added # through generator expressions at configure time, we filter them out string (GENEX_STRIP "${ARGN}" _allSourceFiles) endif() set (_filteredSourceFiles "") set (_excludedSourceFiles "") foreach (_sourceFile 
${_allSourceFiles}) get_source_file_property(_sourceIsHeaderOnly "${_sourceFile}" HEADER_FILE_ONLY) get_source_file_property(_sourceIsExternal "${_sourceFile}" EXTERNAL_OBJECT) get_source_file_property(_sourceIsSymbolic "${_sourceFile}" SYMBOLIC) if (NOT _sourceIsHeaderOnly AND NOT _sourceIsExternal AND NOT _sourceIsSymbolic) cotire_get_source_file_extension("${_sourceFile}" _sourceExt) if (_sourceExt) list (FIND _ignoreExtensions "${_sourceExt}" _ignoreIndex) if (_ignoreIndex LESS 0) list (FIND _excludeExtensions "${_sourceExt}" _excludeIndex) if (_excludeIndex GREATER -1) list (APPEND _excludedSourceFiles "${_sourceFile}") else() list (FIND _languageExtensions "${_sourceExt}" _sourceIndex) if (_sourceIndex GREATER -1) # consider source file unless it is excluded explicitly get_source_file_property(_sourceIsExcluded "${_sourceFile}" COTIRE_EXCLUDED) if (_sourceIsExcluded) list (APPEND _excludedSourceFiles "${_sourceFile}") else() list (APPEND _filteredSourceFiles "${_sourceFile}") endif() else() get_source_file_property(_sourceLanguage "${_sourceFile}" LANGUAGE) if ("${_sourceLanguage}" STREQUAL "${_language}") # add to excluded sources, if file is not ignored and has correct language without having the correct extension list (APPEND _excludedSourceFiles "${_sourceFile}") endif() endif() endif() endif() endif() endif() endforeach() # separate filtered source files from already cotired ones # the COTIRE_TARGET property of a source file may be set while a target is being processed by cotire set (_sourceFiles "") set (_cotiredSourceFiles "") foreach (_sourceFile ${_filteredSourceFiles}) get_source_file_property(_sourceIsCotired "${_sourceFile}" COTIRE_TARGET) if (_sourceIsCotired) list (APPEND _cotiredSourceFiles "${_sourceFile}") else() get_source_file_property(_sourceCompileFlags "${_sourceFile}" COMPILE_FLAGS) if (_sourceCompileFlags) # add to excluded sources, if file has custom compile flags list (APPEND _excludedSourceFiles "${_sourceFile}") else() 
get_source_file_property(_sourceCompileOptions "${_sourceFile}" COMPILE_OPTIONS) if (_sourceCompileOptions) # add to excluded sources, if file has list of custom compile options list (APPEND _excludedSourceFiles "${_sourceFile}") else() list (APPEND _sourceFiles "${_sourceFile}") endif() endif() endif() endforeach() if (COTIRE_DEBUG) if (_sourceFiles) message (STATUS "Filtered ${_target} ${_language} sources: ${_sourceFiles}") endif() if (_excludedSourceFiles) message (STATUS "Excluded ${_target} ${_language} sources: ${_excludedSourceFiles}") endif() if (_cotiredSourceFiles) message (STATUS "Cotired ${_target} ${_language} sources: ${_cotiredSourceFiles}") endif() endif() set (${_sourceFilesVar} ${_sourceFiles} PARENT_SCOPE) set (${_excludedSourceFilesVar} ${_excludedSourceFiles} PARENT_SCOPE) set (${_cotiredSourceFilesVar} ${_cotiredSourceFiles} PARENT_SCOPE) endfunction() function (cotire_get_objects_with_property_on _filteredObjectsVar _property _type) set (_filteredObjects "") foreach (_object ${ARGN}) get_property(_isSet ${_type} "${_object}" PROPERTY ${_property} SET) if (_isSet) get_property(_propertyValue ${_type} "${_object}" PROPERTY ${_property}) if (_propertyValue) list (APPEND _filteredObjects "${_object}") endif() endif() endforeach() set (${_filteredObjectsVar} ${_filteredObjects} PARENT_SCOPE) endfunction() function (cotire_get_objects_with_property_off _filteredObjectsVar _property _type) set (_filteredObjects "") foreach (_object ${ARGN}) get_property(_isSet ${_type} "${_object}" PROPERTY ${_property} SET) if (_isSet) get_property(_propertyValue ${_type} "${_object}" PROPERTY ${_property}) if (NOT _propertyValue) list (APPEND _filteredObjects "${_object}") endif() endif() endforeach() set (${_filteredObjectsVar} ${_filteredObjects} PARENT_SCOPE) endfunction() function (cotire_get_source_file_property_values _valuesVar _property) set (_values "") foreach (_sourceFile ${ARGN}) get_source_file_property(_propertyValue "${_sourceFile}" ${_property}) 
if (_propertyValue) list (APPEND _values "${_propertyValue}") endif() endforeach() set (${_valuesVar} ${_values} PARENT_SCOPE) endfunction() function (cotire_resolve_config_properties _configurations _propertiesVar) set (_properties "") foreach (_property ${ARGN}) if ("${_property}" MATCHES "") foreach (_config ${_configurations}) string (TOUPPER "${_config}" _upperConfig) string (REPLACE "" "${_upperConfig}" _configProperty "${_property}") list (APPEND _properties ${_configProperty}) endforeach() else() list (APPEND _properties ${_property}) endif() endforeach() set (${_propertiesVar} ${_properties} PARENT_SCOPE) endfunction() function (cotire_copy_set_properties _configurations _type _source _target) cotire_resolve_config_properties("${_configurations}" _properties ${ARGN}) foreach (_property ${_properties}) get_property(_isSet ${_type} ${_source} PROPERTY ${_property} SET) if (_isSet) get_property(_propertyValue ${_type} ${_source} PROPERTY ${_property}) set_property(${_type} ${_target} PROPERTY ${_property} "${_propertyValue}") endif() endforeach() endfunction() function (cotire_get_target_usage_requirements _target _config _targetRequirementsVar) set (_targetRequirements "") get_target_property(_librariesToProcess ${_target} LINK_LIBRARIES) while (_librariesToProcess) # remove from head list (GET _librariesToProcess 0 _library) list (REMOVE_AT _librariesToProcess 0) if (_library MATCHES "^\\$<\\$:([A-Za-z0-9_:-]+)>$") set (_library "${CMAKE_MATCH_1}") elseif (_config STREQUAL "None" AND _library MATCHES "^\\$<\\$:([A-Za-z0-9_:-]+)>$") set (_library "${CMAKE_MATCH_1}") endif() if (TARGET ${_library}) list (FIND _targetRequirements ${_library} _index) if (_index LESS 0) list (APPEND _targetRequirements ${_library}) # BFS traversal of transitive libraries get_target_property(_libraries ${_library} INTERFACE_LINK_LIBRARIES) if (_libraries) list (APPEND _librariesToProcess ${_libraries}) list (REMOVE_DUPLICATES _librariesToProcess) endif() endif() endif() 
endwhile() set (${_targetRequirementsVar} ${_targetRequirements} PARENT_SCOPE) endfunction() function (cotire_filter_compile_flags _language _flagFilter _matchedOptionsVar _unmatchedOptionsVar) if (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel") set (_flagPrefix "[/-]") else() set (_flagPrefix "--?") endif() set (_optionFlag "") set (_matchedOptions "") set (_unmatchedOptions "") foreach (_compileFlag ${ARGN}) if (_compileFlag) if (_optionFlag AND NOT "${_compileFlag}" MATCHES "^${_flagPrefix}") # option with separate argument list (APPEND _matchedOptions "${_compileFlag}") set (_optionFlag "") elseif ("${_compileFlag}" MATCHES "^(${_flagPrefix})(${_flagFilter})$") # remember option set (_optionFlag "${CMAKE_MATCH_2}") elseif ("${_compileFlag}" MATCHES "^(${_flagPrefix})(${_flagFilter})(.+)$") # option with joined argument list (APPEND _matchedOptions "${CMAKE_MATCH_3}") set (_optionFlag "") else() # flush remembered option if (_optionFlag) list (APPEND _matchedOptions "${_optionFlag}") set (_optionFlag "") endif() # add to unfiltered options list (APPEND _unmatchedOptions "${_compileFlag}") endif() endif() endforeach() if (_optionFlag) list (APPEND _matchedOptions "${_optionFlag}") endif() if (COTIRE_DEBUG AND _matchedOptions) message (STATUS "Filter ${_flagFilter} matched: ${_matchedOptions}") endif() if (COTIRE_DEBUG AND _unmatchedOptions) message (STATUS "Filter ${_flagFilter} unmatched: ${_unmatchedOptions}") endif() set (${_matchedOptionsVar} ${_matchedOptions} PARENT_SCOPE) set (${_unmatchedOptionsVar} ${_unmatchedOptions} PARENT_SCOPE) endfunction() function (cotire_is_target_supported _target _isSupportedVar) if (NOT TARGET "${_target}") set (${_isSupportedVar} FALSE PARENT_SCOPE) return() endif() get_target_property(_imported ${_target} IMPORTED) if (_imported) set (${_isSupportedVar} FALSE PARENT_SCOPE) return() endif() get_target_property(_targetType ${_target} TYPE) if (NOT _targetType MATCHES 
"EXECUTABLE|(STATIC|SHARED|MODULE|OBJECT)_LIBRARY")
		set (${_isSupportedVar} FALSE PARENT_SCOPE)
		return()
	endif()
	set (${_isSupportedVar} TRUE PARENT_SCOPE)
endfunction()

# Collects the compile flags in effect for the given target/language/config:
# global CMake language flags, per-config flags, target COMPILE_FLAGS and
# COMPILE_OPTIONS, interface options of linked targets, language standard,
# PIC/PIE, visibility and Apple platform flags. Result is a CMake list.
function (cotire_get_target_compile_flags _config _language _target _flagsVar)
	string (TOUPPER "${_config}" _upperConfig)
	# collect options from CMake language variables
	set (_compileFlags "")
	if (CMAKE_${_language}_FLAGS)
		set (_compileFlags "${_compileFlags} ${CMAKE_${_language}_FLAGS}")
	endif()
	if (CMAKE_${_language}_FLAGS_${_upperConfig})
		set (_compileFlags "${_compileFlags} ${CMAKE_${_language}_FLAGS_${_upperConfig}}")
	endif()
	if (_target)
		# add target compile flags
		get_target_property(_targetflags ${_target} COMPILE_FLAGS)
		if (_targetflags)
			set (_compileFlags "${_compileFlags} ${_targetflags}")
		endif()
	endif()
	# split the flags string into a CMake list, honoring platform quoting rules
	if (UNIX)
		separate_arguments(_compileFlags UNIX_COMMAND "${_compileFlags}")
	elseif(WIN32)
		separate_arguments(_compileFlags WINDOWS_COMMAND "${_compileFlags}")
	else()
		separate_arguments(_compileFlags)
	endif()
	# target compile options
	if (_target)
		get_target_property(_targetOptions ${_target} COMPILE_OPTIONS)
		if (_targetOptions)
			list (APPEND _compileFlags ${_targetOptions})
		endif()
	endif()
	# interface compile options from linked library targets
	if (_target)
		set (_linkedTargets "")
		cotire_get_target_usage_requirements(${_target} ${_config} _linkedTargets)
		foreach (_linkedTarget ${_linkedTargets})
			get_target_property(_targetOptions ${_linkedTarget} INTERFACE_COMPILE_OPTIONS)
			if (_targetOptions)
				list (APPEND _compileFlags ${_targetOptions})
			endif()
		endforeach()
	endif()
	# handle language standard properties
	if (CMAKE_${_language}_STANDARD_DEFAULT)
		# used compiler supports language standard levels
		if (_target)
			get_target_property(_targetLanguageStandard ${_target} ${_language}_STANDARD)
			if (_targetLanguageStandard)
				set (_type "EXTENSION")
				get_property(_isSet TARGET ${_target} PROPERTY ${_language}_EXTENSIONS SET)
				if (_isSet)
					get_target_property(_targetUseLanguageExtensions ${_target} ${_language}_EXTENSIONS)
					if (NOT _targetUseLanguageExtensions)
						set (_type "STANDARD")
					endif()
				endif()
				if (CMAKE_${_language}${_targetLanguageStandard}_${_type}_COMPILE_OPTION)
					list (APPEND _compileFlags "${CMAKE_${_language}${_targetLanguageStandard}_${_type}_COMPILE_OPTION}")
				endif()
			endif()
		endif()
	endif()
	# handle the POSITION_INDEPENDENT_CODE target property
	if (_target)
		get_target_property(_targetPIC ${_target} POSITION_INDEPENDENT_CODE)
		if (_targetPIC)
			get_target_property(_targetType ${_target} TYPE)
			if (_targetType STREQUAL "EXECUTABLE" AND CMAKE_${_language}_COMPILE_OPTIONS_PIE)
				list (APPEND _compileFlags "${CMAKE_${_language}_COMPILE_OPTIONS_PIE}")
			elseif (CMAKE_${_language}_COMPILE_OPTIONS_PIC)
				list (APPEND _compileFlags "${CMAKE_${_language}_COMPILE_OPTIONS_PIC}")
			endif()
		endif()
	endif()
	# handle visibility target properties
	if (_target)
		get_target_property(_targetVisibility ${_target} ${_language}_VISIBILITY_PRESET)
		if (_targetVisibility AND CMAKE_${_language}_COMPILE_OPTIONS_VISIBILITY)
			list (APPEND _compileFlags "${CMAKE_${_language}_COMPILE_OPTIONS_VISIBILITY}${_targetVisibility}")
		endif()
		get_target_property(_targetVisibilityInlines ${_target} VISIBILITY_INLINES_HIDDEN)
		if (_targetVisibilityInlines AND CMAKE_${_language}_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN)
			list (APPEND _compileFlags "${CMAKE_${_language}_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN}")
		endif()
	endif()
	# platform specific flags
	if (APPLE)
		get_target_property(_architectures ${_target} OSX_ARCHITECTURES_${_upperConfig})
		if (NOT _architectures)
			get_target_property(_architectures ${_target} OSX_ARCHITECTURES)
		endif()
		if (_architectures)
			foreach (_arch ${_architectures})
				list (APPEND _compileFlags "-arch" "${_arch}")
			endforeach()
		endif()
		if (CMAKE_OSX_SYSROOT)
			if (CMAKE_${_language}_SYSROOT_FLAG)
				list (APPEND _compileFlags "${CMAKE_${_language}_SYSROOT_FLAG}" "${CMAKE_OSX_SYSROOT}")
			else()
				list (APPEND _compileFlags "-isysroot" "${CMAKE_OSX_SYSROOT}")
			endif()
		endif()
		if (CMAKE_OSX_DEPLOYMENT_TARGET)
			if (CMAKE_${_language}_OSX_DEPLOYMENT_TARGET_FLAG)
				list (APPEND _compileFlags "${CMAKE_${_language}_OSX_DEPLOYMENT_TARGET_FLAG}${CMAKE_OSX_DEPLOYMENT_TARGET}")
			else()
				list (APPEND _compileFlags "-mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}")
			endif()
		endif()
	endif()
	if (COTIRE_DEBUG AND _compileFlags)
		message (STATUS "Target ${_target} compile flags: ${_compileFlags}")
	endif()
	set (${_flagsVar} ${_compileFlags} PARENT_SCOPE)
endfunction()

# Determines the (system) include directories in effect for the given
# target/language/config, merging directory properties, target properties,
# interface include dirs of linked targets, and -I/-isystem flags parsed
# from the target compile flags.
function (cotire_get_target_include_directories _config _language _target _includeDirsVar _systemIncludeDirsVar)
	set (_includeDirs "")
	set (_systemIncludeDirs "")
	# default include dirs
	if (CMAKE_INCLUDE_CURRENT_DIR)
		list (APPEND _includeDirs "${CMAKE_CURRENT_BINARY_DIR}")
		list (APPEND _includeDirs "${CMAKE_CURRENT_SOURCE_DIR}")
	endif()
	set (_targetFlags "")
	cotire_get_target_compile_flags("${_config}" "${_language}" "${_target}" _targetFlags)
	# parse additional include directories from target compile flags
	if (CMAKE_INCLUDE_FLAG_${_language})
		string (STRIP "${CMAKE_INCLUDE_FLAG_${_language}}" _includeFlag)
		string (REGEX REPLACE "^[-/]+" "" _includeFlag "${_includeFlag}")
		if (_includeFlag)
			set (_dirs "")
			cotire_filter_compile_flags("${_language}" "${_includeFlag}" _dirs _ignore ${_targetFlags})
			if (_dirs)
				list (APPEND _includeDirs ${_dirs})
			endif()
		endif()
	endif()
	# parse additional system include directories from target compile flags
	if (CMAKE_INCLUDE_SYSTEM_FLAG_${_language})
		string (STRIP "${CMAKE_INCLUDE_SYSTEM_FLAG_${_language}}" _includeFlag)
		string (REGEX REPLACE "^[-/]+" "" _includeFlag "${_includeFlag}")
		if (_includeFlag)
			set (_dirs "")
			cotire_filter_compile_flags("${_language}" "${_includeFlag}" _dirs _ignore ${_targetFlags})
			if (_dirs)
				list (APPEND _systemIncludeDirs ${_dirs})
			endif()
		endif()
	endif()
	# target include directories
	get_directory_property(_dirs DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" INCLUDE_DIRECTORIES)
	if (_target)
		get_target_property(_targetDirs ${_target} INCLUDE_DIRECTORIES)
		if (_targetDirs)
			list (APPEND _dirs ${_targetDirs})
		endif()
		get_target_property(_targetDirs ${_target} INTERFACE_SYSTEM_INCLUDE_DIRECTORIES)
		if (_targetDirs)
			list (APPEND _systemIncludeDirs ${_targetDirs})
		endif()
	endif()
	# interface include directories from linked library targets
	if (_target)
		set (_linkedTargets "")
		cotire_get_target_usage_requirements(${_target} ${_config} _linkedTargets)
		foreach (_linkedTarget ${_linkedTargets})
			get_target_property(_linkedTargetType ${_linkedTarget} TYPE)
			if (CMAKE_INCLUDE_CURRENT_DIR_IN_INTERFACE AND NOT CMAKE_VERSION VERSION_LESS "3.4.0" AND
				_linkedTargetType MATCHES "(STATIC|SHARED|MODULE|OBJECT)_LIBRARY")
				# CMAKE_INCLUDE_CURRENT_DIR_IN_INTERFACE refers to CMAKE_CURRENT_BINARY_DIR and CMAKE_CURRENT_SOURCE_DIR
				# at the time, when the target was created. These correspond to the target properties BINARY_DIR and SOURCE_DIR
				# which are only available with CMake 3.4 or later.
				get_target_property(_targetDirs ${_linkedTarget} BINARY_DIR)
				if (_targetDirs)
					list (APPEND _dirs ${_targetDirs})
				endif()
				get_target_property(_targetDirs ${_linkedTarget} SOURCE_DIR)
				if (_targetDirs)
					list (APPEND _dirs ${_targetDirs})
				endif()
			endif()
			get_target_property(_targetDirs ${_linkedTarget} INTERFACE_INCLUDE_DIRECTORIES)
			if (_targetDirs)
				list (APPEND _dirs ${_targetDirs})
			endif()
			get_target_property(_targetDirs ${_linkedTarget} INTERFACE_SYSTEM_INCLUDE_DIRECTORIES)
			if (_targetDirs)
				list (APPEND _systemIncludeDirs ${_targetDirs})
			endif()
		endforeach()
	endif()
	# FIX: was "if (dirs)" — an undefined variable, so duplicates were never removed
	if (_dirs)
		list (REMOVE_DUPLICATES _dirs)
	endif()
	list (LENGTH _includeDirs _projectInsertIndex)
	foreach (_dir ${_dirs})
		if (CMAKE_INCLUDE_DIRECTORIES_PROJECT_BEFORE)
			# project-relative dirs are inserted before system dirs, preserving order
			cotire_check_is_path_relative_to("${_dir}" _isRelative "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}")
			if (_isRelative)
				list (LENGTH _includeDirs _len)
				if (_len EQUAL _projectInsertIndex)
					list (APPEND _includeDirs "${_dir}")
				else()
					list (INSERT _includeDirs _projectInsertIndex "${_dir}")
				endif()
				math (EXPR _projectInsertIndex "${_projectInsertIndex} + 1")
			else()
				list (APPEND _includeDirs "${_dir}")
			endif()
		else()
			list (APPEND _includeDirs "${_dir}")
		endif()
	endforeach()
	list (REMOVE_DUPLICATES _includeDirs)
	list (REMOVE_DUPLICATES _systemIncludeDirs)
	if (CMAKE_${_language}_IMPLICIT_INCLUDE_DIRECTORIES)
		list (REMOVE_ITEM _includeDirs ${CMAKE_${_language}_IMPLICIT_INCLUDE_DIRECTORIES})
	endif()
	if (WIN32 AND NOT MINGW)
		# convert Windows paths in include directories to CMake paths
		if (_includeDirs)
			set (_paths "")
			foreach (_dir ${_includeDirs})
				file (TO_CMAKE_PATH "${_dir}" _path)
				list (APPEND _paths "${_path}")
			endforeach()
			set (_includeDirs ${_paths})
		endif()
		if (_systemIncludeDirs)
			set (_paths "")
			foreach (_dir ${_systemIncludeDirs})
				file (TO_CMAKE_PATH "${_dir}" _path)
				list (APPEND _paths "${_path}")
			endforeach()
			set (_systemIncludeDirs ${_paths})
		endif()
	endif()
	if (COTIRE_DEBUG AND _includeDirs)
		message (STATUS "Target ${_target} include dirs: ${_includeDirs}")
	endif()
	set (${_includeDirsVar} ${_includeDirs} PARENT_SCOPE)
	if (COTIRE_DEBUG AND _systemIncludeDirs)
		message (STATUS "Target ${_target} system include dirs: ${_systemIncludeDirs}")
	endif()
	set (${_systemIncludeDirsVar} ${_systemIncludeDirs} PARENT_SCOPE)
endfunction()

# Returns the export symbol (e.g. mylib_EXPORTS) for shared/module libraries
# and executables with ENABLE_EXPORTS, or an empty string otherwise.
function (cotire_get_target_export_symbol _target _exportSymbolVar)
	set (_exportSymbol "")
	get_target_property(_targetType ${_target} TYPE)
	get_target_property(_enableExports ${_target} ENABLE_EXPORTS)
	if (_targetType MATCHES "(SHARED|MODULE)_LIBRARY" OR
		(_targetType STREQUAL "EXECUTABLE" AND _enableExports))
		get_target_property(_exportSymbol ${_target} DEFINE_SYMBOL)
		if (NOT _exportSymbol)
			set (_exportSymbol "${_target}_EXPORTS")
		endif()
		string (MAKE_C_IDENTIFIER "${_exportSymbol}" _exportSymbol)
	endif()
	set (${_exportSymbolVar} ${_exportSymbol} PARENT_SCOPE)
endfunction()

# Collects the preprocessor definitions in effect for the given
# target/language/config from directory properties, target properties,
# linked-target interfaces and -D flags parsed from target compile flags.
function (cotire_get_target_compile_definitions _config _language _target _definitionsVar)
	string (TOUPPER "${_config}" _upperConfig)
	set (_configDefinitions "")
	# CMAKE_INTDIR for multi-configuration build systems
	if (NOT "${CMAKE_CFG_INTDIR}" STREQUAL ".")
		list (APPEND _configDefinitions "CMAKE_INTDIR=\"${_config}\"")
	endif()
	# target export define symbol
	cotire_get_target_export_symbol("${_target}" _defineSymbol)
	if (_defineSymbol)
		list (APPEND _configDefinitions "${_defineSymbol}")
	endif()
	# directory compile definitions
	get_directory_property(_definitions DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" COMPILE_DEFINITIONS)
	if (_definitions)
		list (APPEND _configDefinitions ${_definitions})
	endif()
	get_directory_property(_definitions DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" COMPILE_DEFINITIONS_${_upperConfig})
	if (_definitions)
		list (APPEND _configDefinitions ${_definitions})
	endif()
	# target compile definitions
	get_target_property(_definitions ${_target} COMPILE_DEFINITIONS)
	if (_definitions)
		list (APPEND _configDefinitions ${_definitions})
	endif()
	get_target_property(_definitions ${_target} COMPILE_DEFINITIONS_${_upperConfig})
	if (_definitions)
		list (APPEND _configDefinitions ${_definitions})
	endif()
	# interface compile definitions from linked library targets
	set (_linkedTargets "")
	cotire_get_target_usage_requirements(${_target} ${_config} _linkedTargets)
	foreach (_linkedTarget ${_linkedTargets})
		get_target_property(_definitions ${_linkedTarget} INTERFACE_COMPILE_DEFINITIONS)
		if (_definitions)
			list (APPEND _configDefinitions ${_definitions})
		endif()
	endforeach()
	# parse additional compile definitions from target compile flags
	# and do not look at directory compile definitions, which we already handled
	set (_targetFlags "")
	cotire_get_target_compile_flags("${_config}" "${_language}" "${_target}" _targetFlags)
	cotire_filter_compile_flags("${_language}" "D" _definitions _ignore ${_targetFlags})
	if (_definitions)
		list (APPEND _configDefinitions ${_definitions})
	endif()
	list (REMOVE_DUPLICATES _configDefinitions)
	if (COTIRE_DEBUG AND _configDefinitions)
		message (STATUS "Target ${_target} compile definitions: ${_configDefinitions}")
	endif()
	set (${_definitionsVar} ${_configDefinitions} PARENT_SCOPE)
endfunction()

# Returns the target compile flags with -D and -I/-isystem style options
# filtered out (those are handled separately).
function (cotire_get_target_compiler_flags _config _language _target _compilerFlagsVar)
	# parse target compile flags omitting compile definitions and include directives
	set (_targetFlags "")
	cotire_get_target_compile_flags("${_config}" "${_language}" "${_target}" _targetFlags)
	set (_flagFilter "D")
	if (CMAKE_INCLUDE_FLAG_${_language})
		string (STRIP "${CMAKE_INCLUDE_FLAG_${_language}}" _includeFlag)
		string (REGEX REPLACE "^[-/]+" "" _includeFlag "${_includeFlag}")
		if (_includeFlag)
			set (_flagFilter "${_flagFilter}|${_includeFlag}")
		endif()
	endif()
	if (CMAKE_INCLUDE_SYSTEM_FLAG_${_language})
		string (STRIP "${CMAKE_INCLUDE_SYSTEM_FLAG_${_language}}" _includeFlag)
		string (REGEX REPLACE "^[-/]+" "" _includeFlag "${_includeFlag}")
		if (_includeFlag)
			set (_flagFilter "${_flagFilter}|${_includeFlag}")
		endif()
	endif()
	set (_compilerFlags "")
	cotire_filter_compile_flags("${_language}" "${_flagFilter}" _ignore _compilerFlags ${_targetFlags})
	if (COTIRE_DEBUG AND _compilerFlags)
		message (STATUS "Target ${_target} compiler flags: ${_compilerFlags}")
	endif()
	set (${_compilerFlagsVar} ${_compilerFlags} PARENT_SCOPE)
endfunction()

# On macOS, appends sysroot-prefixed variants of absolute paths that exist
# under CMAKE_OSX_SYSROOT.
# NOTE(review): relies on _language from the caller's scope — it is not a
# parameter of this function; verify callers always define it.
function (cotire_add_sys_root_paths _pathsVar)
	if (APPLE)
		if (CMAKE_OSX_SYSROOT AND CMAKE_${_language}_HAS_ISYSROOT)
			foreach (_path IN LISTS ${_pathsVar})
				if (IS_ABSOLUTE "${_path}")
					get_filename_component(_path "${CMAKE_OSX_SYSROOT}/${_path}" ABSOLUTE)
					if (EXISTS "${_path}")
						list (APPEND ${_pathsVar} "${_path}")
					endif()
				endif()
			endforeach()
		endif()
	endif()
	set (${_pathsVar} ${${_pathsVar}} PARENT_SCOPE)
endfunction()

# Extracts the run of pattern-matching values that follows _sourceFile in the
# extra-properties list passed via ARGN (format: file, value, value, ..., "-").
function (cotire_get_source_extra_properties _sourceFile _pattern _resultVar)
	set (_extraProperties ${ARGN})
	set (_result "")
	if (_extraProperties)
		list (FIND _extraProperties "${_sourceFile}" _index)
		if (_index GREATER -1)
			math (EXPR _index "${_index} + 1")
			list (LENGTH _extraProperties _len)
			math (EXPR _len "${_len} - 1")
			foreach (_index RANGE ${_index} ${_len})
				list (GET _extraProperties ${_index} _value)
				if (_value MATCHES "${_pattern}")
					list (APPEND _result "${_value}")
				else()
					break()
				endif()
			endforeach()
		endif()
	endif()
	set (${_resultVar} ${_result} PARENT_SCOPE)
endfunction()

# Collects per-source-file compile definitions (COMPILE_DEFINITIONS and the
# per-config variant), plus definitions passed as extra properties in ARGN.
function (cotire_get_source_compile_definitions _config _language _sourceFile _definitionsVar)
	set (_compileDefinitions "")
	if (NOT CMAKE_SCRIPT_MODE_FILE)
		string (TOUPPER "${_config}" _upperConfig)
		get_source_file_property(_definitions "${_sourceFile}" COMPILE_DEFINITIONS)
		if (_definitions)
			list (APPEND _compileDefinitions ${_definitions})
		endif()
		get_source_file_property(_definitions "${_sourceFile}" COMPILE_DEFINITIONS_${_upperConfig})
		if (_definitions)
			list (APPEND _compileDefinitions ${_definitions})
		endif()
	endif()
	cotire_get_source_extra_properties("${_sourceFile}" "^[a-zA-Z0-9_]+(=.*)?$" _definitions ${ARGN})
	if (_definitions)
		list (APPEND _compileDefinitions ${_definitions})
	endif()
	if (COTIRE_DEBUG AND _compileDefinitions)
		message (STATUS "Source ${_sourceFile} compile definitions: ${_compileDefinitions}")
	endif()
	set (${_definitionsVar} ${_compileDefinitions} PARENT_SCOPE)
endfunction()

# Builds a flat list of per-source definitions in the form
# file, def, def, ..., "-" for all source files in ARGN.
function (cotire_get_source_files_compile_definitions _config _language _definitionsVar)
	set (_configDefinitions "")
	foreach (_sourceFile ${ARGN})
		cotire_get_source_compile_definitions("${_config}" "${_language}" "${_sourceFile}" _sourceDefinitions)
		if (_sourceDefinitions)
			list (APPEND _configDefinitions "${_sourceFile}" ${_sourceDefinitions} "-")
		endif()
	endforeach()
	set (${_definitionsVar} ${_configDefinitions} PARENT_SCOPE)
endfunction()

# Collects the undefs stored in the given source file property, plus undefs
# passed as extra properties in ARGN.
function (cotire_get_source_undefs _sourceFile _property _sourceUndefsVar)
	set (_sourceUndefs "")
	if (NOT CMAKE_SCRIPT_MODE_FILE)
		get_source_file_property(_undefs "${_sourceFile}" ${_property})
		if (_undefs)
			list (APPEND _sourceUndefs ${_undefs})
		endif()
	endif()
	cotire_get_source_extra_properties("${_sourceFile}" "^[a-zA-Z0-9_]+$" _undefs ${ARGN})
	if (_undefs)
		list (APPEND _sourceUndefs ${_undefs})
	endif()
	if (COTIRE_DEBUG AND _sourceUndefs)
		message (STATUS "Source ${_sourceFile} ${_property} undefs: ${_sourceUndefs}")
	endif()
	set (${_sourceUndefsVar} ${_sourceUndefs} PARENT_SCOPE)
endfunction()

# Builds a flat list of per-source undefs in the form
# file, undef, undef, ..., "-" for all source files in ARGN.
function (cotire_get_source_files_undefs _property _sourceUndefsVar)
	set (_sourceUndefs "")
	foreach (_sourceFile ${ARGN})
		cotire_get_source_undefs("${_sourceFile}" ${_property} _undefs)
		if (_undefs)
			list (APPEND _sourceUndefs "${_sourceFile}" ${_undefs} "-")
		endif()
	endforeach()
	set (${_sourceUndefsVar} ${_sourceUndefs} PARENT_SCOPE)
endfunction()

# Initializes a command list that re-invokes CMake in script mode with the
# standard cotire bookkeeping variables.
macro (cotire_set_cmd_to_prologue _cmdVar)
	set (${_cmdVar} "${CMAKE_COMMAND}")
	if (COTIRE_DEBUG)
		list (APPEND ${_cmdVar} "--warn-uninitialized")
	endif()
	# FIX: was "...BUILD_TYPE:STRING=$" — passing a literal "$"; the build type
	# must be expanded per-config via a generator expression
	list (APPEND ${_cmdVar} "-DCOTIRE_BUILD_TYPE:STRING=$<CONFIGURATION>")
	if (XCODE)
		list (APPEND ${_cmdVar} "-DXCODE:BOOL=TRUE")
	endif()
	if (COTIRE_VERBOSE)
		list (APPEND ${_cmdVar} "-DCOTIRE_VERBOSE:BOOL=ON")
	elseif("${CMAKE_GENERATOR}" MATCHES "Makefiles")
		list (APPEND ${_cmdVar} "-DCOTIRE_VERBOSE:BOOL=$(VERBOSE)")
	endif()
endmacro()

# Initializes a compiler command line (launcher, executable, first arg),
# falling back to the CMake language defaults for unset arguments.
function (cotire_init_compile_cmd _cmdVar _language _compilerLauncher _compilerExe _compilerArg1)
	if (NOT _compilerLauncher)
		set (_compilerLauncher ${CMAKE_${_language}_COMPILER_LAUNCHER})
	endif()
	if (NOT _compilerExe)
		set (_compilerExe "${CMAKE_${_language}_COMPILER}")
	endif()
	if (NOT _compilerArg1)
		set (_compilerArg1 ${CMAKE_${_language}_COMPILER_ARG1})
	endif()
	if (WIN32)
		file (TO_NATIVE_PATH "${_compilerExe}" _compilerExe)
	endif()
	string (STRIP "${_compilerArg1}" _compilerArg1)
	if ("${CMAKE_GENERATOR}" MATCHES "Make|Ninja")
		# compiler launcher is only supported for Makefile and Ninja
		set (${_cmdVar} ${_compilerLauncher} "${_compilerExe}" ${_compilerArg1} PARENT_SCOPE)
	else()
		set (${_cmdVar} "${_compilerExe}" ${_compilerArg1} PARENT_SCOPE)
	endif()
endfunction()

# Appends -D//D options for the given definitions to the command list.
macro (cotire_add_definitions_to_cmd _cmdVar _language)
	foreach (_definition ${ARGN})
		if (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel")
			list (APPEND ${_cmdVar} "/D${_definition}")
		else()
			list (APPEND ${_cmdVar} "-D${_definition}")
		endif()
	endforeach()
endmacro()

# Appends include directory options to the command list, using the system
# include flag for directories listed in _systemIncludesVar.
function (cotire_add_includes_to_cmd _cmdVar _language _includesVar _systemIncludesVar)
	set (_includeDirs ${${_includesVar}} ${${_systemIncludesVar}})
	if (_includeDirs)
		list (REMOVE_DUPLICATES _includeDirs)
		foreach (_include ${_includeDirs})
			if (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel")
				file (TO_NATIVE_PATH "${_include}" _include)
				list (APPEND ${_cmdVar} "${CMAKE_INCLUDE_FLAG_${_language}}${CMAKE_INCLUDE_FLAG_SEP_${_language}}${_include}")
			else()
				set (_index -1)
				if ("${CMAKE_INCLUDE_SYSTEM_FLAG_${_language}}" MATCHES ".+")
					list (FIND ${_systemIncludesVar} "${_include}" _index)
				endif()
				if (_index GREATER -1)
					list (APPEND ${_cmdVar} "${CMAKE_INCLUDE_SYSTEM_FLAG_${_language}}${CMAKE_INCLUDE_FLAG_SEP_${_language}}${_include}")
				else()
					list (APPEND ${_cmdVar} "${CMAKE_INCLUDE_FLAG_${_language}}${CMAKE_INCLUDE_FLAG_SEP_${_language}}${_include}")
				endif()
			endif()
		endforeach()
	endif()
	set (${_cmdVar} ${${_cmdVar}} PARENT_SCOPE)
endfunction()

# On macOS, appends framework search path options derived from include
# directories that point inside .framework bundles.
function (cotire_add_frameworks_to_cmd _cmdVar _language _includesVar _systemIncludesVar)
	if (APPLE)
		set (_frameworkDirs "")
		foreach (_include ${${_includesVar}})
			if (IS_ABSOLUTE "${_include}" AND _include MATCHES "\\.framework$")
				get_filename_component(_frameworkDir "${_include}" DIRECTORY)
				list (APPEND _frameworkDirs "${_frameworkDir}")
			endif()
		endforeach()
		set (_systemFrameworkDirs "")
		foreach (_include ${${_systemIncludesVar}})
			if (IS_ABSOLUTE "${_include}" AND _include MATCHES "\\.framework$")
				get_filename_component(_frameworkDir "${_include}" DIRECTORY)
				list (APPEND _systemFrameworkDirs "${_frameworkDir}")
			endif()
		endforeach()
		if (_systemFrameworkDirs)
			list (APPEND _frameworkDirs ${_systemFrameworkDirs})
		endif()
		if (_frameworkDirs)
			list (REMOVE_DUPLICATES _frameworkDirs)
			foreach (_frameworkDir ${_frameworkDirs})
				set (_index -1)
				if ("${CMAKE_${_language}_SYSTEM_FRAMEWORK_SEARCH_FLAG}" MATCHES ".+")
					list (FIND _systemFrameworkDirs "${_frameworkDir}" _index)
				endif()
				if (_index GREATER -1)
					list (APPEND ${_cmdVar} "${CMAKE_${_language}_SYSTEM_FRAMEWORK_SEARCH_FLAG}${_frameworkDir}")
				else()
					list (APPEND ${_cmdVar} "${CMAKE_${_language}_FRAMEWORK_SEARCH_FLAG}${_frameworkDir}")
				endif()
			endforeach()
		endif()
	endif()
	set (${_cmdVar} ${${_cmdVar}} PARENT_SCOPE)
endfunction()

# Appends raw compile flags to the command list.
macro (cotire_add_compile_flags_to_cmd _cmdVar)
	foreach (_flag ${ARGN})
		list (APPEND ${_cmdVar} "${_flag}")
	endforeach()
endmacro()

# Sets _fileIsUpToDateVar to TRUE iff _file exists and is strictly newer than
# every existing dependency file given in ARGN.
function (cotire_check_file_up_to_date _fileIsUpToDateVar _file)
	if (EXISTS "${_file}")
		set (_triggerFile "")
		foreach (_dependencyFile ${ARGN})
			if (EXISTS "${_dependencyFile}")
				# IS_NEWER_THAN returns TRUE if both files have the same timestamp
				# thus we do the comparison in both directions to exclude ties
				if ("${_dependencyFile}" IS_NEWER_THAN "${_file}" AND
					NOT "${_file}" IS_NEWER_THAN "${_dependencyFile}")
					set (_triggerFile "${_dependencyFile}")
					break()
				endif()
			endif()
		endforeach()
		if (_triggerFile)
			if (COTIRE_VERBOSE)
				get_filename_component(_fileName "${_file}" NAME)
				message (STATUS "${_fileName} update triggered by ${_triggerFile} change.")
			endif()
			set (${_fileIsUpToDateVar} FALSE PARENT_SCOPE)
		else()
			if (COTIRE_VERBOSE)
				get_filename_component(_fileName "${_file}" NAME)
				message (STATUS "${_fileName} is up-to-date.")
			endif()
			set (${_fileIsUpToDateVar} TRUE PARENT_SCOPE)
		endif()
	else()
		if (COTIRE_VERBOSE)
			get_filename_component(_fileName "${_file}" NAME)
			message (STATUS "${_fileName} does not exist yet.")
		endif()
		set (${_fileIsUpToDateVar} FALSE PARENT_SCOPE)
	endif()
endfunction()

# Finds the shortest relative path from any of _includeDirs to _headerFile;
# empty result means the header is under none of the directories.
macro (cotire_find_closest_relative_path _headerFile _includeDirs _relPathVar)
	set (${_relPathVar} "")
	foreach (_includeDir ${_includeDirs})
		if (IS_DIRECTORY "${_includeDir}")
			file (RELATIVE_PATH _relPath "${_includeDir}" "${_headerFile}")
			if (NOT IS_ABSOLUTE "${_relPath}" AND NOT "${_relPath}" MATCHES "^\\.\\.")
				string (LENGTH "${${_relPathVar}}" _closestLen)
				string (LENGTH "${_relPath}" _relLen)
				if (_closestLen EQUAL 0 OR _relLen LESS _closestLen)
					set (${_relPathVar} "${_relPath}")
				endif()
			endif()
		elseif ("${_includeDir}" STREQUAL "${_headerFile}")
			# if path matches exactly, return short non-empty string
			set (${_relPathVar} "1")
			break()
		endif()
	endforeach()
endmacro()

# Classifies a header as inside/outside the project by comparing its closest
# relative paths against the ignored (inside) and honored (outside) dirs.
macro (cotire_check_header_file_location _headerFile _insideIncludeDirs _outsideIncludeDirs _headerIsInside)
	# check header path against ignored and honored include directories
	cotire_find_closest_relative_path("${_headerFile}" "${_insideIncludeDirs}" _insideRelPath)
	if (_insideRelPath)
		# header is inside, but could be become outside if there is a shorter outside match
		cotire_find_closest_relative_path("${_headerFile}" "${_outsideIncludeDirs}" _outsideRelPath)
		if (_outsideRelPath)
			string (LENGTH "${_insideRelPath}" _insideRelPathLen)
			string (LENGTH "${_outsideRelPath}" _outsideRelPathLen)
			if (_outsideRelPathLen LESS _insideRelPathLen)
				set (${_headerIsInside} FALSE)
			else()
				set (${_headerIsInside} TRUE)
			endif()
		else()
			set (${_headerIsInside} TRUE)
		endif()
	else()
		# header is outside
		set (${_headerIsInside} FALSE)
	endif()
endmacro()

# Decides whether a header file should be ignored based on its path.
macro (cotire_check_ignore_header_file_path _headerFile _headerIsIgnoredVar)
	if (NOT EXISTS "${_headerFile}")
		set (${_headerIsIgnoredVar} TRUE)
	elseif (IS_DIRECTORY "${_headerFile}")
		set (${_headerIsIgnoredVar} TRUE)
	elseif ("${_headerFile}" MATCHES "\\.\\.|[_-]fixed" AND "${_headerFile}" MATCHES "\\.h$")
		# heuristic: ignore C headers with embedded parent directory references or "-fixed" or "_fixed" in path
		# these often stem from using GCC #include_next tricks, which may break the precompiled header compilation
		# with the error message "error: no include path in which to search for header.h"
		set (${_headerIsIgnoredVar} TRUE)
	else()
		set (${_headerIsIgnoredVar} FALSE)
	endif()
endmacro()

# Decides whether a header file should be ignored based on its extension.
macro (cotire_check_ignore_header_file_ext _headerFile _ignoreExtensionsVar _headerIsIgnoredVar)
	# check header file extension
	cotire_get_source_file_extension("${_headerFile}" _headerFileExt)
	set (${_headerIsIgnoredVar} FALSE)
	if (_headerFileExt)
		list (FIND ${_ignoreExtensionsVar} "${_headerFileExt}" _index)
		if (_index GREATER -1)
			set (${_headerIsIgnoredVar} TRUE)
		endif()
	endif()
endmacro()

# Parses one line of compiler include-scan output into an absolute header
# path and its include depth (0/"" when the line is not an include note).
macro (cotire_parse_line _line _headerFileVar _headerDepthVar)
	if (MSVC)
		# cl.exe /showIncludes produces different output, depending on the language pack used, e.g.:
		# English: "Note: including file: C:\directory\file"
		# German: "Hinweis: Einlesen der Datei: C:\directory\file"
		# We use a very general regular expression, relying on the presence of the : characters
		if (_line MATCHES "( +)([a-zA-Z]:[^:]+)$")
			string (LENGTH "${CMAKE_MATCH_1}" ${_headerDepthVar})
			get_filename_component(${_headerFileVar} "${CMAKE_MATCH_2}" ABSOLUTE)
		else()
			set (${_headerFileVar} "")
			set (${_headerDepthVar} 0)
		endif()
	else()
		if (_line MATCHES "^(\\.+) (.*)$")
			# GCC like output
			string (LENGTH "${CMAKE_MATCH_1}" ${_headerDepthVar})
			if (IS_ABSOLUTE "${CMAKE_MATCH_2}")
				set (${_headerFileVar} "${CMAKE_MATCH_2}")
			else()
				get_filename_component(${_headerFileVar} "${CMAKE_MATCH_2}" REALPATH)
			endif()
		else()
			set (${_headerFileVar} "")
			set (${_headerDepthVar} 0)
		endif()
	endif()
endmacro()

# Parses the compiler's include-scan output, selecting headers that are
# outside the project but included from inside it; unrecognized lines are
# returned via _unparsedLinesVar.
function (cotire_parse_includes _language _scanOutput _ignoredIncludeDirs _honoredIncludeDirs _ignoredExtensions _selectedIncludesVar _unparsedLinesVar)
	if (WIN32)
		# prevent CMake macro invocation errors due to backslash characters in Windows paths
		string (REPLACE "\\" "/" _scanOutput "${_scanOutput}")
	endif()
	# canonize slashes
	string (REPLACE "//" "/" _scanOutput "${_scanOutput}")
	# prevent semicolon from being interpreted as a line separator
	string (REPLACE ";" "\\;" _scanOutput "${_scanOutput}")
	# then separate lines
	string (REGEX REPLACE "\n" ";" _scanOutput "${_scanOutput}")
	list (LENGTH _scanOutput _len)
	# remove duplicate lines to speed up parsing
	list (REMOVE_DUPLICATES _scanOutput)
	list (LENGTH _scanOutput _uniqueLen)
	if (COTIRE_VERBOSE OR COTIRE_DEBUG)
		message (STATUS "Scanning ${_uniqueLen} unique lines of ${_len} for includes")
		if (_ignoredExtensions)
			message (STATUS "Ignored extensions: ${_ignoredExtensions}")
		endif()
		if (_ignoredIncludeDirs)
			message (STATUS "Ignored paths: ${_ignoredIncludeDirs}")
		endif()
		if (_honoredIncludeDirs)
			message (STATUS "Included paths: ${_honoredIncludeDirs}")
		endif()
	endif()
	set (_sourceFiles ${ARGN})
	set (_selectedIncludes "")
	set (_unparsedLines "")
	# stack keeps track of inside/outside project status of processed header files
	set (_headerIsInsideStack "")
	foreach (_line IN LISTS _scanOutput)
		if (_line)
			cotire_parse_line("${_line}" _headerFile _headerDepth)
			if (_headerFile)
				cotire_check_header_file_location("${_headerFile}" "${_ignoredIncludeDirs}" "${_honoredIncludeDirs}" _headerIsInside)
				if (COTIRE_DEBUG)
					message (STATUS "${_headerDepth}: ${_headerFile} ${_headerIsInside}")
				endif()
				# update stack
				list (LENGTH _headerIsInsideStack _stackLen)
				if (_headerDepth GREATER _stackLen)
					math (EXPR _stackLen "${_stackLen} + 1")
					foreach (_index RANGE ${_stackLen} ${_headerDepth})
						list (APPEND _headerIsInsideStack ${_headerIsInside})
					endforeach()
				else()
					foreach (_index RANGE ${_headerDepth} ${_stackLen})
						list (REMOVE_AT _headerIsInsideStack -1)
					endforeach()
					list (APPEND _headerIsInsideStack ${_headerIsInside})
				endif()
				if (COTIRE_DEBUG)
					message (STATUS "${_headerIsInsideStack}")
				endif()
				# header is a candidate if it is outside project
				if (NOT _headerIsInside)
					# get parent header file's inside/outside status
					if (_headerDepth GREATER 1)
						math (EXPR _index "${_headerDepth} - 2")
						list (GET _headerIsInsideStack ${_index} _parentHeaderIsInside)
					else()
						set (_parentHeaderIsInside TRUE)
					endif()
					# select header file if parent header file is inside project
					# (e.g., a project header file that includes a standard header file)
					if (_parentHeaderIsInside)
						cotire_check_ignore_header_file_path("${_headerFile}" _headerIsIgnored)
						if (NOT _headerIsIgnored)
							cotire_check_ignore_header_file_ext("${_headerFile}" _ignoredExtensions _headerIsIgnored)
							if (NOT _headerIsIgnored)
								list (APPEND _selectedIncludes "${_headerFile}")
							else()
								# fix header's inside status on stack, it is ignored by extension now
								list (REMOVE_AT _headerIsInsideStack -1)
								list (APPEND _headerIsInsideStack TRUE)
							endif()
						endif()
						if (COTIRE_DEBUG)
							message (STATUS "${_headerFile} ${_ignoredExtensions} ${_headerIsIgnored}")
						endif()
					endif()
				endif()
			else()
				if (MSVC)
					# for cl.exe do not keep unparsed lines which solely consist of a source file name
					string (FIND "${_sourceFiles}" "${_line}" _index)
					if (_index LESS 0)
						list (APPEND _unparsedLines "${_line}")
					endif()
				else()
					list (APPEND _unparsedLines "${_line}")
				endif()
			endif()
		endif()
	endforeach()
	list (REMOVE_DUPLICATES _selectedIncludes)
	set (${_selectedIncludesVar} ${_selectedIncludes} PARENT_SCOPE)
	set (${_unparsedLinesVar} ${_unparsedLines} PARENT_SCOPE)
endfunction()

# Runs the compiler in include-scan mode over the given source files and
# returns the headers selected for precompilation.
function (cotire_scan_includes _includesVar)
	set(_options "")
	set(_oneValueArgs COMPILER_ID COMPILER_EXECUTABLE COMPILER_ARG1 COMPILER_VERSION LANGUAGE UNPARSED_LINES SCAN_RESULT)
	set(_multiValueArgs COMPILE_DEFINITIONS COMPILE_FLAGS INCLUDE_DIRECTORIES SYSTEM_INCLUDE_DIRECTORIES
		IGNORE_PATH INCLUDE_PATH IGNORE_EXTENSIONS INCLUDE_PRIORITY_PATH COMPILER_LAUNCHER)
	cmake_parse_arguments(_option "${_options}" "${_oneValueArgs}" "${_multiValueArgs}" ${ARGN})
	set (_sourceFiles ${_option_UNPARSED_ARGUMENTS})
	if (NOT _option_LANGUAGE)
		set (_option_LANGUAGE "CXX")
	endif()
	if (NOT _option_COMPILER_ID)
		# FIX: was CMAKE_${_option_LANGUAGE}_ID, a variable CMake never defines
		set (_option_COMPILER_ID "${CMAKE_${_option_LANGUAGE}_COMPILER_ID}")
	endif()
	if (NOT _option_COMPILER_VERSION)
		set (_option_COMPILER_VERSION "${CMAKE_${_option_LANGUAGE}_COMPILER_VERSION}")
	endif()
	cotire_init_compile_cmd(_cmd "${_option_LANGUAGE}" "${_option_COMPILER_LAUNCHER}" "${_option_COMPILER_EXECUTABLE}" "${_option_COMPILER_ARG1}")
	cotire_add_definitions_to_cmd(_cmd "${_option_LANGUAGE}" ${_option_COMPILE_DEFINITIONS})
	cotire_add_compile_flags_to_cmd(_cmd ${_option_COMPILE_FLAGS})
	cotire_add_includes_to_cmd(_cmd "${_option_LANGUAGE}" _option_INCLUDE_DIRECTORIES _option_SYSTEM_INCLUDE_DIRECTORIES)
	cotire_add_frameworks_to_cmd(_cmd "${_option_LANGUAGE}" _option_INCLUDE_DIRECTORIES _option_SYSTEM_INCLUDE_DIRECTORIES)
	cotire_add_makedep_flags("${_option_LANGUAGE}" "${_option_COMPILER_ID}" "${_option_COMPILER_VERSION}" _cmd)
	# only consider existing source files for scanning
	set (_existingSourceFiles "")
	foreach (_sourceFile ${_sourceFiles})
		if (EXISTS "${_sourceFile}")
			list (APPEND _existingSourceFiles "${_sourceFile}")
		endif()
	endforeach()
	if (NOT _existingSourceFiles)
		set (${_includesVar} "" PARENT_SCOPE)
		return()
	endif()
	# add source files to be scanned
	if (WIN32)
		foreach (_sourceFile ${_existingSourceFiles})
			file (TO_NATIVE_PATH "${_sourceFile}" _sourceFileNative)
			list (APPEND _cmd "${_sourceFileNative}")
		endforeach()
	else()
		list (APPEND _cmd ${_existingSourceFiles})
	endif()
	if (COTIRE_VERBOSE)
		message (STATUS "execute_process: ${_cmd}")
	endif()
	if (MSVC_IDE OR _option_COMPILER_ID MATCHES "MSVC")
		# cl.exe messes with the output streams unless the environment variable VS_UNICODE_OUTPUT is cleared
		unset (ENV{VS_UNICODE_OUTPUT})
	endif()
	execute_process(
		COMMAND ${_cmd}
		WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
		RESULT_VARIABLE _result
		OUTPUT_QUIET
		ERROR_VARIABLE _output)
	if (_result)
		message (STATUS "Result ${_result} scanning includes of ${_existingSourceFiles}.")
	endif()
	cotire_parse_includes(
		"${_option_LANGUAGE}" "${_output}"
		"${_option_IGNORE_PATH}" "${_option_INCLUDE_PATH}"
		"${_option_IGNORE_EXTENSIONS}"
		_includes _unparsedLines
		${_sourceFiles})
	if (_option_INCLUDE_PRIORITY_PATH)
		set (_sortedIncludes "")
		foreach (_priorityPath ${_option_INCLUDE_PRIORITY_PATH})
			foreach (_include ${_includes})
				string (FIND ${_include} ${_priorityPath} _position)
				if (_position GREATER -1)
					list (APPEND _sortedIncludes ${_include})
				endif()
			endforeach()
		endforeach()
		if (_sortedIncludes)
			list (INSERT _includes 0 ${_sortedIncludes})
			list (REMOVE_DUPLICATES _includes)
		endif()
	endif()
	set (${_includesVar} ${_includes} PARENT_SCOPE)
	if (_option_UNPARSED_LINES)
		set (${_option_UNPARSED_LINES} ${_unparsedLines} PARENT_SCOPE)
	endif()
	if (_option_SCAN_RESULT)
		set (${_option_SCAN_RESULT} ${_result} PARENT_SCOPE)
	endif()
endfunction()

# Appends #undef directives for the given symbols to the contents list.
macro (cotire_append_undefs _contentsVar)
	set (_undefs ${ARGN})
	if (_undefs)
		list (REMOVE_DUPLICATES _undefs)
		foreach (_definition ${_undefs})
			list (APPEND ${_contentsVar} "#undef ${_definition}")
		endforeach()
	endif()
endmacro()

# Produces a comment string in the syntax of the given language.
macro (cotire_comment_str _language _commentText _commentVar)
	if ("${_language}" STREQUAL "CMAKE")
		set (${_commentVar} "# ${_commentText}")
	else()
		set (${_commentVar} "/* ${_commentText} */")
	endif()
endmacro()

# Writes the contents to the file with a generated-file header; unless _force,
# the file is only touched when its contents actually change.
function (cotire_write_file _language _file _contents _force)
	get_filename_component(_moduleName "${COTIRE_CMAKE_MODULE_FILE}" NAME)
	cotire_comment_str("${_language}" "${_moduleName} ${COTIRE_CMAKE_MODULE_VERSION} generated file" _header1)
	cotire_comment_str("${_language}" "${_file}" _header2)
	set (_contents "${_header1}\n${_header2}\n${_contents}")
	if (COTIRE_DEBUG)
		message (STATUS "${_contents}")
	endif()
	if (_force OR NOT EXISTS "${_file}")
		file (WRITE "${_file}" "${_contents}")
	else()
		file (READ "${_file}" _oldContents)
		if (NOT "${_oldContents}" STREQUAL "${_contents}")
			file (WRITE "${_file}" "${_contents}")
		else()
			if (COTIRE_DEBUG)
				message (STATUS "${_file} unchanged")
			endif()
		endif()
	endif()
endfunction()

# Generates a unity source file that #includes the given source files,
# wrapping each in its per-source defines/undefs.
function (cotire_generate_unity_source _unityFile)
	set(_options "")
	set(_oneValueArgs LANGUAGE)
	set(_multiValueArgs DEPENDS SOURCES_COMPILE_DEFINITIONS
		PRE_UNDEFS SOURCES_PRE_UNDEFS POST_UNDEFS SOURCES_POST_UNDEFS PROLOGUE EPILOGUE)
	cmake_parse_arguments(_option "${_options}" "${_oneValueArgs}" "${_multiValueArgs}" ${ARGN})
	if (_option_DEPENDS)
		cotire_check_file_up_to_date(_unityFileIsUpToDate "${_unityFile}" ${_option_DEPENDS})
		if (_unityFileIsUpToDate)
			return()
		endif()
	endif()
	set (_sourceFiles ${_option_UNPARSED_ARGUMENTS})
	if (NOT _option_PRE_UNDEFS)
		set (_option_PRE_UNDEFS "")
	endif()
	if (NOT _option_SOURCES_PRE_UNDEFS)
		set (_option_SOURCES_PRE_UNDEFS "")
	endif()
	if (NOT _option_POST_UNDEFS)
		set (_option_POST_UNDEFS "")
	endif()
	if (NOT _option_SOURCES_POST_UNDEFS)
		set (_option_SOURCES_POST_UNDEFS "")
	endif()
	set (_contents "")
	if (_option_PROLOGUE)
		list (APPEND _contents ${_option_PROLOGUE})
	endif()
	if (_option_LANGUAGE AND _sourceFiles)
		if ("${_option_LANGUAGE}" STREQUAL "CXX")
			list (APPEND _contents "#ifdef __cplusplus")
		elseif ("${_option_LANGUAGE}" STREQUAL "C")
			list (APPEND _contents "#ifndef __cplusplus")
		endif()
	endif()
	set (_compileUndefinitions "")
	foreach (_sourceFile ${_sourceFiles})
		# NOTE(review): _option_CONFIGURATION is read here but CONFIGURATION is
		# not declared in the parsed options above — it expands empty unless set
		# by the caller's scope; verify against upstream cotire.
		cotire_get_source_compile_definitions(
			"${_option_CONFIGURATION}" "${_option_LANGUAGE}" "${_sourceFile}" _compileDefinitions
			${_option_SOURCES_COMPILE_DEFINITIONS})
		cotire_get_source_undefs("${_sourceFile}" COTIRE_UNITY_SOURCE_PRE_UNDEFS _sourcePreUndefs ${_option_SOURCES_PRE_UNDEFS})
		cotire_get_source_undefs("${_sourceFile}" COTIRE_UNITY_SOURCE_POST_UNDEFS _sourcePostUndefs ${_option_SOURCES_POST_UNDEFS})
		if (_option_PRE_UNDEFS)
			list (APPEND _compileUndefinitions ${_option_PRE_UNDEFS})
		endif()
		if (_sourcePreUndefs)
			list (APPEND _compileUndefinitions ${_sourcePreUndefs})
		endif()
		if (_compileUndefinitions)
			cotire_append_undefs(_contents ${_compileUndefinitions})
			set (_compileUndefinitions "")
		endif()
		if (_sourcePostUndefs)
			list (APPEND _compileUndefinitions ${_sourcePostUndefs})
		endif()
		if (_option_POST_UNDEFS)
			list (APPEND _compileUndefinitions ${_option_POST_UNDEFS})
		endif()
		foreach (_definition ${_compileDefinitions})
			if (_definition MATCHES "^([a-zA-Z0-9_]+)=(.+)$")
				list (APPEND _contents "#define ${CMAKE_MATCH_1} ${CMAKE_MATCH_2}")
				list (INSERT _compileUndefinitions 0 "${CMAKE_MATCH_1}")
			else()
				list (APPEND _contents "#define ${_definition}")
				list (INSERT _compileUndefinitions 0 "${_definition}")
			endif()
		endforeach()
		# use absolute path as source file location
		get_filename_component(_sourceFileLocation "${_sourceFile}" ABSOLUTE)
		if (WIN32)
			file (TO_NATIVE_PATH "${_sourceFileLocation}" _sourceFileLocation)
		endif()
		list (APPEND _contents "#include \"${_sourceFileLocation}\"")
	endforeach()
	if (_compileUndefinitions)
		cotire_append_undefs(_contents ${_compileUndefinitions})
		set (_compileUndefinitions "")
	endif()
	if (_option_LANGUAGE AND _sourceFiles)
		list (APPEND _contents "#endif")
	endif()
	if (_option_EPILOGUE)
		list (APPEND _contents ${_option_EPILOGUE})
	endif()
	list (APPEND _contents "")
	string (REPLACE ";" "\n" _contents "${_contents}")
	if (COTIRE_VERBOSE)
		message ("${_contents}")
	endif()
	cotire_write_file("${_option_LANGUAGE}" "${_unityFile}" "${_contents}" TRUE)
endfunction()

# Generates the prefix header by scanning the given sources for outside-project
# includes; unparsed scanner lines are written to <prefixFile>.log.
function (cotire_generate_prefix_header _prefixFile)
	set(_options "")
	set(_oneValueArgs LANGUAGE COMPILER_EXECUTABLE COMPILER_ARG1 COMPILER_ID COMPILER_VERSION)
	set(_multiValueArgs DEPENDS COMPILE_DEFINITIONS COMPILE_FLAGS
		INCLUDE_DIRECTORIES SYSTEM_INCLUDE_DIRECTORIES IGNORE_PATH INCLUDE_PATH
		IGNORE_EXTENSIONS INCLUDE_PRIORITY_PATH COMPILER_LAUNCHER)
	cmake_parse_arguments(_option "${_options}" "${_oneValueArgs}" "${_multiValueArgs}" ${ARGN})
	if (NOT _option_COMPILER_ID)
		# FIX: was CMAKE_${_option_LANGUAGE}_ID, a variable CMake never defines
		set (_option_COMPILER_ID "${CMAKE_${_option_LANGUAGE}_COMPILER_ID}")
	endif()
	if (NOT _option_COMPILER_VERSION)
		set (_option_COMPILER_VERSION "${CMAKE_${_option_LANGUAGE}_COMPILER_VERSION}")
	endif()
	if (_option_DEPENDS)
		cotire_check_file_up_to_date(_prefixFileIsUpToDate "${_prefixFile}" ${_option_DEPENDS})
		if (_prefixFileIsUpToDate)
			# create empty log file
			set (_unparsedLinesFile "${_prefixFile}.log")
			file (WRITE "${_unparsedLinesFile}" "")
			return()
		endif()
	endif()
	set (_prologue "")
	set (_epilogue "")
	if (_option_COMPILER_ID MATCHES "Clang")
		set (_prologue "#pragma clang system_header")
	elseif (_option_COMPILER_ID MATCHES "GNU")
		set (_prologue "#pragma GCC system_header")
	elseif (_option_COMPILER_ID MATCHES "MSVC")
		set (_prologue "#pragma warning(push, 0)")
		set (_epilogue "#pragma warning(pop)")
	elseif (_option_COMPILER_ID MATCHES "Intel")
		# Intel compiler requires hdrstop pragma to stop generating PCH file
		set (_epilogue "#pragma hdrstop")
	endif()
	set (_sourceFiles ${_option_UNPARSED_ARGUMENTS})
	cotire_scan_includes(_selectedHeaders ${_sourceFiles}
		LANGUAGE "${_option_LANGUAGE}"
		COMPILER_LAUNCHER "${_option_COMPILER_LAUNCHER}"
		COMPILER_EXECUTABLE "${_option_COMPILER_EXECUTABLE}"
		COMPILER_ARG1 "${_option_COMPILER_ARG1}"
		COMPILER_ID "${_option_COMPILER_ID}"
		COMPILER_VERSION "${_option_COMPILER_VERSION}"
		COMPILE_DEFINITIONS ${_option_COMPILE_DEFINITIONS}
		COMPILE_FLAGS ${_option_COMPILE_FLAGS}
		INCLUDE_DIRECTORIES ${_option_INCLUDE_DIRECTORIES}
		SYSTEM_INCLUDE_DIRECTORIES ${_option_SYSTEM_INCLUDE_DIRECTORIES}
		IGNORE_PATH ${_option_IGNORE_PATH}
		INCLUDE_PATH ${_option_INCLUDE_PATH}
		IGNORE_EXTENSIONS ${_option_IGNORE_EXTENSIONS}
		INCLUDE_PRIORITY_PATH ${_option_INCLUDE_PRIORITY_PATH}
		UNPARSED_LINES _unparsedLines
		SCAN_RESULT _scanResult)
	cotire_generate_unity_source("${_prefixFile}"
		PROLOGUE ${_prologue} EPILOGUE ${_epilogue} LANGUAGE "${_option_LANGUAGE}" ${_selectedHeaders})
	set (_unparsedLinesFile "${_prefixFile}.log")
	if (_unparsedLines)
		if (COTIRE_VERBOSE OR _scanResult OR NOT _selectedHeaders)
			list (LENGTH _unparsedLines _skippedLineCount)
			if (WIN32)
				file (TO_NATIVE_PATH "${_unparsedLinesFile}" _unparsedLinesLogPath)
			else()
				set (_unparsedLinesLogPath "${_unparsedLinesFile}")
			endif()
			message (STATUS "${_skippedLineCount} line(s) skipped, see ${_unparsedLinesLogPath}")
		endif()
		string (REPLACE ";" "\n" _unparsedLines "${_unparsedLines}")
	endif()
	file (WRITE "${_unparsedLinesFile}" "${_unparsedLines}\n")
endfunction()

# Adds compiler-specific flags that make the compiler emit the list of
# included headers (definition continues past this chunk).
function (cotire_add_makedep_flags _language _compilerID _compilerVersion _flagsVar)
	set (_flags ${${_flagsVar}})
	if (_compilerID MATCHES "MSVC")
		# cl.exe options used
		# /nologo suppresses display of sign-on banner
		# /TC treat all files named on the command line as C source files
		# /TP treat all files named on the command line as C++ source files
		# /EP preprocess to stdout without #line directives
		# /showIncludes list include
files set (_sourceFileTypeC "/TC") set (_sourceFileTypeCXX "/TP") if (_flags) # append to list list (APPEND _flags /nologo "${_sourceFileType${_language}}" /EP /showIncludes) else() # return as a flag string set (_flags "${_sourceFileType${_language}} /EP /showIncludes") endif() elseif (_compilerID MATCHES "GNU") # GCC options used # -H print the name of each header file used # -E invoke preprocessor # -fdirectives-only do not expand macros, requires GCC >= 4.3 if (_flags) # append to list list (APPEND _flags -H -E) if (NOT "${_compilerVersion}" VERSION_LESS "4.3.0") list (APPEND _flags -fdirectives-only) endif() else() # return as a flag string set (_flags "-H -E") if (NOT "${_compilerVersion}" VERSION_LESS "4.3.0") set (_flags "${_flags} -fdirectives-only") endif() endif() elseif (_compilerID MATCHES "Clang") if (UNIX) # Clang options used # -H print the name of each header file used # -E invoke preprocessor # -fno-color-diagnostics do not print diagnostics in color # -Eonly just run preprocessor, no output if (_flags) # append to list list (APPEND _flags -H -E -fno-color-diagnostics -Xclang -Eonly) else() # return as a flag string set (_flags "-H -E -fno-color-diagnostics -Xclang -Eonly") endif() elseif (WIN32) # Clang-cl.exe options used # /TC treat all files named on the command line as C source files # /TP treat all files named on the command line as C++ source files # /EP preprocess to stdout without #line directives # -H print the name of each header file used # -fno-color-diagnostics do not print diagnostics in color # -Eonly just run preprocessor, no output set (_sourceFileTypeC "/TC") set (_sourceFileTypeCXX "/TP") if (_flags) # append to list list (APPEND _flags "${_sourceFileType${_language}}" /EP -fno-color-diagnostics -Xclang -H -Xclang -Eonly) else() # return as a flag string set (_flags "${_sourceFileType${_language}} /EP -fno-color-diagnostics -Xclang -H -Xclang -Eonly") endif() endif() elseif (_compilerID MATCHES "Intel") if (WIN32) # Windows 
Intel options used # /nologo do not display compiler version information # /QH display the include file order # /EP preprocess to stdout, omitting #line directives # /TC process all source or unrecognized file types as C source files # /TP process all source or unrecognized file types as C++ source files set (_sourceFileTypeC "/TC") set (_sourceFileTypeCXX "/TP") if (_flags) # append to list list (APPEND _flags /nologo "${_sourceFileType${_language}}" /EP /QH) else() # return as a flag string set (_flags "${_sourceFileType${_language}} /EP /QH") endif() else() # Linux / Mac OS X Intel options used # -H print the name of each header file used # -EP preprocess to stdout, omitting #line directives # -Kc++ process all source or unrecognized file types as C++ source files if (_flags) # append to list if ("${_language}" STREQUAL "CXX") list (APPEND _flags -Kc++) endif() list (APPEND _flags -H -EP) else() # return as a flag string if ("${_language}" STREQUAL "CXX") set (_flags "-Kc++ ") endif() set (_flags "${_flags}-H -EP") endif() endif() else() message (FATAL_ERROR "cotire: unsupported ${_language} compiler ${_compilerID} version ${_compilerVersion}.") endif() set (${_flagsVar} ${_flags} PARENT_SCOPE) endfunction() function (cotire_add_pch_compilation_flags _language _compilerID _compilerVersion _prefixFile _pchFile _hostFile _flagsVar) set (_flags ${${_flagsVar}}) if (_compilerID MATCHES "MSVC") file (TO_NATIVE_PATH "${_prefixFile}" _prefixFileNative) file (TO_NATIVE_PATH "${_pchFile}" _pchFileNative) file (TO_NATIVE_PATH "${_hostFile}" _hostFileNative) # cl.exe options used # /Yc creates a precompiled header file # /Fp specifies precompiled header binary file name # /FI forces inclusion of file # /TC treat all files named on the command line as C source files # /TP treat all files named on the command line as C++ source files # /Zs syntax check only # /Zm precompiled header memory allocation scaling factor set (_sourceFileTypeC "/TC") set (_sourceFileTypeCXX "/TP") 
if (_flags) # append to list list (APPEND _flags /nologo "${_sourceFileType${_language}}" "/Yc${_prefixFileNative}" "/Fp${_pchFileNative}" "/FI${_prefixFileNative}" /Zs "${_hostFileNative}") if (COTIRE_PCH_MEMORY_SCALING_FACTOR) list (APPEND _flags "/Zm${COTIRE_PCH_MEMORY_SCALING_FACTOR}") endif() else() # return as a flag string set (_flags "/Yc\"${_prefixFileNative}\" /Fp\"${_pchFileNative}\" /FI\"${_prefixFileNative}\"") if (COTIRE_PCH_MEMORY_SCALING_FACTOR) set (_flags "${_flags} /Zm${COTIRE_PCH_MEMORY_SCALING_FACTOR}") endif() endif() elseif (_compilerID MATCHES "GNU") # GCC options used # -x specify the source language # -c compile but do not link # -o place output in file # note that we cannot use -w to suppress all warnings upon pre-compiling, because turning off a warning may # alter compile flags as a side effect (e.g., -Wwrite-string implies -fconst-strings) set (_xLanguage_C "c-header") set (_xLanguage_CXX "c++-header") if (_flags) # append to list list (APPEND _flags -x "${_xLanguage_${_language}}" -c "${_prefixFile}" -o "${_pchFile}") else() # return as a flag string set (_flags "-x ${_xLanguage_${_language}} -c \"${_prefixFile}\" -o \"${_pchFile}\"") endif() elseif (_compilerID MATCHES "Clang") if (UNIX) # Clang options used # -x specify the source language # -c compile but do not link # -o place output in file # -fno-pch-timestamp disable inclusion of timestamp in precompiled headers (clang 4.0.0+) set (_xLanguage_C "c-header") set (_xLanguage_CXX "c++-header") if (_flags) # append to list list (APPEND _flags -x "${_xLanguage_${_language}}" -c "${_prefixFile}" -o "${_pchFile}") if (NOT "${_compilerVersion}" VERSION_LESS "4.0.0") list (APPEND _flags -Xclang -fno-pch-timestamp) endif() else() # return as a flag string set (_flags "-x ${_xLanguage_${_language}} -c \"${_prefixFile}\" -o \"${_pchFile}\"") if (NOT "${_compilerVersion}" VERSION_LESS "4.0.0") set (_flags "${_flags} -Xclang -fno-pch-timestamp") endif() endif() elseif (WIN32) # Clang-cl.exe 
options used # /Yc creates a precompiled header file # /Fp specifies precompiled header binary file name # /FI forces inclusion of file # /Zs syntax check only # /TC treat all files named on the command line as C source files # /TP treat all files named on the command line as C++ source files set (_sourceFileTypeC "/TC") set (_sourceFileTypeCXX "/TP") if (_flags) # append to list list (APPEND _flags "${_sourceFileType${_language}}" "/Yc${_prefixFile}" "/Fp${_pchFile}" "/FI${_prefixFile}" /Zs "${_hostFile}") else() # return as a flag string set (_flags "/Yc\"${_prefixFile}\" /Fp\"${_pchFile}\" /FI\"${_prefixFile}\"") endif() endif() elseif (_compilerID MATCHES "Intel") if (WIN32) file (TO_NATIVE_PATH "${_prefixFile}" _prefixFileNative) file (TO_NATIVE_PATH "${_pchFile}" _pchFileNative) file (TO_NATIVE_PATH "${_hostFile}" _hostFileNative) # Windows Intel options used # /nologo do not display compiler version information # /Yc create a precompiled header (PCH) file # /Fp specify a path or file name for precompiled header files # /FI tells the preprocessor to include a specified file name as the header file # /TC process all source or unrecognized file types as C source files # /TP process all source or unrecognized file types as C++ source files # /Zs syntax check only # /Wpch-messages enable diagnostics related to pre-compiled headers (requires Intel XE 2013 Update 2) set (_sourceFileTypeC "/TC") set (_sourceFileTypeCXX "/TP") if (_flags) # append to list list (APPEND _flags /nologo "${_sourceFileType${_language}}" "/Yc" "/Fp${_pchFileNative}" "/FI${_prefixFileNative}" /Zs "${_hostFileNative}") if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") list (APPEND _flags "/Wpch-messages") endif() else() # return as a flag string set (_flags "/Yc /Fp\"${_pchFileNative}\" /FI\"${_prefixFileNative}\"") if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") set (_flags "${_flags} /Wpch-messages") endif() endif() else() # Linux / Mac OS X Intel options used # -pch-dir location 
for precompiled header files # -pch-create name of the precompiled header (PCH) to create # -Kc++ process all source or unrecognized file types as C++ source files # -fsyntax-only check only for correct syntax # -Wpch-messages enable diagnostics related to pre-compiled headers (requires Intel XE 2013 Update 2) get_filename_component(_pchDir "${_pchFile}" DIRECTORY) get_filename_component(_pchName "${_pchFile}" NAME) set (_xLanguage_C "c-header") set (_xLanguage_CXX "c++-header") set (_pchSuppressMessages FALSE) if ("${CMAKE_${_language}_FLAGS}" MATCHES ".*-Wno-pch-messages.*") set(_pchSuppressMessages TRUE) endif() if (_flags) # append to list if ("${_language}" STREQUAL "CXX") list (APPEND _flags -Kc++) endif() list (APPEND _flags -include "${_prefixFile}" -pch-dir "${_pchDir}" -pch-create "${_pchName}" -fsyntax-only "${_hostFile}") if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") if (NOT _pchSuppressMessages) list (APPEND _flags -Wpch-messages) endif() endif() else() # return as a flag string set (_flags "-include \"${_prefixFile}\" -pch-dir \"${_pchDir}\" -pch-create \"${_pchName}\"") if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") if (NOT _pchSuppressMessages) set (_flags "${_flags} -Wpch-messages") endif() endif() endif() endif() else() message (FATAL_ERROR "cotire: unsupported ${_language} compiler ${_compilerID} version ${_compilerVersion}.") endif() set (${_flagsVar} ${_flags} PARENT_SCOPE) endfunction() function (cotire_add_prefix_pch_inclusion_flags _language _compilerID _compilerVersion _prefixFile _pchFile _flagsVar) set (_flags ${${_flagsVar}}) if (_compilerID MATCHES "MSVC") file (TO_NATIVE_PATH "${_prefixFile}" _prefixFileNative) # cl.exe options used # /Yu uses a precompiled header file during build # /Fp specifies precompiled header binary file name # /FI forces inclusion of file # /Zm precompiled header memory allocation scaling factor if (_pchFile) file (TO_NATIVE_PATH "${_pchFile}" _pchFileNative) if (_flags) # append to list list 
(APPEND _flags "/Yu${_prefixFileNative}" "/Fp${_pchFileNative}" "/FI${_prefixFileNative}") if (COTIRE_PCH_MEMORY_SCALING_FACTOR) list (APPEND _flags "/Zm${COTIRE_PCH_MEMORY_SCALING_FACTOR}") endif() else() # return as a flag string set (_flags "/Yu\"${_prefixFileNative}\" /Fp\"${_pchFileNative}\" /FI\"${_prefixFileNative}\"") if (COTIRE_PCH_MEMORY_SCALING_FACTOR) set (_flags "${_flags} /Zm${COTIRE_PCH_MEMORY_SCALING_FACTOR}") endif() endif() else() # no precompiled header, force inclusion of prefix header if (_flags) # append to list list (APPEND _flags "/FI${_prefixFileNative}") else() # return as a flag string set (_flags "/FI\"${_prefixFileNative}\"") endif() endif() elseif (_compilerID MATCHES "GNU") # GCC options used # -include process include file as the first line of the primary source file # -Winvalid-pch warns if precompiled header is found but cannot be used # note: ccache requires the -include flag to be used in order to process precompiled header correctly if (_flags) # append to list list (APPEND _flags -Winvalid-pch -include "${_prefixFile}") else() # return as a flag string set (_flags "-Winvalid-pch -include \"${_prefixFile}\"") endif() elseif (_compilerID MATCHES "Clang") if (UNIX) # Clang options used # -include process include file as the first line of the primary source file # note: ccache requires the -include flag to be used in order to process precompiled header correctly if (_flags) # append to list list (APPEND _flags -include "${_prefixFile}") else() # return as a flag string set (_flags "-include \"${_prefixFile}\"") endif() elseif (WIN32) # Clang-cl.exe options used # /Yu uses a precompiled header file during build # /Fp specifies precompiled header binary file name # /FI forces inclusion of file if (_pchFile) if (_flags) # append to list list (APPEND _flags "/Yu${_prefixFile}" "/Fp${_pchFile}" "/FI${_prefixFile}") else() # return as a flag string set (_flags "/Yu\"${_prefixFile}\" /Fp\"${_pchFile}\" /FI\"${_prefixFile}\"") endif() 
else() # no precompiled header, force inclusion of prefix header if (_flags) # append to list list (APPEND _flags "/FI${_prefixFile}") else() # return as a flag string set (_flags "/FI\"${_prefixFile}\"") endif() endif() endif() elseif (_compilerID MATCHES "Intel") if (WIN32) file (TO_NATIVE_PATH "${_prefixFile}" _prefixFileNative) # Windows Intel options used # /Yu use a precompiled header (PCH) file # /Fp specify a path or file name for precompiled header files # /FI tells the preprocessor to include a specified file name as the header file # /Wpch-messages enable diagnostics related to pre-compiled headers (requires Intel XE 2013 Update 2) if (_pchFile) file (TO_NATIVE_PATH "${_pchFile}" _pchFileNative) if (_flags) # append to list list (APPEND _flags "/Yu" "/Fp${_pchFileNative}" "/FI${_prefixFileNative}") if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") list (APPEND _flags "/Wpch-messages") endif() else() # return as a flag string set (_flags "/Yu /Fp\"${_pchFileNative}\" /FI\"${_prefixFileNative}\"") if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") set (_flags "${_flags} /Wpch-messages") endif() endif() else() # no precompiled header, force inclusion of prefix header if (_flags) # append to list list (APPEND _flags "/FI${_prefixFileNative}") else() # return as a flag string set (_flags "/FI\"${_prefixFileNative}\"") endif() endif() else() # Linux / Mac OS X Intel options used # -pch-dir location for precompiled header files # -pch-use name of the precompiled header (PCH) to use # -include process include file as the first line of the primary source file # -Wpch-messages enable diagnostics related to pre-compiled headers (requires Intel XE 2013 Update 2) if (_pchFile) get_filename_component(_pchDir "${_pchFile}" DIRECTORY) get_filename_component(_pchName "${_pchFile}" NAME) set (_pchSuppressMessages FALSE) if ("${CMAKE_${_language}_FLAGS}" MATCHES ".*-Wno-pch-messages.*") set(_pchSuppressMessages TRUE) endif() if (_flags) # append to list list (APPEND 
_flags -include "${_prefixFile}" -pch-dir "${_pchDir}" -pch-use "${_pchName}") if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") if (NOT _pchSuppressMessages) list (APPEND _flags -Wpch-messages) endif() endif() else() # return as a flag string set (_flags "-include \"${_prefixFile}\" -pch-dir \"${_pchDir}\" -pch-use \"${_pchName}\"") if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") if (NOT _pchSuppressMessages) set (_flags "${_flags} -Wpch-messages") endif() endif() endif() else() # no precompiled header, force inclusion of prefix header if (_flags) # append to list list (APPEND _flags -include "${_prefixFile}") else() # return as a flag string set (_flags "-include \"${_prefixFile}\"") endif() endif() endif() else() message (FATAL_ERROR "cotire: unsupported ${_language} compiler ${_compilerID} version ${_compilerVersion}.") endif() set (${_flagsVar} ${_flags} PARENT_SCOPE) endfunction() function (cotire_precompile_prefix_header _prefixFile _pchFile _hostFile) set(_options "") set(_oneValueArgs COMPILER_EXECUTABLE COMPILER_ARG1 COMPILER_ID COMPILER_VERSION LANGUAGE) set(_multiValueArgs COMPILE_DEFINITIONS COMPILE_FLAGS INCLUDE_DIRECTORIES SYSTEM_INCLUDE_DIRECTORIES SYS COMPILER_LAUNCHER) cmake_parse_arguments(_option "${_options}" "${_oneValueArgs}" "${_multiValueArgs}" ${ARGN}) if (NOT _option_LANGUAGE) set (_option_LANGUAGE "CXX") endif() if (NOT _option_COMPILER_ID) set (_option_COMPILER_ID "${CMAKE_${_option_LANGUAGE}_ID}") endif() if (NOT _option_COMPILER_VERSION) set (_option_COMPILER_VERSION "${CMAKE_${_option_LANGUAGE}_COMPILER_VERSION}") endif() cotire_init_compile_cmd(_cmd "${_option_LANGUAGE}" "${_option_COMPILER_LAUNCHER}" "${_option_COMPILER_EXECUTABLE}" "${_option_COMPILER_ARG1}") cotire_add_definitions_to_cmd(_cmd "${_option_LANGUAGE}" ${_option_COMPILE_DEFINITIONS}) cotire_add_compile_flags_to_cmd(_cmd ${_option_COMPILE_FLAGS}) cotire_add_includes_to_cmd(_cmd "${_option_LANGUAGE}" _option_INCLUDE_DIRECTORIES 
_option_SYSTEM_INCLUDE_DIRECTORIES) cotire_add_frameworks_to_cmd(_cmd "${_option_LANGUAGE}" _option_INCLUDE_DIRECTORIES _option_SYSTEM_INCLUDE_DIRECTORIES) cotire_add_pch_compilation_flags( "${_option_LANGUAGE}" "${_option_COMPILER_ID}" "${_option_COMPILER_VERSION}" "${_prefixFile}" "${_pchFile}" "${_hostFile}" _cmd) if (COTIRE_VERBOSE) message (STATUS "execute_process: ${_cmd}") endif() if (MSVC_IDE OR _option_COMPILER_ID MATCHES "MSVC") # cl.exe messes with the output streams unless the environment variable VS_UNICODE_OUTPUT is cleared unset (ENV{VS_UNICODE_OUTPUT}) elseif (_option_COMPILER_ID MATCHES "Clang" AND _option_COMPILER_VERSION VERSION_LESS "4.0.0") if (_option_COMPILER_LAUNCHER MATCHES "ccache" OR _option_COMPILER_EXECUTABLE MATCHES "ccache") # Newer versions of Clang embed a compilation timestamp into the precompiled header binary, # which results in "file has been modified since the precompiled header was built" errors if ccache is used. # We work around the problem by disabling ccache upon pre-compiling the prefix header. set (ENV{CCACHE_DISABLE} "true") endif() endif() execute_process( COMMAND ${_cmd} WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" RESULT_VARIABLE _result) if (_result) message (FATAL_ERROR "cotire: error ${_result} precompiling ${_prefixFile}.") endif() endfunction() function (cotire_check_precompiled_header_support _language _target _msgVar) set (_unsupportedCompiler "Precompiled headers not supported for ${_language} compiler ${CMAKE_${_language}_COMPILER_ID}") if (CMAKE_${_language}_COMPILER_ID MATCHES "MSVC") # PCH supported since Visual Studio C++ 6.0 # and CMake does not support an earlier version set (${_msgVar} "" PARENT_SCOPE) elseif (CMAKE_${_language}_COMPILER_ID MATCHES "GNU") # GCC PCH support requires version >= 3.4 if ("${CMAKE_${_language}_COMPILER_VERSION}" VERSION_LESS "3.4.0") set (${_msgVar} "${_unsupportedCompiler} version ${CMAKE_${_language}_COMPILER_VERSION}." 
PARENT_SCOPE) else() set (${_msgVar} "" PARENT_SCOPE) endif() elseif (CMAKE_${_language}_COMPILER_ID MATCHES "Clang") if (UNIX) # all Unix Clang versions have PCH support set (${_msgVar} "" PARENT_SCOPE) elseif (WIN32) # only clang-cl is supported under Windows get_filename_component(_compilerName "${CMAKE_${_language}_COMPILER}" NAME_WE) if (NOT _compilerName MATCHES "cl$") set (${_msgVar} "${_unsupportedCompiler} version ${CMAKE_${_language}_COMPILER_VERSION}. Use clang-cl instead." PARENT_SCOPE) endif() endif() elseif (CMAKE_${_language}_COMPILER_ID MATCHES "Intel") # Intel PCH support requires version >= 8.0.0 if ("${CMAKE_${_language}_COMPILER_VERSION}" VERSION_LESS "8.0.0") set (${_msgVar} "${_unsupportedCompiler} version ${CMAKE_${_language}_COMPILER_VERSION}." PARENT_SCOPE) else() set (${_msgVar} "" PARENT_SCOPE) endif() else() set (${_msgVar} "${_unsupportedCompiler}." PARENT_SCOPE) endif() # check if ccache is used as a compiler launcher get_target_property(_launcher ${_target} ${_language}_COMPILER_LAUNCHER) get_filename_component(_realCompilerExe "${CMAKE_${_language}_COMPILER}" REALPATH) if (_realCompilerExe MATCHES "ccache" OR _launcher MATCHES "ccache") # verify that ccache configuration is compatible with precompiled headers # always check environment variable CCACHE_SLOPPINESS, because earlier versions of ccache # do not report the "sloppiness" setting correctly upon printing ccache configuration if (DEFINED ENV{CCACHE_SLOPPINESS}) if (NOT "$ENV{CCACHE_SLOPPINESS}" MATCHES "pch_defines" OR NOT "$ENV{CCACHE_SLOPPINESS}" MATCHES "time_macros") set (${_msgVar} "ccache requires the environment variable CCACHE_SLOPPINESS to be set to \"pch_defines,time_macros\"." 
PARENT_SCOPE) endif() else() if (_realCompilerExe MATCHES "ccache") set (_ccacheExe "${_realCompilerExe}") else() set (_ccacheExe "${_launcher}") endif() execute_process( COMMAND "${_ccacheExe}" "--print-config" WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" RESULT_VARIABLE _result OUTPUT_VARIABLE _ccacheConfig OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET) if (_result) set (${_msgVar} "ccache configuration cannot be determined." PARENT_SCOPE) elseif (NOT _ccacheConfig MATCHES "sloppiness.*=.*time_macros" OR NOT _ccacheConfig MATCHES "sloppiness.*=.*pch_defines") set (${_msgVar} "ccache requires configuration setting \"sloppiness\" to be set to \"pch_defines,time_macros\"." PARENT_SCOPE) endif() endif() endif() if (APPLE) # PCH compilation not supported by GCC / Clang for multi-architecture builds (e.g., i386, x86_64) cotire_get_configuration_types(_configs) foreach (_config ${_configs}) set (_targetFlags "") cotire_get_target_compile_flags("${_config}" "${_language}" "${_target}" _targetFlags) cotire_filter_compile_flags("${_language}" "arch" _architectures _ignore ${_targetFlags}) list (LENGTH _architectures _numberOfArchitectures) if (_numberOfArchitectures GREATER 1) string (REPLACE ";" ", " _architectureStr "${_architectures}") set (${_msgVar} "Precompiled headers not supported on Darwin for multi-architecture builds (${_architectureStr})." 
PARENT_SCOPE) break() endif() endforeach() endif() endfunction() macro (cotire_get_intermediate_dir _cotireDir) # ${CMAKE_CFG_INTDIR} may reference a build-time variable when using a generator which supports configuration types get_filename_component(${_cotireDir} "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${COTIRE_INTDIR}" ABSOLUTE) endmacro() macro (cotire_setup_file_extension_variables) set (_unityFileExt_C ".c") set (_unityFileExt_CXX ".cxx") set (_prefixFileExt_C ".h") set (_prefixFileExt_CXX ".hxx") set (_prefixSourceFileExt_C ".c") set (_prefixSourceFileExt_CXX ".cxx") endmacro() function (cotire_make_single_unity_source_file_path _language _target _unityFileVar) cotire_setup_file_extension_variables() if (NOT DEFINED _unityFileExt_${_language}) set (${_unityFileVar} "" PARENT_SCOPE) return() endif() set (_unityFileBaseName "${_target}_${_language}${COTIRE_UNITY_SOURCE_FILENAME_SUFFIX}") set (_unityFileName "${_unityFileBaseName}${_unityFileExt_${_language}}") cotire_get_intermediate_dir(_baseDir) set (_unityFile "${_baseDir}/${_unityFileName}") set (${_unityFileVar} "${_unityFile}" PARENT_SCOPE) endfunction() function (cotire_make_unity_source_file_paths _language _target _maxIncludes _unityFilesVar) cotire_setup_file_extension_variables() if (NOT DEFINED _unityFileExt_${_language}) set (${_unityFileVar} "" PARENT_SCOPE) return() endif() set (_unityFileBaseName "${_target}_${_language}${COTIRE_UNITY_SOURCE_FILENAME_SUFFIX}") cotire_get_intermediate_dir(_baseDir) set (_startIndex 0) set (_index 0) set (_unityFiles "") set (_sourceFiles ${ARGN}) foreach (_sourceFile ${_sourceFiles}) get_source_file_property(_startNew "${_sourceFile}" COTIRE_START_NEW_UNITY_SOURCE) math (EXPR _unityFileCount "${_index} - ${_startIndex}") if (_startNew OR (_maxIncludes GREATER 0 AND NOT _unityFileCount LESS _maxIncludes)) if (_index GREATER 0) # start new unity file segment math (EXPR _endIndex "${_index} - 1") set (_unityFileName 
"${_unityFileBaseName}_${_startIndex}_${_endIndex}${_unityFileExt_${_language}}") list (APPEND _unityFiles "${_baseDir}/${_unityFileName}") endif() set (_startIndex ${_index}) endif() math (EXPR _index "${_index} + 1") endforeach() list (LENGTH _sourceFiles _numberOfSources) if (_startIndex EQUAL 0) # there is only a single unity file cotire_make_single_unity_source_file_path(${_language} ${_target} _unityFiles) elseif (_startIndex LESS _numberOfSources) # end with final unity file segment math (EXPR _endIndex "${_index} - 1") set (_unityFileName "${_unityFileBaseName}_${_startIndex}_${_endIndex}${_unityFileExt_${_language}}") list (APPEND _unityFiles "${_baseDir}/${_unityFileName}") endif() set (${_unityFilesVar} ${_unityFiles} PARENT_SCOPE) if (COTIRE_DEBUG AND _unityFiles) message (STATUS "unity files: ${_unityFiles}") endif() endfunction() function (cotire_unity_to_prefix_file_path _language _target _unityFile _prefixFileVar) cotire_setup_file_extension_variables() if (NOT DEFINED _unityFileExt_${_language}) set (${_prefixFileVar} "" PARENT_SCOPE) return() endif() set (_unityFileBaseName "${_target}_${_language}${COTIRE_UNITY_SOURCE_FILENAME_SUFFIX}") set (_prefixFileBaseName "${_target}_${_language}${COTIRE_PREFIX_HEADER_FILENAME_SUFFIX}") string (REPLACE "${_unityFileBaseName}" "${_prefixFileBaseName}" _prefixFile "${_unityFile}") string (REGEX REPLACE "${_unityFileExt_${_language}}$" "${_prefixFileExt_${_language}}" _prefixFile "${_prefixFile}") set (${_prefixFileVar} "${_prefixFile}" PARENT_SCOPE) endfunction() function (cotire_prefix_header_to_source_file_path _language _prefixHeaderFile _prefixSourceFileVar) cotire_setup_file_extension_variables() if (NOT DEFINED _prefixSourceFileExt_${_language}) set (${_prefixSourceFileVar} "" PARENT_SCOPE) return() endif() string (REGEX REPLACE "${_prefixFileExt_${_language}}$" "${_prefixSourceFileExt_${_language}}" _prefixSourceFile "${_prefixHeaderFile}") set (${_prefixSourceFileVar} "${_prefixSourceFile}" 
PARENT_SCOPE) endfunction() function (cotire_make_prefix_file_name _language _target _prefixFileBaseNameVar _prefixFileNameVar) cotire_setup_file_extension_variables() if (NOT _language) set (_prefixFileBaseName "${_target}${COTIRE_PREFIX_HEADER_FILENAME_SUFFIX}") set (_prefixFileName "${_prefixFileBaseName}${_prefixFileExt_C}") elseif (DEFINED _prefixFileExt_${_language}) set (_prefixFileBaseName "${_target}_${_language}${COTIRE_PREFIX_HEADER_FILENAME_SUFFIX}") set (_prefixFileName "${_prefixFileBaseName}${_prefixFileExt_${_language}}") else() set (_prefixFileBaseName "") set (_prefixFileName "") endif() set (${_prefixFileBaseNameVar} "${_prefixFileBaseName}" PARENT_SCOPE) set (${_prefixFileNameVar} "${_prefixFileName}" PARENT_SCOPE) endfunction() function (cotire_make_prefix_file_path _language _target _prefixFileVar) cotire_make_prefix_file_name("${_language}" "${_target}" _prefixFileBaseName _prefixFileName) set (${_prefixFileVar} "" PARENT_SCOPE) if (_prefixFileName) if (NOT _language) set (_language "C") endif() if (CMAKE_${_language}_COMPILER_ID MATCHES "GNU|Clang|Intel|MSVC") cotire_get_intermediate_dir(_baseDir) set (${_prefixFileVar} "${_baseDir}/${_prefixFileName}" PARENT_SCOPE) endif() endif() endfunction() function (cotire_make_pch_file_path _language _target _pchFileVar) cotire_make_prefix_file_name("${_language}" "${_target}" _prefixFileBaseName _prefixFileName) set (${_pchFileVar} "" PARENT_SCOPE) if (_prefixFileBaseName AND _prefixFileName) cotire_check_precompiled_header_support("${_language}" "${_target}" _msg) if (NOT _msg) if (XCODE) # For Xcode, we completely hand off the compilation of the prefix header to the IDE return() endif() cotire_get_intermediate_dir(_baseDir) if (CMAKE_${_language}_COMPILER_ID MATCHES "MSVC") # MSVC uses the extension .pch added to the prefix header base name set (${_pchFileVar} "${_baseDir}/${_prefixFileBaseName}.pch" PARENT_SCOPE) elseif (CMAKE_${_language}_COMPILER_ID MATCHES "Clang") # Clang looks for a 
precompiled header corresponding to the prefix header with the extension .pch appended set (${_pchFileVar} "${_baseDir}/${_prefixFileName}.pch" PARENT_SCOPE) elseif (CMAKE_${_language}_COMPILER_ID MATCHES "GNU") # GCC looks for a precompiled header corresponding to the prefix header with the extension .gch appended set (${_pchFileVar} "${_baseDir}/${_prefixFileName}.gch" PARENT_SCOPE) elseif (CMAKE_${_language}_COMPILER_ID MATCHES "Intel") # Intel uses the extension .pchi added to the prefix header base name set (${_pchFileVar} "${_baseDir}/${_prefixFileBaseName}.pchi" PARENT_SCOPE) endif() endif() endif() endfunction() function (cotire_select_unity_source_files _unityFile _sourcesVar) set (_sourceFiles ${ARGN}) if (_sourceFiles AND "${_unityFile}" MATCHES "${COTIRE_UNITY_SOURCE_FILENAME_SUFFIX}_([0-9]+)_([0-9]+)") set (_startIndex ${CMAKE_MATCH_1}) set (_endIndex ${CMAKE_MATCH_2}) list (LENGTH _sourceFiles _numberOfSources) if (NOT _startIndex LESS _numberOfSources) math (EXPR _startIndex "${_numberOfSources} - 1") endif() if (NOT _endIndex LESS _numberOfSources) math (EXPR _endIndex "${_numberOfSources} - 1") endif() set (_files "") foreach (_index RANGE ${_startIndex} ${_endIndex}) list (GET _sourceFiles ${_index} _file) list (APPEND _files "${_file}") endforeach() else() set (_files ${_sourceFiles}) endif() set (${_sourcesVar} ${_files} PARENT_SCOPE) endfunction() function (cotire_get_unity_source_dependencies _language _target _dependencySourcesVar) set (_dependencySources "") # depend on target's generated source files get_target_property(_targetSourceFiles ${_target} SOURCES) cotire_get_objects_with_property_on(_generatedSources GENERATED SOURCE ${_targetSourceFiles}) if (_generatedSources) # but omit all generated source files that have the COTIRE_EXCLUDED property set to true cotire_get_objects_with_property_on(_excludedGeneratedSources COTIRE_EXCLUDED SOURCE ${_generatedSources}) if (_excludedGeneratedSources) list (REMOVE_ITEM _generatedSources 
${_excludedGeneratedSources}) endif() # and omit all generated source files that have the COTIRE_DEPENDENCY property set to false explicitly cotire_get_objects_with_property_off(_excludedNonDependencySources COTIRE_DEPENDENCY SOURCE ${_generatedSources}) if (_excludedNonDependencySources) list (REMOVE_ITEM _generatedSources ${_excludedNonDependencySources}) endif() if (_generatedSources) list (APPEND _dependencySources ${_generatedSources}) endif() endif() if (COTIRE_DEBUG AND _dependencySources) message (STATUS "${_language} ${_target} unity source dependencies: ${_dependencySources}") endif() set (${_dependencySourcesVar} ${_dependencySources} PARENT_SCOPE) endfunction() function (cotire_get_prefix_header_dependencies _language _target _dependencySourcesVar) set (_dependencySources "") # depend on target source files marked with custom COTIRE_DEPENDENCY property get_target_property(_targetSourceFiles ${_target} SOURCES) cotire_get_objects_with_property_on(_dependencySources COTIRE_DEPENDENCY SOURCE ${_targetSourceFiles}) if (COTIRE_DEBUG AND _dependencySources) message (STATUS "${_language} ${_target} prefix header dependencies: ${_dependencySources}") endif() set (${_dependencySourcesVar} ${_dependencySources} PARENT_SCOPE) endfunction() function (cotire_generate_target_script _language _configurations _target _targetScriptVar _targetConfigScriptVar) set (_targetSources ${ARGN}) cotire_get_prefix_header_dependencies(${_language} ${_target} COTIRE_TARGET_PREFIX_DEPENDS ${_targetSources}) cotire_get_unity_source_dependencies(${_language} ${_target} COTIRE_TARGET_UNITY_DEPENDS ${_targetSources}) # set up variables to be configured set (COTIRE_TARGET_LANGUAGE "${_language}") get_target_property(COTIRE_TARGET_IGNORE_PATH ${_target} COTIRE_PREFIX_HEADER_IGNORE_PATH) cotire_add_sys_root_paths(COTIRE_TARGET_IGNORE_PATH) get_target_property(COTIRE_TARGET_INCLUDE_PATH ${_target} COTIRE_PREFIX_HEADER_INCLUDE_PATH) cotire_add_sys_root_paths(COTIRE_TARGET_INCLUDE_PATH) 
# NOTE(review): verbatim continuation of cotire_generate_target_script. It snapshots
# per-configuration include directories, compile definitions and compiler flags into
# COTIRE_* variables, serializes every COTIRE_* variable (minus *_INIT and
# COTIRE_VERBOSE) into a generated target script, and — if any captured value contains
# a generator expression — expands it with file(GENERATE) into a per-config script.
# The tail begins cotire_setup_pch_file_compilation: MSVC/Intel/clang-cl attach PCH
# creation to a "host" source file; make/Ninja generators get a custom "precompile"
# script-mode command instead.
get_target_property(COTIRE_TARGET_PRE_UNDEFS ${_target} COTIRE_UNITY_SOURCE_PRE_UNDEFS) get_target_property(COTIRE_TARGET_POST_UNDEFS ${_target} COTIRE_UNITY_SOURCE_POST_UNDEFS) get_target_property(COTIRE_TARGET_MAXIMUM_NUMBER_OF_INCLUDES ${_target} COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES) get_target_property(COTIRE_TARGET_INCLUDE_PRIORITY_PATH ${_target} COTIRE_PREFIX_HEADER_INCLUDE_PRIORITY_PATH) cotire_get_source_files_undefs(COTIRE_UNITY_SOURCE_PRE_UNDEFS COTIRE_TARGET_SOURCES_PRE_UNDEFS ${_targetSources}) cotire_get_source_files_undefs(COTIRE_UNITY_SOURCE_POST_UNDEFS COTIRE_TARGET_SOURCES_POST_UNDEFS ${_targetSources}) set (COTIRE_TARGET_CONFIGURATION_TYPES "${_configurations}") foreach (_config ${_configurations}) string (TOUPPER "${_config}" _upperConfig) cotire_get_target_include_directories( "${_config}" "${_language}" "${_target}" COTIRE_TARGET_INCLUDE_DIRECTORIES_${_upperConfig} COTIRE_TARGET_SYSTEM_INCLUDE_DIRECTORIES_${_upperConfig}) cotire_get_target_compile_definitions( "${_config}" "${_language}" "${_target}" COTIRE_TARGET_COMPILE_DEFINITIONS_${_upperConfig}) cotire_get_target_compiler_flags( "${_config}" "${_language}" "${_target}" COTIRE_TARGET_COMPILE_FLAGS_${_upperConfig}) cotire_get_source_files_compile_definitions( "${_config}" "${_language}" COTIRE_TARGET_SOURCES_COMPILE_DEFINITIONS_${_upperConfig} ${_targetSources}) endforeach() get_target_property(COTIRE_TARGET_${_language}_COMPILER_LAUNCHER ${_target} ${_language}_COMPILER_LAUNCHER) # set up COTIRE_TARGET_SOURCES set (COTIRE_TARGET_SOURCES "") foreach (_sourceFile ${_targetSources}) get_source_file_property(_generated "${_sourceFile}" GENERATED) if (_generated) # use absolute paths for generated files only, retrieving the LOCATION property is an expensive operation get_source_file_property(_sourceLocation "${_sourceFile}" LOCATION) list (APPEND COTIRE_TARGET_SOURCES "${_sourceLocation}") else() list (APPEND COTIRE_TARGET_SOURCES "${_sourceFile}") endif() endforeach() # copy variable 
definitions to cotire target script get_cmake_property(_vars VARIABLES) string (REGEX MATCHALL "COTIRE_[A-Za-z0-9_]+" _matchVars "${_vars}") # omit COTIRE_*_INIT variables string (REGEX MATCHALL "COTIRE_[A-Za-z0-9_]+_INIT" _initVars "${_matchVars}") if (_initVars) list (REMOVE_ITEM _matchVars ${_initVars}) endif() # omit COTIRE_VERBOSE which is passed as a CMake define on command line list (REMOVE_ITEM _matchVars COTIRE_VERBOSE) set (_contents "") set (_contentsHasGeneratorExpressions FALSE) foreach (_var IN LISTS _matchVars ITEMS XCODE MSVC CMAKE_GENERATOR CMAKE_BUILD_TYPE CMAKE_CONFIGURATION_TYPES CMAKE_${_language}_COMPILER_ID CMAKE_${_language}_COMPILER_VERSION CMAKE_${_language}_COMPILER_LAUNCHER CMAKE_${_language}_COMPILER CMAKE_${_language}_COMPILER_ARG1 CMAKE_INCLUDE_FLAG_${_language} CMAKE_INCLUDE_FLAG_SEP_${_language} CMAKE_INCLUDE_SYSTEM_FLAG_${_language} CMAKE_${_language}_FRAMEWORK_SEARCH_FLAG CMAKE_${_language}_SYSTEM_FRAMEWORK_SEARCH_FLAG CMAKE_${_language}_SOURCE_FILE_EXTENSIONS) if (DEFINED ${_var}) string (REPLACE "\"" "\\\"" _value "${${_var}}") set (_contents "${_contents}set (${_var} \"${_value}\")\n") if (NOT _contentsHasGeneratorExpressions) if ("${_value}" MATCHES "\\$<.*>") set (_contentsHasGeneratorExpressions TRUE) endif() endif() endif() endforeach() # generate target script file get_filename_component(_moduleName "${COTIRE_CMAKE_MODULE_FILE}" NAME) set (_targetCotireScript "${CMAKE_CURRENT_BINARY_DIR}/${_target}_${_language}_${_moduleName}") cotire_write_file("CMAKE" "${_targetCotireScript}" "${_contents}" FALSE) if (_contentsHasGeneratorExpressions) # use file(GENERATE ...) 
to expand generator expressions in the target script at CMake generate-time set (_configNameOrNoneGeneratorExpression "$<$:None>$<$>:$>") set (_targetCotireConfigScript "${CMAKE_CURRENT_BINARY_DIR}/${_target}_${_language}_${_configNameOrNoneGeneratorExpression}_${_moduleName}") file (GENERATE OUTPUT "${_targetCotireConfigScript}" INPUT "${_targetCotireScript}") else() set (_targetCotireConfigScript "${_targetCotireScript}") endif() set (${_targetScriptVar} "${_targetCotireScript}" PARENT_SCOPE) set (${_targetConfigScriptVar} "${_targetCotireConfigScript}" PARENT_SCOPE) endfunction() function (cotire_setup_pch_file_compilation _language _target _targetScript _prefixFile _pchFile _hostFile) set (_sourceFiles ${ARGN}) if (CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel" OR (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "Clang")) # for MSVC, Intel and Clang-cl, we attach the precompiled header compilation to the host file # the remaining files include the precompiled header, see cotire_setup_pch_file_inclusion if (_sourceFiles) set (_flags "") cotire_add_pch_compilation_flags( "${_language}" "${CMAKE_${_language}_COMPILER_ID}" "${CMAKE_${_language}_COMPILER_VERSION}" "${_prefixFile}" "${_pchFile}" "${_hostFile}" _flags) set_property (SOURCE ${_hostFile} APPEND_STRING PROPERTY COMPILE_FLAGS " ${_flags} ") set_property (SOURCE ${_hostFile} APPEND PROPERTY OBJECT_OUTPUTS "${_pchFile}") # make object file generated from host file depend on prefix header set_property (SOURCE ${_hostFile} APPEND PROPERTY OBJECT_DEPENDS "${_prefixFile}") # mark host file as cotired to prevent it from being used in another cotired target set_property (SOURCE ${_hostFile} PROPERTY COTIRE_TARGET "${_target}") endif() elseif ("${CMAKE_GENERATOR}" MATCHES "Make|Ninja") # for makefile based generator, we add a custom command to precompile the prefix header if (_targetScript) cotire_set_cmd_to_prologue(_cmds) list (APPEND _cmds -P "${COTIRE_CMAKE_MODULE_FILE}" "precompile" "${_targetScript}" 
# (review) arguments below complete the "precompile" script-mode command line:
# prefix header, PCH output file, and the host source file.
"${_prefixFile}" "${_pchFile}" "${_hostFile}") if (MSVC_IDE) file (TO_NATIVE_PATH "${_pchFile}" _pchFileLogPath) else() file (RELATIVE_PATH _pchFileLogPath "${CMAKE_BINARY_DIR}" "${_pchFile}") endif() # make precompiled header compilation depend on the actual compiler executable used to force # re-compilation when the compiler executable is updated. This prevents "created by a different GCC executable" # warnings when the precompiled header is included. get_filename_component(_realCompilerExe "${CMAKE_${_language}_COMPILER}" ABSOLUTE) if (COTIRE_DEBUG) message (STATUS "add_custom_command: OUTPUT ${_pchFile} ${_cmds} DEPENDS ${_prefixFile} ${_realCompilerExe} IMPLICIT_DEPENDS ${_language} ${_prefixFile}") endif() set_property (SOURCE "${_pchFile}" PROPERTY GENERATED TRUE) add_custom_command( OUTPUT "${_pchFile}" COMMAND ${_cmds} DEPENDS "${_prefixFile}" "${_realCompilerExe}" IMPLICIT_DEPENDS ${_language} "${_prefixFile}" WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" COMMENT "Building ${_language} precompiled header ${_pchFileLogPath}" VERBATIM) endif() endif() endfunction() function (cotire_setup_pch_file_inclusion _language _target _wholeTarget _prefixFile _pchFile _hostFile) if (CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel" OR (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "Clang")) # for MSVC, Intel and clang-cl, we include the precompiled header in all but the host file # the host file does the precompiled header compilation, see cotire_setup_pch_file_compilation set (_sourceFiles ${ARGN}) list (LENGTH _sourceFiles _numberOfSourceFiles) if (_numberOfSourceFiles GREATER 0) # mark sources as cotired to prevent them from being used in another cotired target set_source_files_properties(${_sourceFiles} PROPERTIES COTIRE_TARGET "${_target}") set (_flags "") cotire_add_prefix_pch_inclusion_flags( "${_language}" "${CMAKE_${_language}_COMPILER_ID}" "${CMAKE_${_language}_COMPILER_VERSION}" "${_prefixFile}" "${_pchFile}" _flags) set_property (SOURCE
# NOTE(review): verbatim tail of cotire_setup_pch_file_inclusion (appends the
# prefix/PCH inclusion flags to each source's COMPILE_FLAGS and makes its object
# depend on the PCH via OBJECT_DEPENDS), followed by cotire_setup_prefix_file_inclusion,
# which force-includes the prefix header with an intentionally empty _pchFile
# (i.e. no precompiled header involved).
${_sourceFiles} APPEND_STRING PROPERTY COMPILE_FLAGS " ${_flags} ") # make object files generated from source files depend on precompiled header set_property (SOURCE ${_sourceFiles} APPEND PROPERTY OBJECT_DEPENDS "${_pchFile}") endif() elseif ("${CMAKE_GENERATOR}" MATCHES "Make|Ninja") set (_sourceFiles ${_hostFile} ${ARGN}) if (NOT _wholeTarget) # for makefile based generator, we force the inclusion of the prefix header for a subset # of the source files, if this is a multi-language target or has excluded files set (_flags "") cotire_add_prefix_pch_inclusion_flags( "${_language}" "${CMAKE_${_language}_COMPILER_ID}" "${CMAKE_${_language}_COMPILER_VERSION}" "${_prefixFile}" "${_pchFile}" _flags) set_property (SOURCE ${_sourceFiles} APPEND_STRING PROPERTY COMPILE_FLAGS " ${_flags} ") # mark sources as cotired to prevent them from being used in another cotired target set_source_files_properties(${_sourceFiles} PROPERTIES COTIRE_TARGET "${_target}") endif() # make object files generated from source files depend on precompiled header set_property (SOURCE ${_sourceFiles} APPEND PROPERTY OBJECT_DEPENDS "${_pchFile}") endif() endfunction() function (cotire_setup_prefix_file_inclusion _language _target _prefixFile) set (_sourceFiles ${ARGN}) # force the inclusion of the prefix header for the given source files set (_flags "") set (_pchFile "") cotire_add_prefix_pch_inclusion_flags( "${_language}" "${CMAKE_${_language}_COMPILER_ID}" "${CMAKE_${_language}_COMPILER_VERSION}" "${_prefixFile}" "${_pchFile}" _flags) set_property (SOURCE ${_sourceFiles} APPEND_STRING PROPERTY COMPILE_FLAGS " ${_flags} ") # mark sources as cotired to prevent them from being used in another cotired target set_source_files_properties(${_sourceFiles} PROPERTIES COTIRE_TARGET "${_target}") # make object files generated from source files depend on prefix header set_property (SOURCE ${_sourceFiles} APPEND PROPERTY OBJECT_DEPENDS "${_prefixFile}") endfunction() function
(cotire_get_first_set_property_value _propertyValueVar _type _object) set (_properties ${ARGN}) foreach (_property ${_properties}) get_property(_propertyValue ${_type} "${_object}" PROPERTY ${_property}) if (_propertyValue) set (${_propertyValueVar} ${_propertyValue} PARENT_SCOPE) return() endif() endforeach() set (${_propertyValueVar} "" PARENT_SCOPE) endfunction() function (cotire_setup_combine_command _language _targetScript _joinedFile _cmdsVar) set (_files ${ARGN}) set (_filesPaths "") foreach (_file ${_files}) get_filename_component(_filePath "${_file}" ABSOLUTE) list (APPEND _filesPaths "${_filePath}") endforeach() cotire_set_cmd_to_prologue(_prefixCmd) list (APPEND _prefixCmd -P "${COTIRE_CMAKE_MODULE_FILE}" "combine") if (_targetScript) list (APPEND _prefixCmd "${_targetScript}") endif() list (APPEND _prefixCmd "${_joinedFile}" ${_filesPaths}) if (COTIRE_DEBUG) message (STATUS "add_custom_command: OUTPUT ${_joinedFile} COMMAND ${_prefixCmd} DEPENDS ${_files}") endif() set_property (SOURCE "${_joinedFile}" PROPERTY GENERATED TRUE) if (MSVC_IDE) file (TO_NATIVE_PATH "${_joinedFile}" _joinedFileLogPath) else() file (RELATIVE_PATH _joinedFileLogPath "${CMAKE_BINARY_DIR}" "${_joinedFile}") endif() get_filename_component(_joinedFileBaseName "${_joinedFile}" NAME_WE) get_filename_component(_joinedFileExt "${_joinedFile}" EXT) if (_language AND _joinedFileBaseName MATCHES "${COTIRE_UNITY_SOURCE_FILENAME_SUFFIX}$") set (_comment "Generating ${_language} unity source ${_joinedFileLogPath}") elseif (_language AND _joinedFileBaseName MATCHES "${COTIRE_PREFIX_HEADER_FILENAME_SUFFIX}$") if (_joinedFileExt MATCHES "^\\.c") set (_comment "Generating ${_language} prefix source ${_joinedFileLogPath}") else() set (_comment "Generating ${_language} prefix header ${_joinedFileLogPath}") endif() else() set (_comment "Generating ${_joinedFileLogPath}") endif() add_custom_command( OUTPUT "${_joinedFile}" COMMAND ${_prefixCmd} DEPENDS ${_files} COMMENT "${_comment}" 
# (review) this custom command regenerates the combined (unity/prefix) file from its
# parts in "combine" script mode; below it follows cotire_setup_target_pch_usage,
# which attaches an Xcode PRE_BUILD action (or, for make/Ninja, target-wide
# prefix-header inclusion handled in the next chunk).
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" VERBATIM) list (APPEND ${_cmdsVar} COMMAND ${_prefixCmd}) set (${_cmdsVar} ${${_cmdsVar}} PARENT_SCOPE) endfunction() function (cotire_setup_target_pch_usage _languages _target _wholeTarget) if (XCODE) # for Xcode, we attach a pre-build action to generate the unity sources and prefix headers set (_prefixFiles "") foreach (_language ${_languages}) get_property(_prefixFile TARGET ${_target} PROPERTY COTIRE_${_language}_PREFIX_HEADER) if (_prefixFile) list (APPEND _prefixFiles "${_prefixFile}") endif() endforeach() set (_cmds ${ARGN}) list (LENGTH _prefixFiles _numberOfPrefixFiles) if (_numberOfPrefixFiles GREATER 1) # we also generate a generic, single prefix header which includes all language specific prefix headers set (_language "") set (_targetScript "") cotire_make_prefix_file_path("${_language}" ${_target} _prefixHeader) cotire_setup_combine_command("${_language}" "${_targetScript}" "${_prefixHeader}" _cmds ${_prefixFiles}) else() set (_prefixHeader "${_prefixFiles}") endif() if (COTIRE_DEBUG) message (STATUS "add_custom_command: TARGET ${_target} PRE_BUILD ${_cmds}") endif() # because CMake PRE_BUILD command does not support dependencies, # we check dependencies explicity in cotire script mode when the pre-build action is run add_custom_command( TARGET "${_target}" PRE_BUILD ${_cmds} WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" COMMENT "Updating target ${_target} prefix headers" VERBATIM) # make Xcode precompile the generated prefix header with ProcessPCH and ProcessPCH++ set_target_properties(${_target} PROPERTIES XCODE_ATTRIBUTE_GCC_PRECOMPILE_PREFIX_HEADER "YES") set_target_properties(${_target} PROPERTIES XCODE_ATTRIBUTE_GCC_PREFIX_HEADER "${_prefixHeader}") elseif ("${CMAKE_GENERATOR}" MATCHES "Make|Ninja") # for makefile based generator, we force inclusion of the prefix header for all target source files # if this is a single-language target without any excluded files if (_wholeTarget) set (_language
# (review) single-language whole-target case: the one-element language list collapses
# into _language and the prefix-inclusion options are applied target-wide.
"${_languages}") # for MSVC, Intel and clang-cl, precompiled header inclusion is always done on the source file level # see cotire_setup_pch_file_inclusion if (NOT CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel" AND NOT (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "Clang")) get_property(_prefixFile TARGET ${_target} PROPERTY COTIRE_${_language}_PREFIX_HEADER) if (_prefixFile) get_property(_pchFile TARGET ${_target} PROPERTY COTIRE_${_language}_PRECOMPILED_HEADER) set (_options COMPILE_OPTIONS) cotire_add_prefix_pch_inclusion_flags( "${_language}" "${CMAKE_${_language}_COMPILER_ID}" "${CMAKE_${_language}_COMPILER_VERSION}" "${_prefixFile}" "${_pchFile}" _options) set_property(TARGET ${_target} APPEND PROPERTY ${_options}) endif() endif() endif() endif() endfunction() function (cotire_setup_unity_generation_commands _language _target _targetScript _targetConfigScript _unityFiles _cmdsVar) set (_dependencySources "") cotire_get_unity_source_dependencies(${_language} ${_target} _dependencySources ${ARGN}) foreach (_unityFile ${_unityFiles}) set_property (SOURCE "${_unityFile}" PROPERTY GENERATED TRUE) # set up compiled unity source dependencies via OBJECT_DEPENDS # this ensures that missing source files are generated before the unity file is compiled if (COTIRE_DEBUG AND _dependencySources) message (STATUS "${_unityFile} OBJECT_DEPENDS ${_dependencySources}") endif() if (_dependencySources) # the OBJECT_DEPENDS property requires a list of full paths set (_objectDependsPaths "") foreach (_sourceFile ${_dependencySources}) get_source_file_property(_sourceLocation "${_sourceFile}" LOCATION) list (APPEND _objectDependsPaths "${_sourceLocation}") endforeach() set_property (SOURCE "${_unityFile}" PROPERTY OBJECT_DEPENDS ${_objectDependsPaths}) endif() if (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel") # unity file compilation results in potentially huge object file, # thus use /bigobj by default unter cl.exe and Windows Intel set_property (SOURCE 
# (review) unity object files can exceed cl.exe's default object section limit,
# hence the /bigobj compile flag appended to the unity source below.
"${_unityFile}" APPEND_STRING PROPERTY COMPILE_FLAGS "/bigobj") endif() cotire_set_cmd_to_prologue(_unityCmd) list (APPEND _unityCmd -P "${COTIRE_CMAKE_MODULE_FILE}" "unity" "${_targetConfigScript}" "${_unityFile}") if (CMAKE_VERSION VERSION_LESS "3.1.0") set (_unityCmdDepends "${_targetScript}") else() # CMake 3.1.0 supports generator expressions in arguments to DEPENDS set (_unityCmdDepends "${_targetConfigScript}") endif() if (MSVC_IDE) file (TO_NATIVE_PATH "${_unityFile}" _unityFileLogPath) else() file (RELATIVE_PATH _unityFileLogPath "${CMAKE_BINARY_DIR}" "${_unityFile}") endif() if (COTIRE_DEBUG) message (STATUS "add_custom_command: OUTPUT ${_unityFile} COMMAND ${_unityCmd} DEPENDS ${_unityCmdDepends}") endif() add_custom_command( OUTPUT "${_unityFile}" COMMAND ${_unityCmd} DEPENDS ${_unityCmdDepends} COMMENT "Generating ${_language} unity source ${_unityFileLogPath}" WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" VERBATIM) list (APPEND ${_cmdsVar} COMMAND ${_unityCmd}) endforeach() set (${_cmdsVar} ${${_cmdsVar}} PARENT_SCOPE) endfunction() function (cotire_setup_prefix_generation_command _language _target _targetScript _prefixFile _unityFiles _cmdsVar) set (_sourceFiles ${ARGN}) set (_dependencySources "") cotire_get_prefix_header_dependencies(${_language} ${_target} _dependencySources ${_sourceFiles}) cotire_set_cmd_to_prologue(_prefixCmd) list (APPEND _prefixCmd -P "${COTIRE_CMAKE_MODULE_FILE}" "prefix" "${_targetScript}" "${_prefixFile}" ${_unityFiles}) set_property (SOURCE "${_prefixFile}" PROPERTY GENERATED TRUE) # make prefix header generation depend on the actual compiler executable used to force # re-generation when the compiler executable is updated. This prevents "file not found" # errors for compiler version specific system header files. 
# (review) the resolved compiler path is added to DEPENDS below, so a compiler update
# re-triggers prefix-header generation; a "<target>_pch_pre" helper target stands in
# for the mere existence (not content) of the target's generated sources.
get_filename_component(_realCompilerExe "${CMAKE_${_language}_COMPILER}" ABSOLUTE) if (COTIRE_DEBUG) message (STATUS "add_custom_command: OUTPUT ${_prefixFile} COMMAND ${_prefixCmd} DEPENDS ${_unityFile} ${_dependencySources} ${_realCompilerExe}") endif() if (MSVC_IDE) file (TO_NATIVE_PATH "${_prefixFile}" _prefixFileLogPath) else() file (RELATIVE_PATH _prefixFileLogPath "${CMAKE_BINARY_DIR}" "${_prefixFile}") endif() get_filename_component(_prefixFileExt "${_prefixFile}" EXT) if (_prefixFileExt MATCHES "^\\.c") set (_comment "Generating ${_language} prefix source ${_prefixFileLogPath}") else() set (_comment "Generating ${_language} prefix header ${_prefixFileLogPath}") endif() # prevent pre-processing errors upon generating the prefix header when a target's generated include file does not yet exist # we do not add a file-level dependency for the target's generated files though, because we only want to depend on their existence # thus we make the prefix header generation depend on a custom helper target which triggers the generation of the files set (_preTargetName "${_target}${COTIRE_PCH_TARGET_SUFFIX}_pre") if (TARGET ${_preTargetName}) # custom helper target has already been generated while processing a different language list (APPEND _dependencySources ${_preTargetName}) else() get_target_property(_targetSourceFiles ${_target} SOURCES) cotire_get_objects_with_property_on(_generatedSources GENERATED SOURCE ${_targetSourceFiles}) if (_generatedSources) add_custom_target("${_preTargetName}" DEPENDS ${_generatedSources}) cotire_init_target("${_preTargetName}") list (APPEND _dependencySources ${_preTargetName}) endif() endif() add_custom_command( OUTPUT "${_prefixFile}" "${_prefixFile}.log" COMMAND ${_prefixCmd} DEPENDS ${_unityFiles} ${_dependencySources} "${_realCompilerExe}" COMMENT "${_comment}" WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" VERBATIM) list (APPEND ${_cmdsVar} COMMAND ${_prefixCmd}) set (${_cmdsVar} ${${_cmdsVar}} PARENT_SCOPE) endfunction()
# NOTE(review): verbatim: cotire_setup_prefix_generation_from_unity_command and
# ..._from_provided_command (for GNU/Clang the prefix header is produced via an
# intermediate prefix *source* file so the system_header pragma is honored), followed
# by cotire_init_cotire_target_properties, which seeds each unset COTIRE_* target
# property with its default value.
function (cotire_setup_prefix_generation_from_unity_command _language _target _targetScript _prefixFile _unityFiles _cmdsVar) set (_sourceFiles ${ARGN}) if (CMAKE_${_language}_COMPILER_ID MATCHES "GNU|Clang") # GNU and Clang require indirect compilation of the prefix header to make them honor the system_header pragma cotire_prefix_header_to_source_file_path(${_language} "${_prefixFile}" _prefixSourceFile) else() set (_prefixSourceFile "${_prefixFile}") endif() cotire_setup_prefix_generation_command( ${_language} ${_target} "${_targetScript}" "${_prefixSourceFile}" "${_unityFiles}" ${_cmdsVar} ${_sourceFiles}) if (CMAKE_${_language}_COMPILER_ID MATCHES "GNU|Clang") # set up generation of a prefix source file which includes the prefix header cotire_setup_combine_command(${_language} "${_targetScript}" "${_prefixFile}" _cmds ${_prefixSourceFile}) endif() set (${_cmdsVar} ${${_cmdsVar}} PARENT_SCOPE) endfunction() function (cotire_setup_prefix_generation_from_provided_command _language _target _targetScript _prefixFile _cmdsVar) set (_prefixHeaderFiles ${ARGN}) if (CMAKE_${_language}_COMPILER_ID MATCHES "GNU|Clang") # GNU and Clang require indirect compilation of the prefix header to make them honor the system_header pragma cotire_prefix_header_to_source_file_path(${_language} "${_prefixFile}" _prefixSourceFile) else() set (_prefixSourceFile "${_prefixFile}") endif() cotire_setup_combine_command(${_language} "${_targetScript}" "${_prefixSourceFile}" _cmds ${_prefixHeaderFiles}) if (CMAKE_${_language}_COMPILER_ID MATCHES "GNU|Clang") # set up generation of a prefix source file which includes the prefix header cotire_setup_combine_command(${_language} "${_targetScript}" "${_prefixFile}" _cmds ${_prefixSourceFile}) endif() set (${_cmdsVar} ${${_cmdsVar}} PARENT_SCOPE) endfunction() function (cotire_init_cotire_target_properties _target) get_property(_isSet TARGET ${_target} PROPERTY COTIRE_ENABLE_PRECOMPILED_HEADER SET) if (NOT _isSet) set_property(TARGET ${_target} 
# (review) defaults applied below: PCH and unity build enabled, clean target disabled;
# the source dir (plus an out-of-tree binary dir) is ignored for prefix extraction.
PROPERTY COTIRE_ENABLE_PRECOMPILED_HEADER TRUE) endif() get_property(_isSet TARGET ${_target} PROPERTY COTIRE_ADD_UNITY_BUILD SET) if (NOT _isSet) set_property(TARGET ${_target} PROPERTY COTIRE_ADD_UNITY_BUILD TRUE) endif() get_property(_isSet TARGET ${_target} PROPERTY COTIRE_ADD_CLEAN SET) if (NOT _isSet) set_property(TARGET ${_target} PROPERTY COTIRE_ADD_CLEAN FALSE) endif() get_property(_isSet TARGET ${_target} PROPERTY COTIRE_PREFIX_HEADER_IGNORE_PATH SET) if (NOT _isSet) set_property(TARGET ${_target} PROPERTY COTIRE_PREFIX_HEADER_IGNORE_PATH "${CMAKE_SOURCE_DIR}") cotire_check_is_path_relative_to("${CMAKE_BINARY_DIR}" _isRelative "${CMAKE_SOURCE_DIR}") if (NOT _isRelative) set_property(TARGET ${_target} APPEND PROPERTY COTIRE_PREFIX_HEADER_IGNORE_PATH "${CMAKE_BINARY_DIR}") endif() endif() get_property(_isSet TARGET ${_target} PROPERTY COTIRE_PREFIX_HEADER_INCLUDE_PATH SET) if (NOT _isSet) set_property(TARGET ${_target} PROPERTY COTIRE_PREFIX_HEADER_INCLUDE_PATH "") endif() get_property(_isSet TARGET ${_target} PROPERTY COTIRE_PREFIX_HEADER_INCLUDE_PRIORITY_PATH SET) if (NOT _isSet) set_property(TARGET ${_target} PROPERTY COTIRE_PREFIX_HEADER_INCLUDE_PRIORITY_PATH "") endif() get_property(_isSet TARGET ${_target} PROPERTY COTIRE_UNITY_SOURCE_PRE_UNDEFS SET) if (NOT _isSet) set_property(TARGET ${_target} PROPERTY COTIRE_UNITY_SOURCE_PRE_UNDEFS "") endif() get_property(_isSet TARGET ${_target} PROPERTY COTIRE_UNITY_SOURCE_POST_UNDEFS SET) if (NOT _isSet) set_property(TARGET ${_target} PROPERTY COTIRE_UNITY_SOURCE_POST_UNDEFS "") endif() get_property(_isSet TARGET ${_target} PROPERTY COTIRE_UNITY_LINK_LIBRARIES_INIT SET) if (NOT _isSet) set_property(TARGET ${_target} PROPERTY COTIRE_UNITY_LINK_LIBRARIES_INIT "COPY_UNITY") endif() get_property(_isSet TARGET ${_target} PROPERTY COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES SET) if (NOT _isSet) if (COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES) set_property(TARGET ${_target} PROPERTY 
# (review) an unset per-target include cap falls back to the global
# COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES variable, else stays empty.
COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES "${COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES}") else() set_property(TARGET ${_target} PROPERTY COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES "") endif() endif() endfunction() function (cotire_make_target_message _target _languages _disableMsg _targetMsgVar) get_target_property(_targetUsePCH ${_target} COTIRE_ENABLE_PRECOMPILED_HEADER) get_target_property(_targetAddSCU ${_target} COTIRE_ADD_UNITY_BUILD) string (REPLACE ";" " " _languagesStr "${_languages}") math (EXPR _numberOfExcludedFiles "${ARGC} - 4") if (_numberOfExcludedFiles EQUAL 0) set (_excludedStr "") elseif (COTIRE_VERBOSE OR _numberOfExcludedFiles LESS 4) string (REPLACE ";" ", " _excludedStr "excluding ${ARGN}") else() set (_excludedStr "excluding ${_numberOfExcludedFiles} files") endif() set (_targetMsg "") if (NOT _languages) set (_targetMsg "Target ${_target} cannot be cotired.") if (_disableMsg) set (_targetMsg "${_targetMsg} ${_disableMsg}") endif() elseif (NOT _targetUsePCH AND NOT _targetAddSCU) set (_targetMsg "${_languagesStr} target ${_target} cotired without unity build and precompiled header.") if (_disableMsg) set (_targetMsg "${_targetMsg} ${_disableMsg}") endif() elseif (NOT _targetUsePCH) if (_excludedStr) set (_targetMsg "${_languagesStr} target ${_target} cotired without precompiled header ${_excludedStr}.") else() set (_targetMsg "${_languagesStr} target ${_target} cotired without precompiled header.") endif() if (_disableMsg) set (_targetMsg "${_targetMsg} ${_disableMsg}") endif() elseif (NOT _targetAddSCU) if (_excludedStr) set (_targetMsg "${_languagesStr} target ${_target} cotired without unity build ${_excludedStr}.") else() set (_targetMsg "${_languagesStr} target ${_target} cotired without unity build.") endif() if (_disableMsg) set (_targetMsg "${_targetMsg} ${_disableMsg}") endif() else() if (_excludedStr) set (_targetMsg "${_languagesStr} target ${_target} cotired ${_excludedStr}.") else() set (_targetMsg "${_languagesStr}
target ${_target} cotired.") endif() endif() set (${_targetMsgVar} "${_targetMsg}" PARENT_SCOPE) endfunction() function (cotire_choose_target_languages _target _targetLanguagesVar _wholeTargetVar) set (_languages ${ARGN}) set (_allSourceFiles "") set (_allExcludedSourceFiles "") set (_allCotiredSourceFiles "") set (_targetLanguages "") set (_pchEligibleTargetLanguages "") get_target_property(_targetType ${_target} TYPE) get_target_property(_targetSourceFiles ${_target} SOURCES) get_target_property(_targetUsePCH ${_target} COTIRE_ENABLE_PRECOMPILED_HEADER) get_target_property(_targetAddSCU ${_target} COTIRE_ADD_UNITY_BUILD) set (_disableMsg "") foreach (_language ${_languages}) get_target_property(_prefixHeader ${_target} COTIRE_${_language}_PREFIX_HEADER) get_target_property(_unityBuildFile ${_target} COTIRE_${_language}_UNITY_SOURCE) if (_prefixHeader OR _unityBuildFile) message (STATUS "cotire: target ${_target} has already been cotired.") set (${_targetLanguagesVar} "" PARENT_SCOPE) return() endif() if (_targetUsePCH AND "${_language}" MATCHES "^C|CXX$" AND DEFINED CMAKE_${_language}_COMPILER_ID) if (CMAKE_${_language}_COMPILER_ID) cotire_check_precompiled_header_support("${_language}" "${_target}" _disableMsg) if (_disableMsg) set (_targetUsePCH FALSE) endif() endif() endif() set (_sourceFiles "") set (_excludedSources "") set (_cotiredSources "") cotire_filter_language_source_files(${_language} ${_target} _sourceFiles _excludedSources _cotiredSources ${_targetSourceFiles}) if (_sourceFiles OR _excludedSources OR _cotiredSources) list (APPEND _targetLanguages ${_language}) endif() if (_sourceFiles) list (APPEND _allSourceFiles ${_sourceFiles}) endif() list (LENGTH _sourceFiles _numberOfSources) if (NOT _numberOfSources LESS ${COTIRE_MINIMUM_NUMBER_OF_TARGET_SOURCES}) list (APPEND _pchEligibleTargetLanguages ${_language}) endif() if (_excludedSources) list (APPEND _allExcludedSourceFiles ${_excludedSources}) endif() if (_cotiredSources) list (APPEND 
# (review) sources already cotired for another target force-disable the PCH with a
# SEND_ERROR-level diagnostic below; Xcode cannot exclude individual sources from the
# PCH nor attach the required PRE_BUILD action to OBJECT_LIBRARY targets.
_allCotiredSourceFiles ${_cotiredSources}) endif() endforeach() set (_targetMsgLevel STATUS) if (NOT _targetLanguages) string (REPLACE ";" " or " _languagesStr "${_languages}") set (_disableMsg "No ${_languagesStr} source files.") set (_targetUsePCH FALSE) set (_targetAddSCU FALSE) endif() if (_targetUsePCH) if (_allCotiredSourceFiles) cotire_get_source_file_property_values(_cotireTargets COTIRE_TARGET ${_allCotiredSourceFiles}) list (REMOVE_DUPLICATES _cotireTargets) string (REPLACE ";" ", " _cotireTargetsStr "${_cotireTargets}") set (_disableMsg "Target sources already include a precompiled header for target(s) ${_cotireTargets}.") set (_disableMsg "${_disableMsg} Set target property COTIRE_ENABLE_PRECOMPILED_HEADER to FALSE for targets ${_target},") set (_disableMsg "${_disableMsg} ${_cotireTargetsStr} to get a workable build system.") set (_targetMsgLevel SEND_ERROR) set (_targetUsePCH FALSE) elseif (NOT _pchEligibleTargetLanguages) set (_disableMsg "Too few applicable sources.") set (_targetUsePCH FALSE) elseif (XCODE AND _allExcludedSourceFiles) # for Xcode, we cannot apply the precompiled header to individual sources, only to the whole target set (_disableMsg "Exclusion of source files not supported for generator Xcode.") set (_targetUsePCH FALSE) elseif (XCODE AND "${_targetType}" STREQUAL "OBJECT_LIBRARY") # for Xcode, we cannot apply the required PRE_BUILD action to generate the prefix header to an OBJECT_LIBRARY target set (_disableMsg "Required PRE_BUILD action not supported for OBJECT_LIBRARY targets for generator Xcode.") set (_targetUsePCH FALSE) endif() endif() if (_targetAddSCU) # disable unity builds if automatic Qt processing is used get_target_property(_targetAutoMoc ${_target} AUTOMOC) get_target_property(_targetAutoUic ${_target} AUTOUIC) get_target_property(_targetAutoRcc ${_target} AUTORCC) if (_targetAutoMoc OR _targetAutoUic OR _targetAutoRcc) if (_disableMsg) set (_disableMsg "${_disableMsg} Target uses automatic CMake Qt processing.") 
# (review) targets with AUTOMOC/AUTOUIC/AUTORCC enabled opt out of unity builds;
# cotire_compute_unity_max_number_of_includes below also understands a "-j N" /
# "--parallel N" / "--jobs N" spelling, dividing sources across available cores.
else() set (_disableMsg "Target uses automatic CMake Qt processing.") endif() set (_targetAddSCU FALSE) endif() endif() set_property(TARGET ${_target} PROPERTY COTIRE_ENABLE_PRECOMPILED_HEADER ${_targetUsePCH}) set_property(TARGET ${_target} PROPERTY COTIRE_ADD_UNITY_BUILD ${_targetAddSCU}) cotire_make_target_message(${_target} "${_targetLanguages}" "${_disableMsg}" _targetMsg ${_allExcludedSourceFiles}) if (_targetMsg) if (NOT DEFINED COTIREMSG_${_target}) set (COTIREMSG_${_target} "") endif() if (COTIRE_VERBOSE OR NOT "${_targetMsgLevel}" STREQUAL "STATUS" OR NOT "${COTIREMSG_${_target}}" STREQUAL "${_targetMsg}") # cache message to avoid redundant messages on re-configure set (COTIREMSG_${_target} "${_targetMsg}" CACHE INTERNAL "${_target} cotire message.") message (${_targetMsgLevel} "${_targetMsg}") endif() endif() list (LENGTH _targetLanguages _numberOfLanguages) if (_numberOfLanguages GREATER 1 OR _allExcludedSourceFiles) set (${_wholeTargetVar} FALSE PARENT_SCOPE) else() set (${_wholeTargetVar} TRUE PARENT_SCOPE) endif() set (${_targetLanguagesVar} ${_targetLanguages} PARENT_SCOPE) endfunction() function (cotire_compute_unity_max_number_of_includes _target _maxIncludesVar) set (_sourceFiles ${ARGN}) get_target_property(_maxIncludes ${_target} COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES) if (_maxIncludes MATCHES "(-j|--parallel|--jobs) ?([0-9]*)") if (DEFINED CMAKE_MATCH_2) set (_numberOfThreads "${CMAKE_MATCH_2}") else() set (_numberOfThreads "") endif() if (NOT _numberOfThreads) # use all available cores ProcessorCount(_numberOfThreads) endif() list (LENGTH _sourceFiles _numberOfSources) math (EXPR _maxIncludes "(${_numberOfSources} + ${_numberOfThreads} - 1) / ${_numberOfThreads}") elseif (NOT _maxIncludes MATCHES "[0-9]+") set (_maxIncludes 0) endif() if (COTIRE_DEBUG) message (STATUS "${_target} unity source max includes: ${_maxIncludes}") endif() set (${_maxIncludesVar} ${_maxIncludes} PARENT_SCOPE) endfunction() function
(cotire_process_target_language _language _configurations _target _wholeTarget _cmdsVar) set (${_cmdsVar} "" PARENT_SCOPE) get_target_property(_targetSourceFiles ${_target} SOURCES) set (_sourceFiles "") set (_excludedSources "") set (_cotiredSources "") cotire_filter_language_source_files(${_language} ${_target} _sourceFiles _excludedSources _cotiredSources ${_targetSourceFiles}) if (NOT _sourceFiles AND NOT _cotiredSources) return() endif() set (_cmds "") # check for user provided unity source file list get_property(_unitySourceFiles TARGET ${_target} PROPERTY COTIRE_${_language}_UNITY_SOURCE_INIT) if (NOT _unitySourceFiles) set (_unitySourceFiles ${_sourceFiles} ${_cotiredSources}) endif() cotire_generate_target_script( ${_language} "${_configurations}" ${_target} _targetScript _targetConfigScript ${_unitySourceFiles}) # set up unity files for parallel compilation cotire_compute_unity_max_number_of_includes(${_target} _maxIncludes ${_unitySourceFiles}) cotire_make_unity_source_file_paths(${_language} ${_target} ${_maxIncludes} _unityFiles ${_unitySourceFiles}) list (LENGTH _unityFiles _numberOfUnityFiles) if (_numberOfUnityFiles EQUAL 0) return() elseif (_numberOfUnityFiles GREATER 1) cotire_setup_unity_generation_commands( ${_language} ${_target} "${_targetScript}" "${_targetConfigScript}" "${_unityFiles}" _cmds ${_unitySourceFiles}) endif() # set up single unity file for prefix header generation cotire_make_single_unity_source_file_path(${_language} ${_target} _unityFile) cotire_setup_unity_generation_commands( ${_language} ${_target} "${_targetScript}" "${_targetConfigScript}" "${_unityFile}" _cmds ${_unitySourceFiles}) cotire_make_prefix_file_path(${_language} ${_target} _prefixFile) # set up prefix header if (_prefixFile) # check for user provided prefix header files get_property(_prefixHeaderFiles TARGET ${_target} PROPERTY COTIRE_${_language}_PREFIX_HEADER_INIT) if (_prefixHeaderFiles) cotire_setup_prefix_generation_from_provided_command( ${_language} 
# (review) arguments below finish the user-provided prefix-header branch; otherwise
# the prefix header is distilled from the single unity file, then PCH compilation
# and inclusion are wired up when the target has enough eligible sources.
${_target} "${_targetConfigScript}" "${_prefixFile}" _cmds ${_prefixHeaderFiles}) else() cotire_setup_prefix_generation_from_unity_command( ${_language} ${_target} "${_targetConfigScript}" "${_prefixFile}" "${_unityFile}" _cmds ${_unitySourceFiles}) endif() # check if selected language has enough sources at all list (LENGTH _sourceFiles _numberOfSources) if (_numberOfSources LESS ${COTIRE_MINIMUM_NUMBER_OF_TARGET_SOURCES}) set (_targetUsePCH FALSE) else() get_target_property(_targetUsePCH ${_target} COTIRE_ENABLE_PRECOMPILED_HEADER) endif() if (_targetUsePCH) cotire_make_pch_file_path(${_language} ${_target} _pchFile) if (_pchFile) # first file in _sourceFiles is passed as the host file cotire_setup_pch_file_compilation( ${_language} ${_target} "${_targetConfigScript}" "${_prefixFile}" "${_pchFile}" ${_sourceFiles}) cotire_setup_pch_file_inclusion( ${_language} ${_target} ${_wholeTarget} "${_prefixFile}" "${_pchFile}" ${_sourceFiles}) endif() elseif (_prefixHeaderFiles) # user provided prefix header must be included unconditionally cotire_setup_prefix_file_inclusion(${_language} ${_target} "${_prefixFile}" ${_sourceFiles}) endif() endif() # mark target as cotired for language set_property(TARGET ${_target} PROPERTY COTIRE_${_language}_UNITY_SOURCE "${_unityFiles}") if (_prefixFile) set_property(TARGET ${_target} PROPERTY COTIRE_${_language}_PREFIX_HEADER "${_prefixFile}") if (_targetUsePCH AND _pchFile) set_property(TARGET ${_target} PROPERTY COTIRE_${_language}_PRECOMPILED_HEADER "${_pchFile}") endif() endif() set (${_cmdsVar} ${_cmds} PARENT_SCOPE) endfunction() function (cotire_setup_clean_target _target) set (_cleanTargetName "${_target}${COTIRE_CLEAN_TARGET_SUFFIX}") if (NOT TARGET "${_cleanTargetName}") cotire_set_cmd_to_prologue(_cmds) get_filename_component(_outputDir "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}" ABSOLUTE) list (APPEND _cmds -P "${COTIRE_CMAKE_MODULE_FILE}" "cleanup" "${_outputDir}" "${COTIRE_INTDIR}" "${_target}") 
# (review) the clean target invokes this module in "cleanup" script mode against the
# target's intermediate output directory; cotire_setup_pch_target below adds a
# "<target>_pch" driver target for make/Ninja generators. The trailing
# cotire_collect_unity_target_sources is cut off by the chunk boundary and is
# intentionally left undocumented here.
add_custom_target(${_cleanTargetName} COMMAND ${_cmds} WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" COMMENT "Cleaning up target ${_target} cotire generated files" VERBATIM) cotire_init_target("${_cleanTargetName}") endif() endfunction() function (cotire_setup_pch_target _languages _configurations _target) if ("${CMAKE_GENERATOR}" MATCHES "Make|Ninja") # for makefile based generators, we add a custom target to trigger the generation of the cotire related files set (_dependsFiles "") foreach (_language ${_languages}) set (_props COTIRE_${_language}_PREFIX_HEADER COTIRE_${_language}_UNITY_SOURCE) if (NOT CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel" AND NOT (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "Clang")) # MSVC, Intel and clang-cl only create precompiled header as a side effect list (INSERT _props 0 COTIRE_${_language}_PRECOMPILED_HEADER) endif() cotire_get_first_set_property_value(_dependsFile TARGET ${_target} ${_props}) if (_dependsFile) list (APPEND _dependsFiles "${_dependsFile}") endif() endforeach() if (_dependsFiles) set (_pchTargetName "${_target}${COTIRE_PCH_TARGET_SUFFIX}") add_custom_target("${_pchTargetName}" DEPENDS ${_dependsFiles}) cotire_init_target("${_pchTargetName}") cotire_add_to_pch_all_target(${_pchTargetName}) endif() else() # for other generators, we add the "clean all" target to clean up the precompiled header cotire_setup_clean_all_target() endif() endfunction() function (cotire_filter_object_libraries _target _objectLibrariesVar) set (_objectLibraries "") foreach (_source ${ARGN}) if (_source MATCHES "^\\$$") list (APPEND _objectLibraries "${_source}") endif() endforeach() set (${_objectLibrariesVar} ${_objectLibraries} PARENT_SCOPE) endfunction() function (cotire_collect_unity_target_sources _target _languages _unityTargetSourcesVar) get_target_property(_targetSourceFiles ${_target} SOURCES) set (_unityTargetSources ${_targetSourceFiles}) foreach (_language ${_languages}) get_property(_unityFiles TARGET ${_target} PROPERTY
COTIRE_${_language}_UNITY_SOURCE) if (_unityFiles) # remove source files that are included in the unity source set (_sourceFiles "") set (_excludedSources "") set (_cotiredSources "") cotire_filter_language_source_files(${_language} ${_target} _sourceFiles _excludedSources _cotiredSources ${_targetSourceFiles}) if (_sourceFiles OR _cotiredSources) list (REMOVE_ITEM _unityTargetSources ${_sourceFiles} ${_cotiredSources}) endif() # add unity source files instead list (APPEND _unityTargetSources ${_unityFiles}) endif() endforeach() # handle object libraries which are part of the target's sources get_target_property(_linkLibrariesStrategy ${_target} COTIRE_UNITY_LINK_LIBRARIES_INIT) if ("${_linkLibrariesStrategy}" MATCHES "^COPY_UNITY$") cotire_filter_object_libraries(${_target} _objectLibraries ${_targetSourceFiles}) if (_objectLibraries) cotire_map_libraries("${_linkLibrariesStrategy}" _unityObjectLibraries ${_objectLibraries}) list (REMOVE_ITEM _unityTargetSources ${_objectLibraries}) list (APPEND _unityTargetSources ${_unityObjectLibraries}) endif() endif() set (${_unityTargetSourcesVar} ${_unityTargetSources} PARENT_SCOPE) endfunction() function (cotire_setup_unity_target_pch_usage _languages _target) foreach (_language ${_languages}) get_property(_unityFiles TARGET ${_target} PROPERTY COTIRE_${_language}_UNITY_SOURCE) if (_unityFiles) get_property(_userPrefixFile TARGET ${_target} PROPERTY COTIRE_${_language}_PREFIX_HEADER_INIT) get_property(_prefixFile TARGET ${_target} PROPERTY COTIRE_${_language}_PREFIX_HEADER) if (_userPrefixFile AND _prefixFile) # user provided prefix header must be included unconditionally by unity sources cotire_setup_prefix_file_inclusion(${_language} ${_target} "${_prefixFile}" ${_unityFiles}) endif() endif() endforeach() endfunction() function (cotire_setup_unity_build_target _languages _configurations _target) get_target_property(_unityTargetName ${_target} COTIRE_UNITY_TARGET_NAME) if (NOT _unityTargetName) set (_unityTargetName 
"${_target}${COTIRE_UNITY_BUILD_TARGET_SUFFIX}") endif() # determine unity target sub type get_target_property(_targetType ${_target} TYPE) if ("${_targetType}" STREQUAL "EXECUTABLE") set (_unityTargetSubType "") elseif (_targetType MATCHES "(STATIC|SHARED|MODULE|OBJECT)_LIBRARY") set (_unityTargetSubType "${CMAKE_MATCH_1}") else() message (WARNING "cotire: target ${_target} has unknown target type ${_targetType}.") return() endif() # determine unity target sources set (_unityTargetSources "") cotire_collect_unity_target_sources(${_target} "${_languages}" _unityTargetSources) # prevent AUTOMOC, AUTOUIC and AUTORCC properties from being set when the unity target is created set (CMAKE_AUTOMOC OFF) set (CMAKE_AUTOUIC OFF) set (CMAKE_AUTORCC OFF) if (COTIRE_DEBUG) message (STATUS "add target ${_targetType} ${_unityTargetName} ${_unityTargetSubType} EXCLUDE_FROM_ALL ${_unityTargetSources}") endif() # generate unity target if ("${_targetType}" STREQUAL "EXECUTABLE") add_executable(${_unityTargetName} ${_unityTargetSubType} EXCLUDE_FROM_ALL ${_unityTargetSources}) else() add_library(${_unityTargetName} ${_unityTargetSubType} EXCLUDE_FROM_ALL ${_unityTargetSources}) endif() # copy output location properties set (_outputDirProperties ARCHIVE_OUTPUT_DIRECTORY ARCHIVE_OUTPUT_DIRECTORY_ LIBRARY_OUTPUT_DIRECTORY LIBRARY_OUTPUT_DIRECTORY_ RUNTIME_OUTPUT_DIRECTORY RUNTIME_OUTPUT_DIRECTORY_) if (COTIRE_UNITY_OUTPUT_DIRECTORY) set (_setDefaultOutputDir TRUE) if (IS_ABSOLUTE "${COTIRE_UNITY_OUTPUT_DIRECTORY}") set (_outputDir "${COTIRE_UNITY_OUTPUT_DIRECTORY}") else() # append relative COTIRE_UNITY_OUTPUT_DIRECTORY to target's actual output directory cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} ${_outputDirProperties}) cotire_resolve_config_properties("${_configurations}" _properties ${_outputDirProperties}) foreach (_property ${_properties}) get_property(_outputDir TARGET ${_target} PROPERTY ${_property}) if (_outputDir) 
get_filename_component(_outputDir "${_outputDir}/${COTIRE_UNITY_OUTPUT_DIRECTORY}" ABSOLUTE) set_property(TARGET ${_unityTargetName} PROPERTY ${_property} "${_outputDir}") set (_setDefaultOutputDir FALSE) endif() endforeach() if (_setDefaultOutputDir) get_filename_component(_outputDir "${CMAKE_CURRENT_BINARY_DIR}/${COTIRE_UNITY_OUTPUT_DIRECTORY}" ABSOLUTE) endif() endif() if (_setDefaultOutputDir) set_target_properties(${_unityTargetName} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${_outputDir}" LIBRARY_OUTPUT_DIRECTORY "${_outputDir}" RUNTIME_OUTPUT_DIRECTORY "${_outputDir}") endif() else() cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} ${_outputDirProperties}) endif() # copy output name cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} ARCHIVE_OUTPUT_NAME ARCHIVE_OUTPUT_NAME_ LIBRARY_OUTPUT_NAME LIBRARY_OUTPUT_NAME_ OUTPUT_NAME OUTPUT_NAME_ RUNTIME_OUTPUT_NAME RUNTIME_OUTPUT_NAME_ PREFIX _POSTFIX SUFFIX IMPORT_PREFIX IMPORT_SUFFIX) # copy compile stuff cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} COMPILE_DEFINITIONS COMPILE_DEFINITIONS_ COMPILE_FLAGS COMPILE_OPTIONS Fortran_FORMAT Fortran_MODULE_DIRECTORY INCLUDE_DIRECTORIES INTERPROCEDURAL_OPTIMIZATION INTERPROCEDURAL_OPTIMIZATION_ POSITION_INDEPENDENT_CODE C_COMPILER_LAUNCHER CXX_COMPILER_LAUNCHER C_INCLUDE_WHAT_YOU_USE CXX_INCLUDE_WHAT_YOU_USE C_VISIBILITY_PRESET CXX_VISIBILITY_PRESET VISIBILITY_INLINES_HIDDEN C_CLANG_TIDY CXX_CLANG_TIDY) # copy compile features cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} C_EXTENSIONS C_STANDARD C_STANDARD_REQUIRED CXX_EXTENSIONS CXX_STANDARD CXX_STANDARD_REQUIRED COMPILE_FEATURES) # copy interface stuff cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} COMPATIBLE_INTERFACE_BOOL COMPATIBLE_INTERFACE_NUMBER_MAX COMPATIBLE_INTERFACE_NUMBER_MIN COMPATIBLE_INTERFACE_STRING 
INTERFACE_COMPILE_DEFINITIONS INTERFACE_COMPILE_FEATURES INTERFACE_COMPILE_OPTIONS INTERFACE_INCLUDE_DIRECTORIES INTERFACE_SOURCES INTERFACE_POSITION_INDEPENDENT_CODE INTERFACE_SYSTEM_INCLUDE_DIRECTORIES INTERFACE_AUTOUIC_OPTIONS NO_SYSTEM_FROM_IMPORTED) # copy link stuff cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} BUILD_WITH_INSTALL_RPATH BUILD_WITH_INSTALL_NAME_DIR INSTALL_RPATH INSTALL_RPATH_USE_LINK_PATH SKIP_BUILD_RPATH LINKER_LANGUAGE LINK_DEPENDS LINK_DEPENDS_NO_SHARED LINK_FLAGS LINK_FLAGS_ LINK_INTERFACE_LIBRARIES LINK_INTERFACE_LIBRARIES_ LINK_INTERFACE_MULTIPLICITY LINK_INTERFACE_MULTIPLICITY_ LINK_SEARCH_START_STATIC LINK_SEARCH_END_STATIC STATIC_LIBRARY_FLAGS STATIC_LIBRARY_FLAGS_ NO_SONAME SOVERSION VERSION LINK_WHAT_YOU_USE BUILD_RPATH) # copy cmake stuff cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} IMPLICIT_DEPENDS_INCLUDE_TRANSFORM RULE_LAUNCH_COMPILE RULE_LAUNCH_CUSTOM RULE_LAUNCH_LINK) # copy Apple platform specific stuff cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} BUNDLE BUNDLE_EXTENSION FRAMEWORK FRAMEWORK_VERSION INSTALL_NAME_DIR MACOSX_BUNDLE MACOSX_BUNDLE_INFO_PLIST MACOSX_FRAMEWORK_INFO_PLIST MACOSX_RPATH OSX_ARCHITECTURES OSX_ARCHITECTURES_ PRIVATE_HEADER PUBLIC_HEADER RESOURCE XCTEST IOS_INSTALL_COMBINED XCODE_EXPLICIT_FILE_TYPE XCODE_PRODUCT_TYPE) # copy Windows platform specific stuff cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} GNUtoMS COMPILE_PDB_NAME COMPILE_PDB_NAME_ COMPILE_PDB_OUTPUT_DIRECTORY COMPILE_PDB_OUTPUT_DIRECTORY_ PDB_NAME PDB_NAME_ PDB_OUTPUT_DIRECTORY PDB_OUTPUT_DIRECTORY_ VS_DESKTOP_EXTENSIONS_VERSION VS_DOTNET_REFERENCES VS_DOTNET_TARGET_FRAMEWORK_VERSION VS_GLOBAL_KEYWORD VS_GLOBAL_PROJECT_TYPES VS_GLOBAL_ROOTNAMESPACE VS_IOT_EXTENSIONS_VERSION VS_IOT_STARTUP_TASK VS_KEYWORD VS_MOBILE_EXTENSIONS_VERSION VS_SCC_AUXPATH VS_SCC_LOCALPATH VS_SCC_PROJECTNAME 
VS_SCC_PROVIDER VS_WINDOWS_TARGET_PLATFORM_MIN_VERSION VS_WINRT_COMPONENT VS_WINRT_EXTENSIONS VS_WINRT_REFERENCES WIN32_EXECUTABLE WINDOWS_EXPORT_ALL_SYMBOLS DEPLOYMENT_REMOTE_DIRECTORY VS_CONFIGURATION_TYPE VS_SDK_REFERENCES VS_USER_PROPS VS_DEBUGGER_WORKING_DIRECTORY) # copy Android platform specific stuff cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} ANDROID_API ANDROID_API_MIN ANDROID_GUI ANDROID_ANT_ADDITIONAL_OPTIONS ANDROID_ARCH ANDROID_ASSETS_DIRECTORIES ANDROID_JAR_DEPENDENCIES ANDROID_JAR_DIRECTORIES ANDROID_JAVA_SOURCE_DIR ANDROID_NATIVE_LIB_DEPENDENCIES ANDROID_NATIVE_LIB_DIRECTORIES ANDROID_PROCESS_MAX ANDROID_PROGUARD ANDROID_PROGUARD_CONFIG_PATH ANDROID_SECURE_PROPS_PATH ANDROID_SKIP_ANT_STEP ANDROID_STL_TYPE) # copy CUDA platform specific stuff cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} CUDA_PTX_COMPILATION CUDA_SEPARABLE_COMPILATION CUDA_RESOLVE_DEVICE_SYMBOLS CUDA_EXTENSIONS CUDA_STANDARD CUDA_STANDARD_REQUIRED) # use output name from original target get_target_property(_targetOutputName ${_unityTargetName} OUTPUT_NAME) if (NOT _targetOutputName) set_property(TARGET ${_unityTargetName} PROPERTY OUTPUT_NAME "${_target}") endif() # use export symbol from original target cotire_get_target_export_symbol("${_target}" _defineSymbol) if (_defineSymbol) set_property(TARGET ${_unityTargetName} PROPERTY DEFINE_SYMBOL "${_defineSymbol}") if ("${_targetType}" STREQUAL "EXECUTABLE") set_property(TARGET ${_unityTargetName} PROPERTY ENABLE_EXPORTS TRUE) endif() endif() # enable parallel compilation for MSVC if (MSVC AND "${CMAKE_GENERATOR}" MATCHES "Visual Studio") list (LENGTH _unityTargetSources _numberOfUnityTargetSources) if (_numberOfUnityTargetSources GREATER 1) set_property(TARGET ${_unityTargetName} APPEND PROPERTY COMPILE_OPTIONS "/MP") endif() endif() cotire_init_target(${_unityTargetName}) cotire_add_to_unity_all_target(${_unityTargetName}) set_property(TARGET 
${_target} PROPERTY COTIRE_UNITY_TARGET_NAME "${_unityTargetName}") endfunction(cotire_setup_unity_build_target) function (cotire_target _target) set(_options "") set(_oneValueArgs "") set(_multiValueArgs LANGUAGES CONFIGURATIONS) cmake_parse_arguments(_option "${_options}" "${_oneValueArgs}" "${_multiValueArgs}" ${ARGN}) if (NOT _option_LANGUAGES) get_property (_option_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) endif() if (NOT _option_CONFIGURATIONS) cotire_get_configuration_types(_option_CONFIGURATIONS) endif() # check if cotire can be applied to target at all cotire_is_target_supported(${_target} _isSupported) if (NOT _isSupported) get_target_property(_imported ${_target} IMPORTED) get_target_property(_targetType ${_target} TYPE) if (_imported) message (WARNING "cotire: imported ${_targetType} target ${_target} cannot be cotired.") else() message (STATUS "cotire: ${_targetType} target ${_target} cannot be cotired.") endif() return() endif() # resolve alias get_target_property(_aliasName ${_target} ALIASED_TARGET) if (_aliasName) if (COTIRE_DEBUG) message (STATUS "${_target} is an alias. 
Applying cotire to aliased target ${_aliasName} instead.") endif() set (_target ${_aliasName}) endif() # check if target needs to be cotired for build type # when using configuration types, the test is performed at build time cotire_init_cotire_target_properties(${_target}) if (NOT CMAKE_CONFIGURATION_TYPES) if (CMAKE_BUILD_TYPE) list (FIND _option_CONFIGURATIONS "${CMAKE_BUILD_TYPE}" _index) else() list (FIND _option_CONFIGURATIONS "None" _index) endif() if (_index EQUAL -1) if (COTIRE_DEBUG) message (STATUS "CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} not cotired (${_option_CONFIGURATIONS})") endif() return() endif() endif() # when not using configuration types, immediately create cotire intermediate dir if (NOT CMAKE_CONFIGURATION_TYPES) cotire_get_intermediate_dir(_baseDir) file (MAKE_DIRECTORY "${_baseDir}") endif() # choose languages that apply to the target cotire_choose_target_languages("${_target}" _targetLanguages _wholeTarget ${_option_LANGUAGES}) if (NOT _targetLanguages) return() endif() set (_cmds "") foreach (_language ${_targetLanguages}) cotire_process_target_language("${_language}" "${_option_CONFIGURATIONS}" ${_target} ${_wholeTarget} _cmd) if (_cmd) list (APPEND _cmds ${_cmd}) endif() endforeach() get_target_property(_targetAddSCU ${_target} COTIRE_ADD_UNITY_BUILD) if (_targetAddSCU) cotire_setup_unity_build_target("${_targetLanguages}" "${_option_CONFIGURATIONS}" ${_target}) endif() get_target_property(_targetUsePCH ${_target} COTIRE_ENABLE_PRECOMPILED_HEADER) if (_targetUsePCH) cotire_setup_target_pch_usage("${_targetLanguages}" ${_target} ${_wholeTarget} ${_cmds}) cotire_setup_pch_target("${_targetLanguages}" "${_option_CONFIGURATIONS}" ${_target}) if (_targetAddSCU) cotire_setup_unity_target_pch_usage("${_targetLanguages}" ${_target}) endif() endif() get_target_property(_targetAddCleanTarget ${_target} COTIRE_ADD_CLEAN) if (_targetAddCleanTarget) cotire_setup_clean_target(${_target}) endif() endfunction(cotire_target) function (cotire_map_libraries 
_strategy _mappedLibrariesVar) set (_mappedLibraries "") foreach (_library ${ARGN}) if (_library MATCHES "^\\$$") set (_libraryName "${CMAKE_MATCH_1}") set (_linkOnly TRUE) set (_objectLibrary FALSE) elseif (_library MATCHES "^\\$$") set (_libraryName "${CMAKE_MATCH_1}") set (_linkOnly FALSE) set (_objectLibrary TRUE) else() set (_libraryName "${_library}") set (_linkOnly FALSE) set (_objectLibrary FALSE) endif() if ("${_strategy}" MATCHES "COPY_UNITY") cotire_is_target_supported(${_libraryName} _isSupported) if (_isSupported) # use target's corresponding unity target, if available get_target_property(_libraryUnityTargetName ${_libraryName} COTIRE_UNITY_TARGET_NAME) if (TARGET "${_libraryUnityTargetName}") if (_linkOnly) list (APPEND _mappedLibraries "$") elseif (_objectLibrary) list (APPEND _mappedLibraries "$") else() list (APPEND _mappedLibraries "${_libraryUnityTargetName}") endif() else() list (APPEND _mappedLibraries "${_library}") endif() else() list (APPEND _mappedLibraries "${_library}") endif() else() list (APPEND _mappedLibraries "${_library}") endif() endforeach() list (REMOVE_DUPLICATES _mappedLibraries) set (${_mappedLibrariesVar} ${_mappedLibraries} PARENT_SCOPE) endfunction() function (cotire_target_link_libraries _target) cotire_is_target_supported(${_target} _isSupported) if (NOT _isSupported) return() endif() get_target_property(_unityTargetName ${_target} COTIRE_UNITY_TARGET_NAME) if (TARGET "${_unityTargetName}") get_target_property(_linkLibrariesStrategy ${_target} COTIRE_UNITY_LINK_LIBRARIES_INIT) if (COTIRE_DEBUG) message (STATUS "unity target ${_unityTargetName} link strategy: ${_linkLibrariesStrategy}") endif() if ("${_linkLibrariesStrategy}" MATCHES "^(COPY|COPY_UNITY)$") get_target_property(_linkLibraries ${_target} LINK_LIBRARIES) if (_linkLibraries) cotire_map_libraries("${_linkLibrariesStrategy}" _unityLinkLibraries ${_linkLibraries}) set_target_properties(${_unityTargetName} PROPERTIES LINK_LIBRARIES "${_unityLinkLibraries}") if 
(COTIRE_DEBUG) message (STATUS "unity target ${_unityTargetName} link libraries: ${_unityLinkLibraries}") endif() endif() get_target_property(_interfaceLinkLibraries ${_target} INTERFACE_LINK_LIBRARIES) if (_interfaceLinkLibraries) cotire_map_libraries("${_linkLibrariesStrategy}" _unityLinkInterfaceLibraries ${_interfaceLinkLibraries}) set_target_properties(${_unityTargetName} PROPERTIES INTERFACE_LINK_LIBRARIES "${_unityLinkInterfaceLibraries}") if (COTIRE_DEBUG) message (STATUS "unity target ${_unityTargetName} interface link libraries: ${_unityLinkInterfaceLibraries}") endif() endif() get_target_property(_manualDependencies ${_target} MANUALLY_ADDED_DEPENDENCIES) if (_manualDependencies) cotire_map_libraries("${_linkLibrariesStrategy}" _unityManualDependencies ${_manualDependencies}) if (_unityManualDependencies) add_dependencies("${_unityTargetName}" ${_unityManualDependencies}) endif() endif() endif() endif() endfunction(cotire_target_link_libraries) function (cotire_cleanup _binaryDir _cotireIntermediateDirName _targetName) if (_targetName) file (GLOB_RECURSE _cotireFiles "${_binaryDir}/${_targetName}*.*") else() file (GLOB_RECURSE _cotireFiles "${_binaryDir}/*.*") endif() # filter files in intermediate directory set (_filesToRemove "") foreach (_file ${_cotireFiles}) get_filename_component(_dir "${_file}" DIRECTORY) get_filename_component(_dirName "${_dir}" NAME) if ("${_dirName}" STREQUAL "${_cotireIntermediateDirName}") list (APPEND _filesToRemove "${_file}") endif() endforeach() if (_filesToRemove) if (COTIRE_VERBOSE) message (STATUS "cleaning up ${_filesToRemove}") endif() file (REMOVE ${_filesToRemove}) endif() endfunction() function (cotire_init_target _targetName) if (COTIRE_TARGETS_FOLDER) set_target_properties(${_targetName} PROPERTIES FOLDER "${COTIRE_TARGETS_FOLDER}") endif() set_target_properties(${_targetName} PROPERTIES EXCLUDE_FROM_ALL TRUE) if (MSVC_IDE) set_target_properties(${_targetName} PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD TRUE) endif() 
endfunction() function (cotire_add_to_pch_all_target _pchTargetName) set (_targetName "${COTIRE_PCH_ALL_TARGET_NAME}") if (NOT TARGET "${_targetName}") add_custom_target("${_targetName}" WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" VERBATIM) cotire_init_target("${_targetName}") endif() cotire_setup_clean_all_target() add_dependencies(${_targetName} ${_pchTargetName}) endfunction() function (cotire_add_to_unity_all_target _unityTargetName) set (_targetName "${COTIRE_UNITY_BUILD_ALL_TARGET_NAME}") if (NOT TARGET "${_targetName}") add_custom_target("${_targetName}" WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" VERBATIM) cotire_init_target("${_targetName}") endif() cotire_setup_clean_all_target() add_dependencies(${_targetName} ${_unityTargetName}) endfunction() function (cotire_setup_clean_all_target) set (_targetName "${COTIRE_CLEAN_ALL_TARGET_NAME}") if (NOT TARGET "${_targetName}") cotire_set_cmd_to_prologue(_cmds) list (APPEND _cmds -P "${COTIRE_CMAKE_MODULE_FILE}" "cleanup" "${CMAKE_BINARY_DIR}" "${COTIRE_INTDIR}") add_custom_target(${_targetName} COMMAND ${_cmds} WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" COMMENT "Cleaning up all cotire generated files" VERBATIM) cotire_init_target("${_targetName}") endif() endfunction() function (cotire) set(_options "") set(_oneValueArgs "") set(_multiValueArgs LANGUAGES CONFIGURATIONS) cmake_parse_arguments(_option "${_options}" "${_oneValueArgs}" "${_multiValueArgs}" ${ARGN}) set (_targets ${_option_UNPARSED_ARGUMENTS}) foreach (_target ${_targets}) if (TARGET ${_target}) cotire_target(${_target} LANGUAGES ${_option_LANGUAGES} CONFIGURATIONS ${_option_CONFIGURATIONS}) else() message (WARNING "cotire: ${_target} is not a target.") endif() endforeach() foreach (_target ${_targets}) if (TARGET ${_target}) cotire_target_link_libraries(${_target}) endif() endforeach() endfunction() if (CMAKE_SCRIPT_MODE_FILE) # cotire is being run in script mode # locate -P on command args set (COTIRE_ARGC -1) foreach (_index RANGE ${CMAKE_ARGC}) if 
(COTIRE_ARGC GREATER -1) set (COTIRE_ARGV${COTIRE_ARGC} "${CMAKE_ARGV${_index}}") math (EXPR COTIRE_ARGC "${COTIRE_ARGC} + 1") elseif ("${CMAKE_ARGV${_index}}" STREQUAL "-P") set (COTIRE_ARGC 0) endif() endforeach() # include target script if available if ("${COTIRE_ARGV2}" MATCHES "\\.cmake$") # the included target scripts sets up additional variables relating to the target (e.g., COTIRE_TARGET_SOURCES) include("${COTIRE_ARGV2}") endif() if (COTIRE_DEBUG) message (STATUS "${COTIRE_ARGV0} ${COTIRE_ARGV1} ${COTIRE_ARGV2} ${COTIRE_ARGV3} ${COTIRE_ARGV4} ${COTIRE_ARGV5}") endif() if (NOT COTIRE_BUILD_TYPE) set (COTIRE_BUILD_TYPE "None") endif() string (TOUPPER "${COTIRE_BUILD_TYPE}" _upperConfig) set (_includeDirs ${COTIRE_TARGET_INCLUDE_DIRECTORIES_${_upperConfig}}) set (_systemIncludeDirs ${COTIRE_TARGET_SYSTEM_INCLUDE_DIRECTORIES_${_upperConfig}}) set (_compileDefinitions ${COTIRE_TARGET_COMPILE_DEFINITIONS_${_upperConfig}}) set (_compileFlags ${COTIRE_TARGET_COMPILE_FLAGS_${_upperConfig}}) # check if target has been cotired for actual build type COTIRE_BUILD_TYPE list (FIND COTIRE_TARGET_CONFIGURATION_TYPES "${COTIRE_BUILD_TYPE}" _index) if (_index GREATER -1) set (_sources ${COTIRE_TARGET_SOURCES}) set (_sourcesDefinitions ${COTIRE_TARGET_SOURCES_COMPILE_DEFINITIONS_${_upperConfig}}) else() if (COTIRE_DEBUG) message (STATUS "COTIRE_BUILD_TYPE=${COTIRE_BUILD_TYPE} not cotired (${COTIRE_TARGET_CONFIGURATION_TYPES})") endif() set (_sources "") set (_sourcesDefinitions "") endif() set (_targetPreUndefs ${COTIRE_TARGET_PRE_UNDEFS}) set (_targetPostUndefs ${COTIRE_TARGET_POST_UNDEFS}) set (_sourcesPreUndefs ${COTIRE_TARGET_SOURCES_PRE_UNDEFS}) set (_sourcesPostUndefs ${COTIRE_TARGET_SOURCES_POST_UNDEFS}) if ("${COTIRE_ARGV1}" STREQUAL "unity") if (XCODE) # executing pre-build action under Xcode, check dependency on target script set (_dependsOption DEPENDS "${COTIRE_ARGV2}") else() # executing custom command, no need to re-check for dependencies set (_dependsOption "") 
endif() cotire_select_unity_source_files("${COTIRE_ARGV3}" _sources ${_sources}) cotire_generate_unity_source( "${COTIRE_ARGV3}" ${_sources} LANGUAGE "${COTIRE_TARGET_LANGUAGE}" SOURCES_COMPILE_DEFINITIONS ${_sourcesDefinitions} PRE_UNDEFS ${_targetPreUndefs} POST_UNDEFS ${_targetPostUndefs} SOURCES_PRE_UNDEFS ${_sourcesPreUndefs} SOURCES_POST_UNDEFS ${_sourcesPostUndefs} ${_dependsOption}) elseif ("${COTIRE_ARGV1}" STREQUAL "prefix") if (XCODE) # executing pre-build action under Xcode, check dependency on unity file and prefix dependencies set (_dependsOption DEPENDS "${COTIRE_ARGV4}" ${COTIRE_TARGET_PREFIX_DEPENDS}) else() # executing custom command, no need to re-check for dependencies set (_dependsOption "") endif() set (_files "") foreach (_index RANGE 4 ${COTIRE_ARGC}) if (COTIRE_ARGV${_index}) list (APPEND _files "${COTIRE_ARGV${_index}}") endif() endforeach() cotire_generate_prefix_header( "${COTIRE_ARGV3}" ${_files} COMPILER_LAUNCHER "${COTIRE_TARGET_${COTIRE_TARGET_LANGUAGE}_COMPILER_LAUNCHER}" COMPILER_EXECUTABLE "${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER}" COMPILER_ARG1 ${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER_ARG1} COMPILER_ID "${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER_ID}" COMPILER_VERSION "${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER_VERSION}" LANGUAGE "${COTIRE_TARGET_LANGUAGE}" IGNORE_PATH "${COTIRE_TARGET_IGNORE_PATH};${COTIRE_ADDITIONAL_PREFIX_HEADER_IGNORE_PATH}" INCLUDE_PATH ${COTIRE_TARGET_INCLUDE_PATH} IGNORE_EXTENSIONS "${CMAKE_${COTIRE_TARGET_LANGUAGE}_SOURCE_FILE_EXTENSIONS};${COTIRE_ADDITIONAL_PREFIX_HEADER_IGNORE_EXTENSIONS}" INCLUDE_PRIORITY_PATH ${COTIRE_TARGET_INCLUDE_PRIORITY_PATH} INCLUDE_DIRECTORIES ${_includeDirs} SYSTEM_INCLUDE_DIRECTORIES ${_systemIncludeDirs} COMPILE_DEFINITIONS ${_compileDefinitions} COMPILE_FLAGS ${_compileFlags} ${_dependsOption}) elseif ("${COTIRE_ARGV1}" STREQUAL "precompile") set (_files "") foreach (_index RANGE 5 ${COTIRE_ARGC}) if (COTIRE_ARGV${_index}) list (APPEND _files "${COTIRE_ARGV${_index}}") 
endif() endforeach() cotire_precompile_prefix_header( "${COTIRE_ARGV3}" "${COTIRE_ARGV4}" "${COTIRE_ARGV5}" COMPILER_LAUNCHER "${COTIRE_TARGET_${COTIRE_TARGET_LANGUAGE}_COMPILER_LAUNCHER}" COMPILER_EXECUTABLE "${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER}" COMPILER_ARG1 ${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER_ARG1} COMPILER_ID "${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER_ID}" COMPILER_VERSION "${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER_VERSION}" LANGUAGE "${COTIRE_TARGET_LANGUAGE}" INCLUDE_DIRECTORIES ${_includeDirs} SYSTEM_INCLUDE_DIRECTORIES ${_systemIncludeDirs} COMPILE_DEFINITIONS ${_compileDefinitions} COMPILE_FLAGS ${_compileFlags}) elseif ("${COTIRE_ARGV1}" STREQUAL "combine") if (COTIRE_TARGET_LANGUAGE) set (_combinedFile "${COTIRE_ARGV3}") set (_startIndex 4) else() set (_combinedFile "${COTIRE_ARGV2}") set (_startIndex 3) endif() set (_files "") foreach (_index RANGE ${_startIndex} ${COTIRE_ARGC}) if (COTIRE_ARGV${_index}) list (APPEND _files "${COTIRE_ARGV${_index}}") endif() endforeach() if (XCODE) # executing pre-build action under Xcode, check dependency on files to be combined set (_dependsOption DEPENDS ${_files}) else() # executing custom command, no need to re-check for dependencies set (_dependsOption "") endif() if (COTIRE_TARGET_LANGUAGE) cotire_generate_unity_source( "${_combinedFile}" ${_files} LANGUAGE "${COTIRE_TARGET_LANGUAGE}" ${_dependsOption}) else() cotire_generate_unity_source("${_combinedFile}" ${_files} ${_dependsOption}) endif() elseif ("${COTIRE_ARGV1}" STREQUAL "cleanup") cotire_cleanup("${COTIRE_ARGV2}" "${COTIRE_ARGV3}" "${COTIRE_ARGV4}") else() message (FATAL_ERROR "cotire: unknown command \"${COTIRE_ARGV1}\".") endif() else() # cotire is being run in include mode # set up all variable and property definitions if (NOT DEFINED COTIRE_DEBUG_INIT) if (DEFINED COTIRE_DEBUG) set (COTIRE_DEBUG_INIT ${COTIRE_DEBUG}) else() set (COTIRE_DEBUG_INIT FALSE) endif() endif() option (COTIRE_DEBUG "Enable cotire debugging output?" 
${COTIRE_DEBUG_INIT}) if (NOT DEFINED COTIRE_VERBOSE_INIT) if (DEFINED COTIRE_VERBOSE) set (COTIRE_VERBOSE_INIT ${COTIRE_VERBOSE}) else() set (COTIRE_VERBOSE_INIT FALSE) endif() endif() option (COTIRE_VERBOSE "Enable cotire verbose output?" ${COTIRE_VERBOSE_INIT}) set (COTIRE_ADDITIONAL_PREFIX_HEADER_IGNORE_EXTENSIONS "inc;inl;ipp" CACHE STRING "Ignore headers with the listed file extensions from the generated prefix header.") set (COTIRE_ADDITIONAL_PREFIX_HEADER_IGNORE_PATH "" CACHE STRING "Ignore headers from these directories when generating the prefix header.") set (COTIRE_UNITY_SOURCE_EXCLUDE_EXTENSIONS "m;mm" CACHE STRING "Ignore sources with the listed file extensions from the generated unity source.") set (COTIRE_MINIMUM_NUMBER_OF_TARGET_SOURCES "2" CACHE STRING "Minimum number of sources in target required to enable use of precompiled header.") if (NOT DEFINED COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES_INIT) if (DEFINED COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES) set (COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES_INIT ${COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES}) elseif ("${CMAKE_GENERATOR}" MATCHES "JOM|Ninja|Visual Studio") # enable parallelization for generators that run multiple jobs by default set (COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES_INIT "-j") else() set (COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES_INIT "0") endif() endif() set (COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES "${COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES_INIT}" CACHE STRING "Maximum number of source files to include in a single unity source file.") if (NOT COTIRE_PREFIX_HEADER_FILENAME_SUFFIX) set (COTIRE_PREFIX_HEADER_FILENAME_SUFFIX "_prefix") endif() if (NOT COTIRE_UNITY_SOURCE_FILENAME_SUFFIX) set (COTIRE_UNITY_SOURCE_FILENAME_SUFFIX "_unity") endif() if (NOT COTIRE_INTDIR) set (COTIRE_INTDIR "cotire") endif() if (NOT COTIRE_PCH_ALL_TARGET_NAME) set (COTIRE_PCH_ALL_TARGET_NAME "all_pch") endif() if (NOT COTIRE_UNITY_BUILD_ALL_TARGET_NAME) set (COTIRE_UNITY_BUILD_ALL_TARGET_NAME "all_unity") endif() if 
(NOT COTIRE_CLEAN_ALL_TARGET_NAME) set (COTIRE_CLEAN_ALL_TARGET_NAME "clean_cotire") endif() if (NOT COTIRE_CLEAN_TARGET_SUFFIX) set (COTIRE_CLEAN_TARGET_SUFFIX "_clean_cotire") endif() if (NOT COTIRE_PCH_TARGET_SUFFIX) set (COTIRE_PCH_TARGET_SUFFIX "_pch") endif() if (MSVC) # MSVC default PCH memory scaling factor of 100 percent (75 MB) is too small for template heavy C++ code # use a bigger default factor of 170 percent (128 MB) if (NOT DEFINED COTIRE_PCH_MEMORY_SCALING_FACTOR) set (COTIRE_PCH_MEMORY_SCALING_FACTOR "170") endif() endif() if (NOT COTIRE_UNITY_BUILD_TARGET_SUFFIX) set (COTIRE_UNITY_BUILD_TARGET_SUFFIX "_unity") endif() if (NOT DEFINED COTIRE_TARGETS_FOLDER) set (COTIRE_TARGETS_FOLDER "cotire") endif() if (NOT DEFINED COTIRE_UNITY_OUTPUT_DIRECTORY) if ("${CMAKE_GENERATOR}" MATCHES "Ninja") # generated Ninja build files do not work if the unity target produces the same output file as the cotired target set (COTIRE_UNITY_OUTPUT_DIRECTORY "unity") else() set (COTIRE_UNITY_OUTPUT_DIRECTORY "") endif() endif() # define cotire cache variables define_property( CACHED_VARIABLE PROPERTY "COTIRE_ADDITIONAL_PREFIX_HEADER_IGNORE_PATH" BRIEF_DOCS "Ignore headers from these directories when generating the prefix header." FULL_DOCS "The variable can be set to a semicolon separated list of include directories." "If a header file is found in one of these directories or sub-directories, it will be excluded from the generated prefix header." "If not defined, defaults to empty list." ) define_property( CACHED_VARIABLE PROPERTY "COTIRE_ADDITIONAL_PREFIX_HEADER_IGNORE_EXTENSIONS" BRIEF_DOCS "Ignore includes with the listed file extensions from the generated prefix header." FULL_DOCS "The variable can be set to a semicolon separated list of file extensions." "If a header file extension matches one in the list, it will be excluded from the generated prefix header." "Includes with an extension in CMAKE__SOURCE_FILE_EXTENSIONS are always ignored." 
"If not defined, defaults to inc;inl;ipp." ) define_property( CACHED_VARIABLE PROPERTY "COTIRE_UNITY_SOURCE_EXCLUDE_EXTENSIONS" BRIEF_DOCS "Exclude sources with the listed file extensions from the generated unity source." FULL_DOCS "The variable can be set to a semicolon separated list of file extensions." "If a source file extension matches one in the list, it will be excluded from the generated unity source file." "Source files with an extension in CMAKE__IGNORE_EXTENSIONS are always excluded." "If not defined, defaults to m;mm." ) define_property( CACHED_VARIABLE PROPERTY "COTIRE_MINIMUM_NUMBER_OF_TARGET_SOURCES" BRIEF_DOCS "Minimum number of sources in target required to enable use of precompiled header." FULL_DOCS "The variable can be set to an integer > 0." "If a target contains less than that number of source files, cotire will not enable the use of the precompiled header for the target." "If not defined, defaults to 2." ) define_property( CACHED_VARIABLE PROPERTY "COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES" BRIEF_DOCS "Maximum number of source files to include in a single unity source file." FULL_DOCS "This may be set to an integer >= 0." "If 0, cotire will only create a single unity source file." "If a target contains more than that number of source files, cotire will create multiple unity source files for it." "Can be set to \"-j\" to optimize the count of unity source files for the number of available processor cores." "Can be set to \"-j jobs\" to optimize the number of unity source files for the given number of simultaneous jobs." "Is used to initialize the target property COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES." "Defaults to \"-j\" for the generators Visual Studio, JOM or Ninja. Defaults to 0 otherwise." ) # define cotire directory properties define_property( DIRECTORY PROPERTY "COTIRE_ENABLE_PRECOMPILED_HEADER" BRIEF_DOCS "Modify build command of cotired targets added in this directory to make use of the generated precompiled header." 
FULL_DOCS "See target property COTIRE_ENABLE_PRECOMPILED_HEADER." ) define_property( DIRECTORY PROPERTY "COTIRE_ADD_UNITY_BUILD" BRIEF_DOCS "Add a new target that performs a unity build for cotired targets added in this directory." FULL_DOCS "See target property COTIRE_ADD_UNITY_BUILD." ) define_property( DIRECTORY PROPERTY "COTIRE_ADD_CLEAN" BRIEF_DOCS "Add a new target that cleans all cotire generated files for cotired targets added in this directory." FULL_DOCS "See target property COTIRE_ADD_CLEAN." ) define_property( DIRECTORY PROPERTY "COTIRE_PREFIX_HEADER_IGNORE_PATH" BRIEF_DOCS "Ignore headers from these directories when generating the prefix header." FULL_DOCS "See target property COTIRE_PREFIX_HEADER_IGNORE_PATH." ) define_property( DIRECTORY PROPERTY "COTIRE_PREFIX_HEADER_INCLUDE_PATH" BRIEF_DOCS "Honor headers from these directories when generating the prefix header." FULL_DOCS "See target property COTIRE_PREFIX_HEADER_INCLUDE_PATH." ) define_property( DIRECTORY PROPERTY "COTIRE_PREFIX_HEADER_INCLUDE_PRIORITY_PATH" BRIEF_DOCS "Header paths matching one of these directories are put at the top of the prefix header." FULL_DOCS "See target property COTIRE_PREFIX_HEADER_INCLUDE_PRIORITY_PATH." ) define_property( DIRECTORY PROPERTY "COTIRE_UNITY_SOURCE_PRE_UNDEFS" BRIEF_DOCS "Preprocessor undefs to place in the generated unity source file before the inclusion of each source file." FULL_DOCS "See target property COTIRE_UNITY_SOURCE_PRE_UNDEFS." ) define_property( DIRECTORY PROPERTY "COTIRE_UNITY_SOURCE_POST_UNDEFS" BRIEF_DOCS "Preprocessor undefs to place in the generated unity source file after the inclusion of each source file." FULL_DOCS "See target property COTIRE_UNITY_SOURCE_POST_UNDEFS." ) define_property( DIRECTORY PROPERTY "COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES" BRIEF_DOCS "Maximum number of source files to include in a single unity source file." FULL_DOCS "See target property COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES." 
) define_property( DIRECTORY PROPERTY "COTIRE_UNITY_LINK_LIBRARIES_INIT" BRIEF_DOCS "Define strategy for setting up the unity target's link libraries." FULL_DOCS "See target property COTIRE_UNITY_LINK_LIBRARIES_INIT." ) # define cotire target properties define_property( TARGET PROPERTY "COTIRE_ENABLE_PRECOMPILED_HEADER" INHERITED BRIEF_DOCS "Modify this target's build command to make use of the generated precompiled header." FULL_DOCS "If this property is set to TRUE, cotire will modify the build command to make use of the generated precompiled header." "Irrespective of the value of this property, cotire will setup custom commands to generate the unity source and prefix header for the target." "For makefile based generators cotire will also set up a custom target to manually invoke the generation of the precompiled header." "The target name will be set to this target's name with the suffix _pch appended." "Inherited from directory." "Defaults to TRUE." ) define_property( TARGET PROPERTY "COTIRE_ADD_UNITY_BUILD" INHERITED BRIEF_DOCS "Add a new target that performs a unity build for this target." FULL_DOCS "If this property is set to TRUE, cotire creates a new target of the same type that uses the generated unity source file instead of the target sources." "Most of the relevant target properties will be copied from this target to the new unity build target." "Target dependencies and linked libraries have to be manually set up for the new unity build target." "The unity target name will be set to this target's name with the suffix _unity appended." "Inherited from directory." "Defaults to TRUE." ) define_property( TARGET PROPERTY "COTIRE_ADD_CLEAN" INHERITED BRIEF_DOCS "Add a new target that cleans all cotire generated files for this target." FULL_DOCS "If this property is set to TRUE, cotire creates a new target that clean all files (unity source, prefix header, precompiled header)." 
"The clean target name will be set to this target's name with the suffix _clean_cotire appended." "Inherited from directory." "Defaults to FALSE." ) define_property( TARGET PROPERTY "COTIRE_PREFIX_HEADER_IGNORE_PATH" INHERITED BRIEF_DOCS "Ignore headers from these directories when generating the prefix header." FULL_DOCS "The property can be set to a list of directories." "If a header file is found in one of these directories or sub-directories, it will be excluded from the generated prefix header." "Inherited from directory." "If not set, this property is initialized to \${CMAKE_SOURCE_DIR};\${CMAKE_BINARY_DIR}." ) define_property( TARGET PROPERTY "COTIRE_PREFIX_HEADER_INCLUDE_PATH" INHERITED BRIEF_DOCS "Honor headers from these directories when generating the prefix header." FULL_DOCS "The property can be set to a list of directories." "If a header file is found in one of these directories or sub-directories, it will be included in the generated prefix header." "If a header file is both selected by COTIRE_PREFIX_HEADER_IGNORE_PATH and COTIRE_PREFIX_HEADER_INCLUDE_PATH," "the option which yields the closer relative path match wins." "Inherited from directory." "If not set, this property is initialized to the empty list." ) define_property( TARGET PROPERTY "COTIRE_PREFIX_HEADER_INCLUDE_PRIORITY_PATH" INHERITED BRIEF_DOCS "Header paths matching one of these directories are put at the top of prefix header." FULL_DOCS "The property can be set to a list of directories." "Header file paths matching one of these directories will be inserted at the beginning of the generated prefix header." "Header files are sorted according to the order of the directories in the property." "If not set, this property is initialized to the empty list." ) define_property( TARGET PROPERTY "COTIRE_UNITY_SOURCE_PRE_UNDEFS" INHERITED BRIEF_DOCS "Preprocessor undefs to place in the generated unity source file before the inclusion of each target source file." 
FULL_DOCS "This may be set to a semicolon-separated list of preprocessor symbols." "cotire will add corresponding #undef directives to the generated unit source file before each target source file." "Inherited from directory." "Defaults to empty string." ) define_property( TARGET PROPERTY "COTIRE_UNITY_SOURCE_POST_UNDEFS" INHERITED BRIEF_DOCS "Preprocessor undefs to place in the generated unity source file after the inclusion of each target source file." FULL_DOCS "This may be set to a semicolon-separated list of preprocessor symbols." "cotire will add corresponding #undef directives to the generated unit source file after each target source file." "Inherited from directory." "Defaults to empty string." ) define_property( TARGET PROPERTY "COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES" INHERITED BRIEF_DOCS "Maximum number of source files to include in a single unity source file." FULL_DOCS "This may be set to an integer > 0." "If a target contains more than that number of source files, cotire will create multiple unity build files for it." "If not set, cotire will only create a single unity source file." "Inherited from directory." "Defaults to empty." ) define_property( TARGET PROPERTY "COTIRE__UNITY_SOURCE_INIT" BRIEF_DOCS "User provided unity source file to be used instead of the automatically generated one." FULL_DOCS "If set, cotire will only add the given file(s) to the generated unity source file." "If not set, cotire will add all the target source files to the generated unity source file." "The property can be set to a user provided unity source file." "Defaults to empty." ) define_property( TARGET PROPERTY "COTIRE__PREFIX_HEADER_INIT" BRIEF_DOCS "User provided prefix header file to be used instead of the automatically generated one." FULL_DOCS "If set, cotire will add the given header file(s) to the generated prefix header file." "If not set, cotire will generate a prefix header by tracking the header files included by the unity source file." 
"The property can be set to a user provided prefix header file (e.g., stdafx.h)." "Defaults to empty." ) define_property( TARGET PROPERTY "COTIRE_UNITY_LINK_LIBRARIES_INIT" INHERITED BRIEF_DOCS "Define strategy for setting up unity target's link libraries." FULL_DOCS "If this property is empty or set to NONE, the generated unity target's link libraries have to be set up manually." "If this property is set to COPY, the unity target's link libraries will be copied from this target." "If this property is set to COPY_UNITY, the unity target's link libraries will be copied from this target with considering existing unity targets." "Inherited from directory." "Defaults to empty." ) define_property( TARGET PROPERTY "COTIRE__UNITY_SOURCE" BRIEF_DOCS "Read-only property. The generated unity source file(s)." FULL_DOCS "cotire sets this property to the path of the generated single computation unit source file for the target." "Defaults to empty string." ) define_property( TARGET PROPERTY "COTIRE__PREFIX_HEADER" BRIEF_DOCS "Read-only property. The generated prefix header file." FULL_DOCS "cotire sets this property to the full path of the generated language prefix header for the target." "Defaults to empty string." ) define_property( TARGET PROPERTY "COTIRE__PRECOMPILED_HEADER" BRIEF_DOCS "Read-only property. The generated precompiled header file." FULL_DOCS "cotire sets this property to the full path of the generated language precompiled header binary for the target." "Defaults to empty string." ) define_property( TARGET PROPERTY "COTIRE_UNITY_TARGET_NAME" BRIEF_DOCS "The name of the generated unity build target corresponding to this target." FULL_DOCS "This property can be set to the desired name of the unity target that will be created by cotire." "If not set, the unity target name will be set to this target's name with the suffix _unity appended." "After this target has been processed by cotire, the property is set to the actual name of the generated unity target." 
"Defaults to empty string." ) # define cotire source properties define_property( SOURCE PROPERTY "COTIRE_EXCLUDED" BRIEF_DOCS "Do not modify source file's build command." FULL_DOCS "If this property is set to TRUE, the source file's build command will not be modified to make use of the precompiled header." "The source file will also be excluded from the generated unity source file." "Source files that have their COMPILE_FLAGS property set will be excluded by default." "Defaults to FALSE." ) define_property( SOURCE PROPERTY "COTIRE_DEPENDENCY" BRIEF_DOCS "Add this source file to dependencies of the automatically generated prefix header file." FULL_DOCS "If this property is set to TRUE, the source file is added to dependencies of the generated prefix header file." "If the file is modified, cotire will re-generate the prefix header source upon build." "Defaults to FALSE." ) define_property( SOURCE PROPERTY "COTIRE_UNITY_SOURCE_PRE_UNDEFS" BRIEF_DOCS "Preprocessor undefs to place in the generated unity source file before the inclusion of this source file." FULL_DOCS "This may be set to a semicolon-separated list of preprocessor symbols." "cotire will add corresponding #undef directives to the generated unit source file before this file is included." "Defaults to empty string." ) define_property( SOURCE PROPERTY "COTIRE_UNITY_SOURCE_POST_UNDEFS" BRIEF_DOCS "Preprocessor undefs to place in the generated unity source file after the inclusion of this source file." FULL_DOCS "This may be set to a semicolon-separated list of preprocessor symbols." "cotire will add corresponding #undef directives to the generated unit source file after this file is included." "Defaults to empty string." ) define_property( SOURCE PROPERTY "COTIRE_START_NEW_UNITY_SOURCE" BRIEF_DOCS "Start a new unity source file which includes this source file as the first one." FULL_DOCS "If this property is set to TRUE, cotire will complete the current unity file and start a new one." 
"The new unity source file will include this source file as the first one." "This property essentially works as a separator for unity source files." "Defaults to FALSE." ) define_property( SOURCE PROPERTY "COTIRE_TARGET" BRIEF_DOCS "Read-only property. Mark this source file as cotired for the given target." FULL_DOCS "cotire sets this property to the name of target, that the source file's build command has been altered for." "Defaults to empty string." ) message (STATUS "cotire ${COTIRE_CMAKE_MODULE_VERSION} loaded.") endif()LucenePlusPlus-rel_3.0.9/cmake/dependencies.cmake000066400000000000000000000010401456444476200220330ustar00rootroot00000000000000#################################### # get dependencies #################################### find_package(Boost COMPONENTS date_time filesystem iostreams regex system thread REQUIRED ) set(Boost_USE_MULTITHREADED ON) set(Boost_USE_STATIC_LIBS ${LUCENE_USE_STATIC_BOOST_LIBS}) set(lucene_boost_libs ${Boost_LIBRARIES} ${Boost_FILESYSTEM_LIBRARIES} ${Boost_IOSTREAMS_LIBRARIES} ${Boost_REGEX_LIBRARIES} ${Boost_SYSTEM_LIBRARIES} ${Boost_THREAD_LIBRARIES} ) find_package(ZLIB REQUIRED) find_package(Threads REQUIRED) LucenePlusPlus-rel_3.0.9/doc/000077500000000000000000000000001456444476200160755ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/doc/BUILDING.md000066400000000000000000000031131456444476200176120ustar00rootroot00000000000000Build Instructions ========== You'll need the following dependencies installed on your system. - [ZLib](https://zlib.net/) - [Boost](http://www.boost.org) libraries:: - date-time - filesystem - regex - thread - iostreams e.g. on Debian systems, the following packages are required: - zlib1g-dev - libboost-date-time-dev - libboost-filesystem-dev - libboost-regex-dev - libboost-thread-dev - libboost-iostreams-dev Build Instructions for linux systems -------------------------------------- To build the library the following commands should be issued:: $ mkdir build; cd build $ cmake .. 
$ make $ make install Build Instructions for Windows systems -------------------------------------- Once you have installed the dependencies and added the installation location to your `CMAKE_PREFIX_PATH`, open cmake-gui and configure the build. When building on windows, ensure that the `ENABLE_CYCLIC_CHECK` option is set to `true`. Next, open the visual studio project with the 'open project' button. the project is built using the `ALL_BUILD` solution in the projects column. If you would like to install the project, build the `INSTALL` solution after the fact. ** Note: if you wish to install the Lucene++ library to a protected area, you must re-open the visual studio project as an administrator ** ** Note: "BOOST_ROOT" environment variable must be defined to point to the Boost library directory (eg. c:\\local\\Boost). cmake should automatically find the installed libraries if they are installed within that path; e.g. C:\\local\\Boost\\lib64-msvc-14.2 ** LucenePlusPlus-rel_3.0.9/doc/doxygen/000077500000000000000000000000001456444476200175525ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/doc/doxygen/Doxyfile.cmake000066400000000000000000000206451456444476200223460ustar00rootroot00000000000000# Doxyfile 1.2.18 #--------------------------------------------------------------------------- # General configuration options #--------------------------------------------------------------------------- PROJECT_NAME = Lucene++ PROJECT_NUMBER = @lucene++_SOVERSION@ OUTPUT_DIRECTORY = @PROJECT_BINARY_DIR@/doc OUTPUT_LANGUAGE = English EXTRACT_ALL = YES EXTRACT_PRIVATE = NO EXTRACT_STATIC = YES EXTRACT_LOCAL_CLASSES = YES EXTRACT_LOCAL_METHODS = YES HIDE_UNDOC_MEMBERS = NO HIDE_UNDOC_CLASSES = NO HIDE_FRIEND_COMPOUNDS = YES HIDE_IN_BODY_DOCS = NO BRIEF_MEMBER_DESC = YES REPEAT_BRIEF = YES ALWAYS_DETAILED_SEC = NO INLINE_INHERITED_MEMB = NO FULL_PATH_NAMES = NO STRIP_FROM_PATH = INTERNAL_DOCS = NO STRIP_CODE_COMMENTS = YES CASE_SENSE_NAMES = YES SHORT_NAMES = NO 
HIDE_SCOPE_NAMES = NO VERBATIM_HEADERS = YES SHOW_INCLUDE_FILES = YES JAVADOC_AUTOBRIEF = YES MULTILINE_CPP_IS_BRIEF = YES DETAILS_AT_TOP = NO INHERIT_DOCS = YES INLINE_INFO = YES SORT_MEMBER_DOCS = YES DISTRIBUTE_GROUP_DOC = NO TAB_SIZE = 8 GENERATE_TODOLIST = YES GENERATE_TESTLIST = YES GENERATE_BUGLIST = YES GENERATE_DEPRECATEDLIST= YES ALIASES = "memory=\par Memory management:\n" ENABLED_SECTIONS = MAX_INITIALIZER_LINES = 30 OPTIMIZE_OUTPUT_FOR_C = YES OPTIMIZE_OUTPUT_JAVA = NO SHOW_USED_FILES = YES DIRECTORY_GRAPH = YES DOCSET_BUNDLE_ID = org.doxygen.Project DOCSET_FEEDNAME = "Doxygen generated docs" DOXYFILE_ENCODING = UTF-8 FORMULA_FONTSIZE = 10 TYPEDEF_HIDES_STRUCT = YES ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- QUIET = NO WARNINGS = YES WARN_IF_UNDOCUMENTED = YES WARN_IF_DOC_ERROR = YES WARN_NO_PARAMDOC = NO WARN_FORMAT = "$file:$line: $text" WARN_LOGFILE = @PROJECT_BINARY_DIR@/doc/doxygen.warnings.log #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- INPUT = @PROJECT_SOURCE_DIR@/include FILE_PATTERNS = *.h RECURSIVE = YES EXCLUDE_SYMLINKS = NO EXCLUDE_PATTERNS = "**/.svn/**" \ "**/.git/**" \ "**/Lucene.h" "*/test/*" \ "*/md5/*" \ "*/nedmalloc/*" \ "*/utf8/*" EXAMPLE_PATH = EXAMPLE_PATTERNS = EXAMPLE_RECURSIVE = NO IMAGE_PATH = INPUT_FILTER = FILTER_SOURCE_FILES = NO #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- SOURCE_BROWSER = NO 
INLINE_SOURCES = NO REFERENCED_BY_RELATION = YES REFERENCES_RELATION = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- ALPHABETICAL_INDEX = NO COLS_IN_ALPHA_INDEX = 5 IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- GENERATE_HTML = @DOCS_HTML@ HTML_OUTPUT = html HTML_FILE_EXTENSION = .html HTML_HEADER = @PROJECT_BINARY_DIR@/doc/helpheader.htm HTML_FOOTER = @PROJECT_BINARY_DIR@/doc/helpfooter.htm HTML_STYLESHEET = HTML_ALIGN_MEMBERS = YES HTML_DYNAMIC_SECTIONS = YES GENERATE_HTMLHELP = @DOCS_HTML_HELP@ CHM_FILE = ../lucene++.chm HHC_LOCATION = @HTML_HELP_COMPILER_EX@ GENERATE_CHI = YES BINARY_TOC = YES TOC_EXPAND = NO DISABLE_INDEX = NO ENUM_VALUES_PER_LINE = 4 GENERATE_TREEVIEW = NO TREEVIEW_WIDTH = 250 #--------------------------------------------------------------------------- # configuration options related to the LaTeX output #--------------------------------------------------------------------------- GENERATE_LATEX = @DOCS_LATEX@ LATEX_OUTPUT = latex LATEX_CMD_NAME = @LATEX_COMPILER@ MAKEINDEX_CMD_NAME = makeindex COMPACT_LATEX = NO PAPER_TYPE = a4wide EXTRA_PACKAGES = LATEX_HEADER = PDF_HYPERLINKS = YES USE_PDFLATEX = NO LATEX_BATCHMODE = NO #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- GENERATE_RTF = @DOCS_RTF@ RTF_OUTPUT = rtf COMPACT_RTF = NO RTF_HYPERLINKS = NO RTF_STYLESHEET_FILE = RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output 
#--------------------------------------------------------------------------- GENERATE_MAN = @DOCS_MAN@ MAN_OUTPUT = man MAN_EXTENSION = .3 MAN_LINKS = NO #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- GENERATE_XML = @DOCS_XML@ XML_SCHEMA = XML_DTD = XML_OUTPUT = xml XML_PROGRAMLISTING = YES #--------------------------------------------------------------------------- # configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- ENABLE_PREPROCESSING = YES MACRO_EXPANSION = YES EXPAND_ONLY_PREDEF = NO SEARCH_INCLUDES = YES INCLUDE_PATH = INCLUDE_FILE_PATTERNS = PREDEFINED = "" EXPAND_AS_DEFINED = SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::addtions related to external references #--------------------------------------------------------------------------- TAGFILES = GENERATE_TAGFILE = @DOCS_TAGFILE_LOCATION@ ALLEXTERNALS = NO EXTERNAL_GROUPS = YES PERL_PATH = @PERL_EXECUTABLE@ #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- CLASS_DIAGRAMS = YES HIDE_UNDOC_RELATIONS = YES HAVE_DOT = @HAVE_DOT@ CLASS_GRAPH = YES COLLABORATION_GRAPH = YES TEMPLATE_RELATIONS = NO INCLUDE_GRAPH = YES INCLUDED_BY_GRAPH = YES GRAPHICAL_HIERARCHY = YES DOT_IMAGE_FORMAT = png DOT_PATH = @DOXYGEN_DOT_EXECUTABLE@ DOTFILE_DIRS = GENERATE_LEGEND = YES DOT_CLEANUP = YES DOT_FONTNAME = FreeSans DOT_FONTPATH = 
DOT_FONTSIZE = 10 DOT_GRAPH_MAX_NODES = 50 DOT_MULTI_TARGETS = NO DOT_TRANSPARENT = NO #--------------------------------------------------------------------------- # Configuration::addtions related to the search engine #--------------------------------------------------------------------------- SEARCHENGINE = YES LucenePlusPlus-rel_3.0.9/doc/doxygen/doxygen.css.cmake000066400000000000000000000120301456444476200230140ustar00rootroot00000000000000H1 { text-align: center; font-family: Arial, Helvetica, sans-serif; } H2 { font-family: Geneva, Arial, Helvetica, sans-serif; } CAPTION { font-weight: bold } DIV.qindex { width: 100%; background-color: #eeeeff; border: 4px solid #eeeeff; text-align: center; margin-bottom: 2px } A.qindex { text-decoration: none; font-weight: bold; } A.qindex:hover { text-decoration: none; background-color: #ddddff } A.qindexHL { text-decoration: none; font-weight: bold; background-color: #6666cc; color: #ffffff } A.qindexHL:hover { text-decoration: none; background-color: #6666cc } A.qindexRef { text-decoration: none; font-weight: bold; } A.qindexRef:hover { text-decoration: none; background-color: #ddddff } A.qindexRefHL { text-decoration: none; font-weight: bold; background-color: #6666cc; color: #ffffff } A.qindexRefHL:hover { text-decoration: none; background-color: #6666cc } A.el { text-decoration: none; font-weight: bold } A.elRef { font-weight: bold } A.code { text-decoration: none; font-weight: normal; color: #4444ee } A.codeRef { font-weight: normal; color: #4444ee } A:hover { text-decoration: none; background-color: #f2f2ff } DL.el { margin-left: -1cm } DIV.fragment { width: 98%; border: 1px solid #CCCCCC; background-color: #f5f5f5; padding-left: 4px; margin: 4px; } DIV.ah { background-color: black; font-weight: bold; color: #ffffff; margin-bottom: 3px; margin-top: 3px } TD.md { background-color: #f2f2ff; font-weight: bold; } TD.mdname1 { background-color: #f2f2ff; font-weight: bold; color: #602020; } TD.mdname { background-color: 
#f2f2ff; font-weight: bold; color: #602020; width: 600px; } DIV.groupHeader { margin-left: 16px; margin-top: 12px; margin-bottom: 6px; font-weight: bold } DIV.groupText { margin-left: 16px; font-style: italic; font-size: smaller } BODY { background: white; color: black; margin-right: 20px; margin-left: 20px; } TD.indexkey { background-color: #eeeeff; font-weight: bold; padding-right : 10px; padding-top : 2px; padding-left : 10px; padding-bottom : 2px; margin-left : 0px; margin-right : 0px; margin-top : 2px; margin-bottom : 2px } TD.indexvalue { background-color: #eeeeff; font-style: italic; padding-right : 10px; padding-top : 2px; padding-left : 10px; padding-bottom : 2px; margin-left : 0px; margin-right : 0px; margin-top : 2px; margin-bottom : 2px } TR.memlist { background-color: #f0f0f0; } P.formulaDsp { text-align: center; } IMG.formulaDsp { } IMG.formulaInl { vertical-align: middle; } SPAN.keyword { color: #008000 } SPAN.keywordtype { color: #604020 } SPAN.keywordflow { color: #e08000 } SPAN.comment { color: #800000 } SPAN.preprocessor { color: #806020 } SPAN.stringliteral { color: #002080 } SPAN.charliteral { color: #008080 } .mdTable { border: 1px solid #868686; background-color: #f2f2ff; } .mdRow { padding: 8px 20px; } .mdescLeft { font-size: smaller; font-family: Arial, Helvetica, sans-serif; background-color: #FAFAFA; padding-left: 8px; border-top: 1px none #E0E0E0; border-right: 1px none #E0E0E0; border-bottom: 1px none #E0E0E0; border-left: 1px none #E0E0E0; margin: 0px; } .mdescRight { font-size: smaller; font-family: Arial, Helvetica, sans-serif; font-style: italic; background-color: #FAFAFA; padding-left: 4px; border-top: 1px none #E0E0E0; border-right: 1px none #E0E0E0; border-bottom: 1px none #E0E0E0; border-left: 1px none #E0E0E0; margin: 0px; padding-bottom: 0px; padding-right: 8px; } .memItemLeft { padding: 1px 0px 0px 8px; margin: 4px; border-top-width: 1px; border-right-width: 1px; border-bottom-width: 1px; border-left-width: 1px; 
border-top-style: solid; border-top-color: #E0E0E0; border-right-color: #E0E0E0; border-bottom-color: #E0E0E0; border-left-color: #E0E0E0; border-right-style: none; border-bottom-style: none; border-left-style: none; background-color: #FAFAFA; font-family: Geneva, Arial, Helvetica, sans-serif; font-size: 12px; } .memItemRight { padding: 1px 0px 0px 8px; margin: 4px; border-top-width: 1px; border-right-width: 1px; border-bottom-width: 1px; border-left-width: 1px; border-top-style: solid; border-top-color: #E0E0E0; border-right-color: #E0E0E0; border-bottom-color: #E0E0E0; border-left-color: #E0E0E0; border-right-style: none; border-bottom-style: none; border-left-style: none; background-color: #FAFAFA; font-family: Geneva, Arial, Helvetica, sans-serif; font-size: 13px; } LucenePlusPlus-rel_3.0.9/doc/doxygen/helpfooter.htm.cmake000066400000000000000000000001451456444476200235120ustar00rootroot00000000000000

clucene.sourceforge.net

LucenePlusPlus-rel_3.0.9/doc/doxygen/helpheader.htm.cmake000066400000000000000000000010531456444476200234430ustar00rootroot00000000000000 Lucene++ API Documentation (Version @lucene++_SOVERSION@)

Lucene++ - a full-featured, c++ search engine
API Documentation


LucenePlusPlus-rel_3.0.9/include/000077500000000000000000000000001456444476200167535ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/include/CMakeLists.txt000066400000000000000000000003121456444476200215070ustar00rootroot00000000000000#################################### # include directories #################################### add_subdirectory(config_h) add_subdirectory(lucene++) include_directories("${CMAKE_CURRENT_SOURCE_DIR}")LucenePlusPlus-rel_3.0.9/include/config_h/000077500000000000000000000000001456444476200205275ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/include/config_h/CMakeLists.txt000066400000000000000000000014261456444476200232720ustar00rootroot00000000000000################################# # set Config.h vars ################################# if(ENABLE_BOOST_INTEGER) set(USE_BOOST_INTEGER "define") else() set(USE_BOOST_INTEGER "undef") endif() if(ENABLE_CYCLIC_CHECK) set(USE_CYCLIC_CHECK "define") else() set(USE_CYCLIC_CHECK "undef") endif() if(LUCENE_BUILD_SHARED) set(LPP_SHARED_DLL "define") else() set(LPP_SHARED_DLL "undef") endif() ################################# # generate Config.h ################################# configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/Config.h.in" "${lucene++_BINARY_DIR}/include/Config.h" @ONLY ) ################################# # install Config.h ################################# install( FILES "${lucene++_BINARY_DIR}/include/Config.h" DESTINATION include/lucene++ ) LucenePlusPlus-rel_3.0.9/include/config_h/Config.h.in000066400000000000000000000053571456444476200225240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef CONFIG_H #define CONFIG_H #if defined(_WIN32) || defined(_WIN64) #pragma warning(disable:4251) #pragma warning(disable:4275) #pragma warning(disable:4005) #pragma warning(disable:4996) #ifndef _WIN64 #pragma warning(disable:4244) #endif #endif // Define to enable boost integer types #@USE_BOOST_INTEGER@ LPP_USE_BOOST_INTEGER // Generic helper definitions for shared library support #if defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__) #define LPP_IMPORT __declspec(dllimport) #define LPP_EXPORT __declspec(dllexport) #define LPP_LOCAL #else #if __GNUC__ >= 4 #define LPP_IMPORT __attribute__ ((visibility ("default"))) #define LPP_EXPORT __attribute__ ((visibility ("default"))) #define LPP_LOCAL __attribute__ ((visibility ("hidden"))) #else #define LPP_IMPORT #define LPP_EXPORT #define LPP_LOCAL #endif #endif // bulding shared? #@LPP_SHARED_DLL@ LPP_SHARED_LIB // setup library binds #ifdef LPP_SHARED_LIB #ifdef LPP_BUILDING_LIB #define LPP_API LPP_EXPORT #define LPP_CONTRIB_API LPP_EXPORT #else #define LPP_API LPP_IMPORT #define LPP_CONTRIB_API LPP_IMPORT #endif #else #define LPP_API #define LPP_CONTRIB_API #define LPP_LOCAL #endif // LPP_LOCAL // legacy binds #define LPPAPI LPP_API #define LPPCONTRIBAPI LPP_CONTRIB_API #define LPPLOCAL LPP_LOCAL // Check windows #if defined(_WIN32) || defined(_WIN64) #define LPP_UNICODE_CHAR_SIZE_2 #if defined(_WIN64) #define LPP_BUILD_64 #else #define LPP_BUILD_32 #endif #endif // Check GCC #if defined(__GNUC__) #define LPP_UNICODE_CHAR_SIZE_4 #if defined(__x86_64__) || defined(__ppc64__) #define LPP_BUILD_64 #else #define LPP_BUILD_32 #endif #endif // Default to 32-bit platforms #if !defined(LPP_BUILD_32) && !defined(LPP_BUILD_64) #define LPP_BUILD_32 #endif // Default to 4-byte unicode format #if !defined(LPP_UNICODE_CHAR_SIZE_2) && !defined(LPP_UNICODE_CHAR_SIZE_4) #define LPP_UNICODE_CHAR_SIZE_4 #endif // Define to enable cyclic checking 
in debug builds #@USE_CYCLIC_CHECK@ LPP_USE_CYCLIC_CHECK // Make internal bitset storage public #define BOOST_DYNAMIC_BITSET_DONT_USE_FRIENDS #define BOOST_FILESYSTEM_VERSION 3 // Use windows definitions #if defined(_WIN32) || defined(_WIN64) #define BOOST_USE_WINDOWS_H #endif // Disable deprication warnings in windows #if defined(_WIN32) || defined(_WIN64) #define _CRT_SECURE_NO_WARNINGS #endif #endif //CONFIG_H LucenePlusPlus-rel_3.0.9/include/lucene++/000077500000000000000000000000001456444476200203545ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/include/lucene++/ASCIIFoldingFilter.h000066400000000000000000000075601456444476200240360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ASCIIFOLDINGFILTER_H #define ASCIIFOLDINGFILTER_H #include "TokenFilter.h" namespace Lucene { /// This class converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127 ASCII /// characters (the "Basic Latin" Unicode block) into their ASCII equivalents, if one exists. 
/// /// Characters from the following Unicode blocks are converted; however, only those characters with reasonable ASCII /// alternatives are converted: /// /// C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf /// Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf /// Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf /// Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf /// Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf /// Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf /// IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf /// Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf /// Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf /// General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf /// Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf /// Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf /// Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf /// Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf /// Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf /// Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf /// /// See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode /// /// The set of character conversions supported by this class is a superset of those supported by Lucene's {@link /// ISOLatin1AccentFilter} which strips accents from Latin1 characters. For example, 'à' will be replaced by 'a'. /// class LPPAPI ASCIIFoldingFilter : public TokenFilter { public: ASCIIFoldingFilter(const TokenStreamPtr& input); virtual ~ASCIIFoldingFilter(); LUCENE_CLASS(ASCIIFoldingFilter); protected: CharArray output; int32_t outputPos; TermAttributePtr termAtt; public: virtual bool incrementToken(); /// Converts characters above ASCII to their ASCII equivalents. 
For example, accents are removed from /// accented characters. /// @param input The string to fold /// @param length The number of characters in the input string void foldToASCII(const wchar_t* input, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/AbstractAllTermDocs.h000066400000000000000000000023631456444476200243660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ABSTRACTALLTERMDOCS_H #define ABSTRACTALLTERMDOCS_H #include "TermDocs.h" namespace Lucene { /// Base class for enumerating all but deleted docs. /// /// NOTE: this class is meant only to be used internally by Lucene; it's only public so it /// can be shared across packages. class LPPAPI AbstractAllTermDocs : public TermDocs, public LuceneObject { public: AbstractAllTermDocs(int32_t maxDoc); virtual ~AbstractAllTermDocs(); LUCENE_CLASS(AbstractAllTermDocs); protected: int32_t maxDoc; int32_t _doc; public: virtual void seek(const TermPtr& term); virtual void seek(const TermEnumPtr& termEnum); virtual int32_t doc(); virtual int32_t freq(); virtual bool next(); virtual int32_t read(Collection& docs, Collection& freqs); virtual bool skipTo(int32_t target); virtual void close(); virtual bool isDeleted(int32_t doc) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/AbstractField.h000066400000000000000000000227301456444476200232400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef ABSTRACTFIELD_H #define ABSTRACTFIELD_H #include "Fieldable.h" namespace Lucene { class LPPAPI AbstractField : public Fieldable, public LuceneObject { public: /// Specifies whether and how a field should be stored. enum Store { /// Store the original field value in the index. This is useful for short texts like a document's title /// which should be displayed with the results. The value is stored in its original form, ie. no analyzer /// is used before it is stored. STORE_YES, /// Do not store the field value in the index. STORE_NO }; /// Specifies whether and how a field should be indexed. enum Index { /// Do not index the field value. This field can thus not be searched, but one can still access its /// contents provided it is {@link Field.Store stored}. INDEX_NO, /// Index the tokens produced by running the field's value through an Analyzer. This is useful for /// common text. INDEX_ANALYZED, /// Index the field's value without using an Analyzer, so it can be searched. As no analyzer is used /// the value will be stored as a single term. This is useful for unique Ids like product numbers. INDEX_NOT_ANALYZED, /// Index the field's value without an Analyzer, and also disable the storing of norms. Note that you /// can also separately enable/disable norms by calling {@link Field#setOmitNorms}. No norms means /// that index-time field and document boosting and field length normalization are disabled. The benefit /// is less memory usage as norms take up one byte of RAM per indexed field for every document in the /// index, during searching. Note that once you index a given field with norms enabled, disabling norms /// will have no effect. In other words, for this to have the above described effect on a field, all /// instances of that field must be indexed with NOT_ANALYZED_NO_NORMS from the beginning. 
INDEX_NOT_ANALYZED_NO_NORMS, /// Index the tokens produced by running the field's value through an Analyzer, and also separately /// disable the storing of norms. See {@link #NOT_ANALYZED_NO_NORMS} for what norms are and why you /// may want to disable them. INDEX_ANALYZED_NO_NORMS }; /// Specifies whether and how a field should have term vectors. enum TermVector { /// Do not store term vectors. TERM_VECTOR_NO, /// Store the term vectors of each document. A term vector is a list of the document's terms and their /// number of occurrences in that document. TERM_VECTOR_YES, /// Store the term vector + token position information /// @see #YES TERM_VECTOR_WITH_POSITIONS, /// Store the term vector + token offset information /// @see #YES TERM_VECTOR_WITH_OFFSETS, /// Store the term vector + token position and offset information /// @see #YES /// @see #WITH_POSITIONS /// @see #WITH_OFFSETS TERM_VECTOR_WITH_POSITIONS_OFFSETS }; public: virtual ~AbstractField(); LUCENE_CLASS(AbstractField); protected: AbstractField(); AbstractField(const String& name, Store store, Index index, TermVector termVector); String _name; bool storeTermVector; bool storeOffsetWithTermVector; bool storePositionWithTermVector; bool _omitNorms; bool _isStored; bool _isIndexed; bool _isTokenized; bool _isBinary; bool lazy; bool omitTermFreqAndPositions; double boost; // the data object for all different kind of field values FieldsData fieldsData; // pre-analyzed tokenStream for indexed fields TokenStreamPtr tokenStream; // length/offset for all primitive types int32_t binaryLength; int32_t binaryOffset; public: /// Sets the boost factor hits on this field. This value will be multiplied into the score of all /// hits on this this field of this document. /// /// The boost is multiplied by {@link Document#getBoost()} of the document containing this field. /// If a document has multiple fields with the same name, all such values are multiplied together. 
/// This product is then used to compute the norm factor for the field. By default, in the {@link /// Similarity#computeNorm(String, FieldInvertState)} method, the boost value is multiplied by the /// {@link Similarity#lengthNorm(String,int)} and then rounded by {@link Similarity#encodeNorm(double)} /// before it is stored in the index. One should attempt to ensure that this product does not overflow /// the range of that encoding. /// /// @see Document#setBoost(double) /// @see Similarity#computeNorm(String, FieldInvertState) /// @see Similarity#encodeNorm(double) virtual void setBoost(double boost); /// Returns the boost factor for hits for this field. /// /// The default value is 1.0. /// /// Note: this value is not stored directly with the document in the index. Documents returned from /// {@link IndexReader#document(int)} and {@link Searcher#doc(int)} may thus not have the same value /// present as when this field was indexed. virtual double getBoost(); /// Returns the name of the field as an interned string. For example "date", "title", "body", ... virtual String name(); /// True if the value of the field is to be stored in the index for return with search hits. It is an /// error for this to be true if a field is Reader-valued. virtual bool isStored(); /// True if the value of the field is to be indexed, so that it may be searched on. virtual bool isIndexed(); /// True if the value of the field should be tokenized as text prior to indexing. Un-tokenized fields /// are indexed as a single word and may not be Reader-valued. virtual bool isTokenized(); /// True if the term or terms used to index this field are stored as a term vector, available from /// {@link IndexReader#getTermFreqVector(int,String)}. These methods do not provide access to the /// original content of the field, only to terms used to index it. If the original content must be /// preserved, use the stored attribute instead. 
virtual bool isTermVectorStored(); /// True if terms are stored as term vector together with their offsets (start and end position in /// source text). virtual bool isStoreOffsetWithTermVector(); /// True if terms are stored as term vector together with their token positions. virtual bool isStorePositionWithTermVector(); /// True if the value of the field is stored as binary. virtual bool isBinary(); /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. /// @return reference to the Field value as byte[]. virtual ByteArray getBinaryValue(); /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. /// @return reference to the Field value as byte[]. virtual ByteArray getBinaryValue(ByteArray result); /// Returns length of byte[] segment that is used as value, if Field is not binary returned value is /// undefined. /// @return length of byte[] segment that represents this Field value. virtual int32_t getBinaryLength(); /// Returns offset into byte[] segment that is used as value, if Field is not binary returned value is /// undefined. /// @return index of the first character in byte[] segment that represents this Field value. virtual int32_t getBinaryOffset(); /// True if norms are omitted for this indexed field. virtual bool getOmitNorms(); /// @see #setOmitTermFreqAndPositions virtual bool getOmitTermFreqAndPositions(); /// If set, omit normalization factors associated with this indexed field. /// This effectively disables indexing boosts and length normalization for this field. virtual void setOmitNorms(bool omitNorms); /// If set, omit term freq, positions and payloads from postings for this field. 
/// /// NOTE: While this option reduces storage space required in the index, it also means any query requiring /// positional information, such as {@link PhraseQuery} or {@link SpanQuery} subclasses will silently fail /// to find results. virtual void setOmitTermFreqAndPositions(bool omitTermFreqAndPositions); /// Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field /// is lazily loaded, retrieving it's values via {@link #stringValue()} or {@link #getBinaryValue()} /// is only valid as long as the {@link IndexReader} that retrieved the {@link Document} is still open. /// /// @return true if this field can be loaded lazily virtual bool isLazy(); /// Prints a Field for human consumption. virtual String toString(); protected: void setStoreTermVector(TermVector termVector); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/AllTermDocs.h000066400000000000000000000013131456444476200226740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ALLTERMDOCS_H #define ALLTERMDOCS_H #include "AbstractAllTermDocs.h" namespace Lucene { class AllTermDocs : public AbstractAllTermDocs { public: AllTermDocs(const SegmentReaderPtr& parent); virtual ~AllTermDocs(); LUCENE_CLASS(AllTermDocs); protected: BitVectorWeakPtr _deletedDocs; public: virtual bool isDeleted(int32_t doc); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Analyzer.h000066400000000000000000000066041456444476200223200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ANALYZER_H #define ANALYZER_H #include "CloseableThreadLocal.h" namespace Lucene { /// An Analyzer builds TokenStreams, which analyze text. It thus represents a policy for extracting index terms /// from text. /// /// Typical implementations first build a Tokenizer, which breaks the stream of characters from the Reader into /// raw Tokens. One or more TokenFilters may then be applied to the output of the Tokenizer. class LPPAPI Analyzer : public LuceneObject { public: virtual ~Analyzer(); LUCENE_CLASS(Analyzer); protected: CloseableThreadLocal tokenStreams; public: /// Creates a TokenStream which tokenizes all the text in the provided Reader. Must be able to handle null /// field name for backward compatibility. virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader) = 0; /// Creates a TokenStream that is allowed to be re-used from the previous time that the same thread called /// this method. Callers that do not need to use more than one TokenStream at the same time from this analyzer /// should use this method for better performance. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); /// Invoked before indexing a Fieldable instance if terms have already been added to that field. This allows /// custom analyzers to place an automatic position increment gap between Fieldable instances using the same /// field name. The default value position increment gap is 0. With a 0 position increment gap and the typical /// default token position increment of 1, all terms in a field, including across Fieldable instances, are in /// successive positions, allowing exact PhraseQuery matches, for instance, across Fieldable instance boundaries. /// /// @param fieldName Fieldable name being indexed. 
/// @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)} virtual int32_t getPositionIncrementGap(const String& fieldName); /// Just like {@link #getPositionIncrementGap}, except for Token offsets instead. By default this returns 1 for /// tokenized fields and, as if the fields were joined with an extra space character, and 0 for un-tokenized /// fields. This method is only called if the field produced at least one token for indexing. /// /// @param field the field just indexed /// @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)} virtual int32_t getOffsetGap(const FieldablePtr& field); /// Frees persistent resources used by this Analyzer virtual void close(); protected: /// Used by Analyzers that implement reusableTokenStream to retrieve previously saved TokenStreams for re-use /// by the same thread. virtual LuceneObjectPtr getPreviousTokenStream(); /// Used by Analyzers that implement reusableTokenStream to save a TokenStream for later re-use by the /// same thread. virtual void setPreviousTokenStream(const LuceneObjectPtr& stream); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Array.h000066400000000000000000000057311456444476200216110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef ARRAY_H #define ARRAY_H #include #include "Lucene.h" namespace Lucene { template class ArrayData { public: ArrayData(int32_t size_) { data = NULL; resize(size_); } ~ArrayData() { resize(0); } public: TYPE* data; int32_t size; public: void resize(int32_t size_) { if (size_ == 0) { FreeMemory(data); data = NULL; } else if (data == NULL) { data = (TYPE*)AllocMemory(size_ * sizeof(TYPE)); } else { data = (TYPE*)ReallocMemory(data, size_ * sizeof(TYPE)); } this->size = size_; } }; /// Utility template class to handle sharable arrays of simple data types template class Array { public: typedef Array this_type; typedef ArrayData array_type; Array() { array = NULL; } protected: boost::shared_ptr container; array_type* array; public: static this_type newInstance(int32_t size) { this_type instance; instance.container = Lucene::newInstance(size); instance.array = instance.container.get(); return instance; } void reset() { resize(0); } void resize(int32_t size) { if (size == 0) { container.reset(); } else if (!container) { container = Lucene::newInstance(size); } else { container->resize(size); } array = container.get(); } TYPE* get() const { return array->data; } int32_t size() const { return array->size; } bool equals(const this_type& other) const { if (array->size != other.array->size) { return false; } return (std::memcmp(array->data, other.array->data, array->size) == 0); } int32_t hashCode() const { return (int32_t)(int64_t)array; } TYPE& operator[] (int32_t i) const { return array->data[i]; } operator bool () const { return container.get() != NULL; } bool operator! 
() const { return !container; } bool operator== (const Array& other) { return (container == other.container); } bool operator!= (const Array& other) { return (container != other.container); } }; template inline std::size_t hash_value(const Array& value) { return (std::size_t)value.hashCode(); } template inline bool operator== (const Array& value1, const Array& value2) { return (value1.hashCode() == value2.hashCode()); } } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Attribute.h000066400000000000000000000040231456444476200224670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ATTRIBUTE_H #define ATTRIBUTE_H #include "LuceneObject.h" namespace Lucene { /// Base class for Attributes that can be added to a {@link AttributeSource}. /// /// Attributes are used to add data in a dynamic, yet type-safe way to a source of usually streamed objects, /// eg. a {@link TokenStream}. class LPPAPI Attribute : public LuceneObject { public: virtual ~Attribute(); LUCENE_CLASS(Attribute); public: /// Clears the values in this Attribute and resets it to its default value. If this implementation /// implements more than one Attribute interface it clears all. virtual void clear() = 0; /// Subclasses must implement this method and should compute a hashCode similar to this: /// /// int32_t hashCode() /// { /// int32_t code = startOffset; /// code = code * 31 + endOffset; /// return code; /// } /// /// see also {@link #equals(Object)} virtual int32_t hashCode() = 0; /// All values used for computation of {@link #hashCode()} should be checked here for equality. 
/// /// see also {@link LuceneObject#equals(Object)} virtual bool equals(const LuceneObjectPtr& other) = 0; /// Copies the values from this Attribute into the passed-in target attribute. The target implementation /// must support all the Attributes this implementation supports. virtual void copyTo(const AttributePtr& target) = 0; /// Shallow clone. Subclasses must override this if they need to clone any members deeply. /// @param base clone reference - null when called initially, then set in top virtual override. virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/AttributeSource.h000066400000000000000000000162621456444476200236600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ATTRIBUTESOURCE_H #define ATTRIBUTESOURCE_H #include "LuceneObject.h" namespace Lucene { class LPPAPI AttributeFactory : public LuceneObject { protected: AttributeFactory(); public: virtual ~AttributeFactory(); LUCENE_CLASS(AttributeFactory); public: /// returns an {@link Attribute}. virtual AttributePtr createAttributeInstance(const String& className); template AttributePtr createInstance(const String& className) { AttributePtr attrImpl = createAttributeInstance(className); return attrImpl ? attrImpl : newLucene(); } /// This is the default factory that creates {@link Attribute}s using the class name of the supplied /// {@link Attribute} interface class by appending Impl to it. static AttributeFactoryPtr DEFAULT_ATTRIBUTE_FACTORY(); }; /// An AttributeSource contains a list of different {@link Attribute}s, and methods to add and get them. 
/// There can only be a single instance of an attribute in the same AttributeSource instance. This is ensured /// by passing in the actual type of the Attribute (Class) to the {@link #addAttribute(Class)}, /// which then checks if an instance of that type is already present. If yes, it returns the instance, otherwise /// it creates a new instance and returns it. class LPPAPI AttributeSource : public LuceneObject { public: /// An AttributeSource using the default attribute factory {@link DefaultAttributeFactory}. AttributeSource(); /// An AttributeSource that uses the same attributes as the supplied one. AttributeSource(const AttributeSourcePtr& input); /// An AttributeSource using the supplied {@link AttributeFactory} for creating new {@link Attribute} /// instances. AttributeSource(const AttributeFactoryPtr& factory); virtual ~AttributeSource(); LUCENE_CLASS(AttributeSource); protected: AttributeFactoryPtr factory; MapStringAttribute attributes; AttributeSourceStatePtr currentState; public: /// returns the used AttributeFactory. AttributeFactoryPtr getAttributeFactory(); /// This method first checks if an instance of that class is already in this AttributeSource and returns it. /// Otherwise a new instance is created, added to this AttributeSource and returned. template boost::shared_ptr addAttribute() { String className(ATTR::_getClassName()); boost::shared_ptr attrImpl(boost::dynamic_pointer_cast(getAttribute(className))); if (!attrImpl) { attrImpl = boost::dynamic_pointer_cast(factory->createInstance(className)); if (!attrImpl) { boost::throw_exception(IllegalArgumentException(L"Could not instantiate implementing class for " + className)); } addAttribute(className, attrImpl); } return attrImpl; } /// Adds a custom Attribute instance. void addAttribute(const String& className, const AttributePtr& attrImpl); /// Returns true if this AttributeSource has any attributes. 
bool hasAttributes(); /// Returns true, if this AttributeSource contains the passed-in Attribute. template bool hasAttribute() { return getAttribute(ATTR::_getClassName()).get() != NULL; } /// Returns the instance of the passed in Attribute contained in this AttributeSource. template boost::shared_ptr getAttribute() { String className(ATTR::_getClassName()); boost::shared_ptr attr(boost::dynamic_pointer_cast(getAttribute(className))); if (!attr) { boost::throw_exception(IllegalArgumentException(L"This AttributeSource does not have the attribute '" + className + L"'.")); } return attr; } /// Resets all Attributes in this AttributeSource by calling {@link AttributeImpl#clear()} on each Attribute /// implementation. void clearAttributes(); /// Captures the state of all Attributes. The return value can be passed to {@link #restoreState} to restore /// the state of this or another AttributeSource. AttributeSourceStatePtr captureState(); /// Restores this state by copying the values of all attribute implementations that this state contains into /// the attributes implementations of the targetStream. The targetStream must contain a corresponding instance /// for each argument contained in this state (eg. it is not possible to restore the state of an AttributeSource /// containing a TermAttribute into a AttributeSource using a Token instance as implementation). /// /// Note that this method does not affect attributes of the targetStream that are not contained in this state. /// In other words, if for example the targetStream contains an OffsetAttribute, but this state doesn't, then /// the value of the OffsetAttribute remains unchanged. It might be desirable to reset its value to the default, /// in which case the caller should first call {@link TokenStream#clearAttributes()} on the targetStream. void restoreState(const AttributeSourceStatePtr& state); /// Return hash code for this object. 
virtual int32_t hashCode(); /// Return whether two objects are equal virtual bool equals(const LuceneObjectPtr& other); /// Returns a string representation of the object virtual String toString(); /// Performs a clone of all {@link AttributeImpl} instances returned in a new AttributeSource instance. This /// method can be used to eg. create another TokenStream with exactly the same attributes (using {@link /// #AttributeSource(AttributeSource)}) AttributeSourcePtr cloneAttributes(); /// Return a vector of attributes based on currentState. Collection getAttributes(); protected: /// The caller must pass in a className value. /// This method checks if an instance of that class is already in this AttributeSource and returns it. AttributePtr getAttribute(const String& className); /// Returns true, if this AttributeSource contains the passed-in Attribute. bool hasAttribute(const String& className); void computeCurrentState(); }; class LPPAPI DefaultAttributeFactory : public AttributeFactory { public: virtual ~DefaultAttributeFactory(); LUCENE_CLASS(DefaultAttributeFactory); public: /// returns an {@link Attribute}. virtual AttributePtr createAttributeInstance(const String& className); }; /// This class holds the state of an AttributeSource. /// @see #captureState /// @see #restoreState class LPPAPI AttributeSourceState : public LuceneObject { public: virtual ~AttributeSourceState(); LUCENE_CLASS(AttributeSourceState); protected: AttributePtr attribute; AttributeSourceStatePtr next; public: virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); friend class AttributeSource; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/AveragePayloadFunction.h000066400000000000000000000022011456444476200251120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef AVERAGEPAYLOADFUNCTION_H #define AVERAGEPAYLOADFUNCTION_H #include "PayloadFunction.h" namespace Lucene { /// Calculate the final score as the average score of all payloads seen. /// /// Is thread safe and completely reusable. class LPPAPI AveragePayloadFunction : public PayloadFunction { public: virtual ~AveragePayloadFunction(); LUCENE_CLASS(AveragePayloadFunction); public: virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, double currentScore, double currentPayloadScore); virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore); virtual int32_t hashCode(); virtual bool equals(const LuceneObjectPtr& other); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Base64.h000066400000000000000000000014401456444476200215500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef BASE64_H #define BASE64_H #include "LuceneObject.h" namespace Lucene { class LPPAPI Base64 : public LuceneObject { public: virtual ~Base64(); LUCENE_CLASS(Base64); protected: static const String BASE64_CHARS; public: static String encode(ByteArray bytes); static String encode(const uint8_t* bytes, int32_t length); static ByteArray decode(const String& str); protected: static bool isBase64(wchar_t ch); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/BaseCharFilter.h000066400000000000000000000021121456444476200233370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BASECHARFILTER_H #define BASECHARFILTER_H #include "CharFilter.h" namespace Lucene { /// Base utility class for implementing a {@link CharFilter}. You subclass this, and then record mappings by /// calling {@link #addOffCorrectMap}, and then invoke the correct method to correct an offset. class LPPAPI BaseCharFilter : public CharFilter { public: BaseCharFilter(const CharStreamPtr& in); virtual ~BaseCharFilter(); LUCENE_CLASS(BaseCharFilter); protected: IntArray offsets; IntArray diffs; int32_t size; protected: /// Retrieve the corrected offset. virtual int32_t correct(int32_t currentOff); int32_t getLastCumulativeDiff(); void addOffCorrectMap(int32_t off, int32_t cumulativeDiff); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/BitSet.h000066400000000000000000000042051456444476200217200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BITSET_H #define BITSET_H #include #include "LuceneObject.h" namespace Lucene { class LPPAPI BitSet : public LuceneObject { public: BitSet(uint32_t size = 0); virtual ~BitSet(); LUCENE_CLASS(BitSet); protected: typedef boost::dynamic_bitset bitset_type; bitset_type bitSet; public: const uint64_t* getBits(); void clear(); void clear(uint32_t bitIndex); void fastClear(uint32_t bitIndex); void clear(uint32_t fromIndex, uint32_t toIndex); void fastClear(uint32_t fromIndex, uint32_t toIndex); void set(uint32_t bitIndex); void fastSet(uint32_t bitIndex); void set(uint32_t bitIndex, bool value); void fastSet(uint32_t bitIndex, bool value); void set(uint32_t fromIndex, uint32_t toIndex); void fastSet(uint32_t fromIndex, uint32_t toIndex); void set(uint32_t fromIndex, uint32_t toIndex, bool value); void fastSet(uint32_t fromIndex, uint32_t toIndex, bool value); void flip(uint32_t bitIndex); void fastFlip(uint32_t bitIndex); void flip(uint32_t fromIndex, uint32_t toIndex); void fastFlip(uint32_t fromIndex, uint32_t toIndex); uint32_t size() const; uint32_t numBlocks() const; bool isEmpty() const; bool get(uint32_t bitIndex) const; bool fastGet(uint32_t bitIndex) const; int32_t nextSetBit(uint32_t fromIndex) const; void _and(const BitSetPtr& set); void _or(const BitSetPtr& set); void _xor(const BitSetPtr& set); void andNot(const BitSetPtr& set); bool intersectsBitSet(const BitSetPtr& set) const; uint32_t cardinality(); void resize(uint32_t size); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); }; } #endif 
LucenePlusPlus-rel_3.0.9/include/lucene++/BitUtil.h000066400000000000000000000054411456444476200221050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BITUTIL_H #define BITUTIL_H #include "LuceneObject.h" namespace Lucene { /// A variety of high efficiency bit twiddling routines. class LPPAPI BitUtil : public LuceneObject { public: virtual ~BitUtil(); LUCENE_CLASS(BitUtil); public: /// Table of number of trailing zeros in a byte static const uint8_t ntzTable[]; public: /// Returns the number of bits set in the long static int32_t pop(int64_t x); /// Returns the number of set bits in an array of longs. static int64_t pop_array(const int64_t* A, int32_t wordOffset, int32_t numWords); /// Returns the popcount or cardinality of the two sets after an intersection. Neither array is modified. static int64_t pop_intersect(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); /// Returns the popcount or cardinality of the union of two sets. Neither array is modified. static int64_t pop_union(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); /// Returns the popcount or cardinality of A & ~B. Neither array is modified. static int64_t pop_andnot(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); /// Returns the popcount or cardinality of A ^ B. Neither array is modified. static int64_t pop_xor(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); /// Returns number of trailing zeros in a 64 bit long value. static int32_t ntz(int64_t val); /// Returns number of trailing zeros in a 32 bit int value. 
static int32_t ntz(int32_t val); /// Returns 0 based index of first set bit (only works for x!=0) /// This is an alternate implementation of ntz() static int32_t ntz2(int64_t x); /// Returns 0 based index of first set bit. /// This is an alternate implementation of ntz() static int32_t ntz3(int64_t x); /// Returns true if v is a power of two or zero. static bool isPowerOfTwo(int32_t v); /// Returns true if v is a power of two or zero. static bool isPowerOfTwo(int64_t v); /// Returns the next highest power of two, or the current value if it's already a power of two or zero. static int32_t nextHighestPowerOfTwo(int32_t v); /// Returns the next highest power of two, or the current value if it's already a power of two or zero. static int64_t nextHighestPowerOfTwo(int64_t v); protected: inline static void CSA(int64_t& h, int64_t& l, int64_t a, int64_t b, int64_t c); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/BitVector.h000066400000000000000000000054531456444476200224350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BITVECTOR_H #define BITVECTOR_H #include "LuceneObject.h" namespace Lucene { /// Optimized implementation of a vector of bits. class LPPAPI BitVector : public LuceneObject { public: /// Constructs a vector capable of holding n bits. BitVector(int32_t n = 0); BitVector(ByteArray bits, int32_t size); /// Constructs a bit vector from the file name in Directory d, /// as written by the {@link #write} method. 
BitVector(const DirectoryPtr& d, const String& name); virtual ~BitVector(); LUCENE_CLASS(BitVector); protected: ByteArray bits; int32_t _size; int32_t _count; static const uint8_t BYTE_COUNTS[]; // table of bits/byte public: /// Clone this vector virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); /// Sets the value of bit to one. void set(int32_t bit); /// Sets the value of bit to true, and returns true if bit was already set. bool getAndSet(int32_t bit); /// Sets the value of bit to zero. void clear(int32_t bit); /// Returns true if bit is one and false if it is zero. bool get(int32_t bit); /// Returns the number of bits in this vector. This is also one greater than /// the number of the largest valid bit number. int32_t size(); /// Returns the total number of one bits in this vector. This is efficiently /// computed and cached, so that, if the vector is not changed, no recomputation /// is done for repeated calls. int32_t count(); /// For testing int32_t getRecomputedCount(); /// Writes this vector to the file name in Directory d, in a format that can /// be read by the constructor {@link #BitVector(DirectoryPtr, const String&)}. void write(const DirectoryPtr& d, const String& name); /// Retrieve a subset of this BitVector. /// @param start starting index, inclusive /// @param end ending index, exclusive /// @return subset BitVectorPtr subset(int32_t start, int32_t end); protected: /// Write as a bit set. void writeBits(const IndexOutputPtr& output); /// Write as a d-gaps list. void writeDgaps(const IndexOutputPtr& output); /// Indicates if the bit vector is sparse and should be saved as a d-gaps list, /// or dense, and should be saved as a bit set. bool isSparse(); /// Read as a bit set. void readBits(const IndexInputPtr& input); /// Read as a d-gaps list. 
void readDgaps(const IndexInputPtr& input); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/BooleanClause.h000066400000000000000000000034001456444476200232360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BOOLEANCLAUSE_H #define BOOLEANCLAUSE_H #include "LuceneObject.h" namespace Lucene { /// A clause in a BooleanQuery. class LPPAPI BooleanClause : public LuceneObject { public: /// Specifies how clauses are to occur in matching documents. enum Occur { /// Use this operator for clauses that must appear in the matching documents. MUST, /// Use this operator for clauses that should appear in the matching documents. For a BooleanQuery /// with no MUST clauses one or more SHOULD clauses must match a document for the BooleanQuery to match. /// @see BooleanQuery#setMinimumNumberShouldMatch SHOULD, /// Use this operator for clauses that must not appear in the matching documents. Note that it is not /// possible to search for queries that only consist of a MUST_NOT clause. MUST_NOT }; public: BooleanClause(const QueryPtr& query, Occur occur); virtual ~BooleanClause(); LUCENE_CLASS(BooleanClause); protected: /// The query whose matching documents are combined by the boolean query. 
QueryPtr query; Occur occur; public: Occur getOccur(); void setOccur(Occur occur); QueryPtr getQuery(); void setQuery(const QueryPtr& query); bool isProhibited(); bool isRequired(); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/BooleanQuery.h000066400000000000000000000073261456444476200231420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BOOLEANQUERY_H #define BOOLEANQUERY_H #include "Query.h" #include "BooleanClause.h" #include "Weight.h" namespace Lucene { /// A Query that matches documents matching boolean combinations of other queries, eg. {@link TermQuery}s, /// {@link PhraseQuery}s or other BooleanQuerys. class LPPAPI BooleanQuery : public Query { public: /// Constructs an empty boolean query. /// /// {@link Similarity#coord(int32_t, int32_t)} may be disabled in scoring, as appropriate. For example, /// this score factor does not make sense for most automatically generated queries, like {@link WildcardQuery} /// and {@link FuzzyQuery}. /// /// @param disableCoord disables {@link Similarity#coord(int32_t, int32_t)} in scoring. BooleanQuery(bool disableCoord = false); virtual ~BooleanQuery(); LUCENE_CLASS(BooleanQuery); protected: static int32_t maxClauseCount; Collection clauses; bool disableCoord; int32_t minNrShouldMatch; public: using Query::toString; /// Return the maximum number of clauses permitted, 1024 by default. Attempts to add more than the permitted /// number of clauses cause TooManyClauses to be thrown. 
/// @see #setMaxClauseCount(int32_t) static int32_t getMaxClauseCount(); /// Set the maximum number of clauses permitted per BooleanQuery. Default value is 1024. static void setMaxClauseCount(int32_t maxClauseCount); /// Returns true if {@link Similarity#coord(int32_t, int32_t)} is disabled in scoring for this query instance. /// @see #BooleanQuery(bool) bool isCoordDisabled(); /// Implement coord disabling. virtual SimilarityPtr getSimilarity(const SearcherPtr& searcher); /// Specifies a minimum number of the optional BooleanClauses which must be satisfied. /// /// By default no optional clauses are necessary for a match (unless there are no required clauses). If this /// method is used, then the specified number of clauses is required. /// /// Use of this method is totally independent of specifying that any specific clauses are required (or prohibited). /// This number will only be compared against the number of matching optional clauses. /// /// @param min the number of optional clauses that must match void setMinimumNumberShouldMatch(int32_t min); /// Gets the minimum number of the optional BooleanClauses which must be satisfied. int32_t getMinimumNumberShouldMatch(); /// Adds a clause to a boolean query. /// @see #getMaxClauseCount() void add(const QueryPtr& query, BooleanClause::Occur occur); /// Adds a clause to a boolean query. /// @see #getMaxClauseCount() void add(const BooleanClausePtr& clause); /// Returns the set of clauses in this query. Collection getClauses(); /// Returns an iterator on the clauses in this query. 
Collection::iterator begin(); Collection::iterator end(); virtual WeightPtr createWeight(const SearcherPtr& searcher); virtual QueryPtr rewrite(const IndexReaderPtr& reader); virtual void extractTerms(SetTerm terms); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual String toString(const String& field); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); friend class BooleanWeight; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/BooleanScorer.h000066400000000000000000000126751456444476200232750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BOOLEANSCORER_H #define BOOLEANSCORER_H #include "Scorer.h" #include "Collector.h" namespace Lucene { /// BooleanScorer uses a ~16k array to score windows of docs. So it scores docs 0-16k first, then docs 16-32k, /// etc. For each window it iterates through all query terms and accumulates a score in table[doc%16k]. It also /// stores in the table a bitmask representing which terms contributed to the score. Non-zero scores are chained /// in a linked list. At the end of scoring each window it then iterates through the linked list and, if the /// bitmask matches the boolean constraints, collects a hit. For boolean queries with lots of frequent terms this /// can be much faster, since it does not need to update a priority queue for each posting, instead performing /// constant-time operations per posting. The only downside is that it results in hits being delivered out-of-order /// within the window, which means it cannot be nested within other scorers. But it works well as a top-level scorer. 
/// /// The new BooleanScorer2 implementation instead works by merging priority queues of postings, albeit with some /// clever tricks. For example, a pure conjunction (all terms required) does not require a priority queue. Instead it /// sorts the posting streams at the start, then repeatedly skips the first to to the last. If the first ever equals /// the last, then there's a hit. When some terms are required and some terms are optional, the conjunction can /// be evaluated first, then the optional terms can all skip to the match and be added to the score. Thus the /// conjunction can reduce the number of priority queue updates for the optional terms. class LPPAPI BooleanScorer : public Scorer { public: BooleanScorer(const SimilarityPtr& similarity, int32_t minNrShouldMatch, Collection optionalScorers, Collection prohibitedScorers); virtual ~BooleanScorer(); LUCENE_CLASS(BooleanScorer); protected: SubScorerPtr scorers; BucketTablePtr bucketTable; int32_t maxCoord; Collection coordFactors; int32_t requiredMask; int32_t prohibitedMask; int32_t nextMask; int32_t minNrShouldMatch; int32_t end; BucketPtr current; Bucket* __current = nullptr; int32_t doc; protected: // firstDocID is ignored since nextDoc() initializes 'current' virtual bool score(const CollectorPtr& collector, int32_t max, int32_t firstDocID); public: virtual int32_t advance(int32_t target); virtual int32_t docID(); virtual int32_t nextDoc(); virtual double score(); virtual void score(const CollectorPtr& collector); virtual String toString(); }; class BooleanScorerCollector : public Collector { public: BooleanScorerCollector(int32_t mask, const BucketTablePtr& bucketTable); virtual ~BooleanScorerCollector(); LUCENE_CLASS(BooleanScorerCollector); protected: BucketTableWeakPtr _bucketTable; BucketTable* __bucketTable = nullptr; int32_t mask; ScorerWeakPtr _scorer; Scorer* __scorer = nullptr; public: virtual void collect(int32_t doc); virtual void setNextReader(const IndexReaderPtr& reader, int32_t 
docBase); virtual void setScorer(const ScorerPtr& scorer); virtual bool acceptsDocsOutOfOrder(); }; // An internal class which is used in score(Collector, int32_t) for setting the current score. This is required // since Collector exposes a setScorer method and implementations that need the score will call scorer->score(). // Therefore the only methods that are implemented are score() and doc(). class BucketScorer : public Scorer { public: BucketScorer(); virtual ~BucketScorer(); int32_t freq; LUCENE_CLASS(BucketScorer); float termFreq(){ return freq; } public: double _score; int32_t doc; public: virtual int32_t advance(int32_t target); virtual int32_t docID(); virtual int32_t nextDoc(); virtual double score(); }; class Bucket : public LuceneObject { public: Bucket(); virtual ~Bucket(); LUCENE_CLASS(Bucket); public: int32_t doc; // tells if bucket is valid double score; // incremental score int32_t bits; // used for bool constraints int32_t coord; // count of terms in score BucketWeakPtr _next; // next valid bucket Bucket* __next = nullptr; // next valid bucket }; /// A simple hash table of document scores within a range. 
class BucketTable : public LuceneObject { public: BucketTable(); virtual ~BucketTable(); LUCENE_CLASS(BucketTable); public: static const int32_t SIZE; static const int32_t MASK; Collection buckets; BucketPtr first; // head of valid list Bucket* __first = nullptr; // head of valid list public: CollectorPtr newCollector(int32_t mask); int32_t size(); }; class SubScorer : public LuceneObject { public: SubScorer(const ScorerPtr& scorer, bool required, bool prohibited, const CollectorPtr& collector, const SubScorerPtr& next); virtual ~SubScorer(); LUCENE_CLASS(SubScorer); public: ScorerPtr scorer; bool required; bool prohibited; CollectorPtr collector; SubScorerPtr next; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/BooleanScorer2.h000066400000000000000000000132141456444476200233450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BOOLEANSCORER2_H #define BOOLEANSCORER2_H #include "DisjunctionSumScorer.h" #include "ConjunctionScorer.h" namespace Lucene { /// See the description in BooleanScorer, comparing BooleanScorer & BooleanScorer2 /// /// An alternative to BooleanScorer that also allows a minimum number of optional scorers that should match. /// Implements skipTo(), and has no limitations on the numbers of added scorers. /// Uses ConjunctionScorer, DisjunctionScorer, ReqOptScorer and ReqExclScorer. class BooleanScorer2 : public Scorer { public: /// Creates a {@link Scorer} with the given similarity and lists of required, prohibited and optional /// scorers. In no required scorers are added, at least one of the optional scorers will have to match /// during the search. /// /// @param similarity The similarity to be used. 
/// @param minNrShouldMatch The minimum number of optional added scorers that should match during the search. /// In case no required scorers are added, at least one of the optional scorers will have to match during /// the search. /// @param required The list of required scorers. /// @param prohibited The list of prohibited scorers. /// @param optional The list of optional scorers. BooleanScorer2(const SimilarityPtr& similarity, int32_t minNrShouldMatch, Collection required, Collection prohibited, Collection optional); virtual ~BooleanScorer2(); LUCENE_CLASS(BooleanScorer2); protected: Collection requiredScorers; Collection optionalScorers; Collection prohibitedScorers; CoordinatorPtr coordinator; /// The scorer to which all scoring will be delegated, except for computing and using the coordination factor. ScorerPtr countingSumScorer; int32_t minNrShouldMatch; int32_t doc; public: virtual void initialize(); /// Scores and collects all matching documents. /// @param collector The collector to which all matching documents are passed through. virtual void score(const CollectorPtr& collector); virtual bool score(const CollectorPtr& collector, int32_t max, int32_t firstDocID); virtual int32_t docID(); virtual int32_t nextDoc(); virtual double score(); virtual int32_t advance(int32_t target); protected: ScorerPtr countingDisjunctionSumScorer(Collection scorers, int32_t minNrShouldMatch); ScorerPtr countingConjunctionSumScorer(Collection requiredScorers); ScorerPtr dualConjunctionSumScorer(const ScorerPtr& req1, const ScorerPtr& req2); /// Returns the scorer to be used for match counting and score summing. Uses requiredScorers, optionalScorers /// and prohibitedScorers. ScorerPtr makeCountingSumScorer(); ScorerPtr makeCountingSumScorerNoReq(); ScorerPtr makeCountingSumScorerSomeReq(); /// Returns the scorer to be used for match counting and score summing. Uses the given required scorer and /// the prohibitedScorers. 
/// @param requiredCountingSumScorer A required scorer already built. ScorerPtr addProhibitedScorers(const ScorerPtr& requiredCountingSumScorer); friend class CountingDisjunctionSumScorer; friend class CountingConjunctionSumScorer; }; class Coordinator : public LuceneObject { public: Coordinator(const BooleanScorer2Ptr& scorer); virtual ~Coordinator(); LUCENE_CLASS(Coordinator); public: BooleanScorer2WeakPtr _scorer; Collection coordFactors; int32_t maxCoord; // to be increased for each non prohibited scorer int32_t nrMatchers; // to be increased by score() of match counting scorers. public: void init(); // use after all scorers have been added. friend class BooleanScorer2; }; /// Count a scorer as a single match. class SingleMatchScorer : public Scorer { public: SingleMatchScorer(const ScorerPtr& scorer, const CoordinatorPtr& coordinator); virtual ~SingleMatchScorer(); LUCENE_CLASS(SingleMatchScorer); protected: ScorerPtr scorer; CoordinatorPtr coordinator; int32_t lastScoredDoc; double lastDocScore; public: virtual double score(); virtual int32_t docID(); virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); }; class CountingDisjunctionSumScorer : public DisjunctionSumScorer { public: CountingDisjunctionSumScorer(const BooleanScorer2Ptr& scorer, Collection subScorers, int32_t minimumNrMatchers); virtual ~CountingDisjunctionSumScorer(); LUCENE_CLASS(CountingDisjunctionSumScorer); protected: BooleanScorer2WeakPtr _scorer; int32_t lastScoredDoc; // Save the score of lastScoredDoc, so that we don't compute it more than once in score(). 
double lastDocScore; public: virtual double score(); friend class BooleanScorer2; }; class CountingConjunctionSumScorer : public ConjunctionScorer { public: CountingConjunctionSumScorer(const BooleanScorer2Ptr& scorer, const SimilarityPtr& similarity, Collection scorers); virtual ~CountingConjunctionSumScorer(); LUCENE_CLASS(CountingConjunctionSumScorer); protected: BooleanScorer2WeakPtr _scorer; int32_t lastScoredDoc; int32_t requiredNrMatchers; // Save the score of lastScoredDoc, so that we don't compute it more than once in score(). double lastDocScore; public: virtual double score(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/BufferedDeletes.h000066400000000000000000000032071456444476200235570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BUFFEREDDELETES_H #define BUFFEREDDELETES_H #include "Term.h" #include "Query.h" namespace Lucene { /// Holds buffered deletes, by docID, term or query. We hold two instances of this class: one for /// the deletes prior to the last flush, the other for deletes after the last flush. This is so if /// we need to abort (discard all buffered docs) we can also discard the buffered deletes yet keep /// the deletes done during previously flushed segments. 
class BufferedDeletes : public LuceneObject { public: BufferedDeletes(bool doTermSort); virtual ~BufferedDeletes(); LUCENE_CLASS(BufferedDeletes); public: int32_t numTerms; MapTermNum terms; MapQueryInt queries; Collection docIDs; int64_t bytesUsed; public: int32_t size(); void update(const BufferedDeletesPtr& in); void clear(); void addBytesUsed(int64_t b); bool any(); void remap(const MergeDocIDRemapperPtr& mapper, const SegmentInfosPtr& infos, Collection< Collection > docMaps, Collection delCounts, const OneMergePtr& merge, int32_t mergedDocCount); }; /// Number of documents a delete term applies to. class Num : public LuceneObject { public: Num(int32_t num); protected: int32_t num; public: int32_t getNum(); void setNum(int32_t num); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/BufferedIndexInput.h000066400000000000000000000102541456444476200242610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BUFFEREDINDEXINPUT_H #define BUFFEREDINDEXINPUT_H #include "IndexInput.h" namespace Lucene { /// Base implementation class for buffered {@link IndexInput}. class LPPAPI BufferedIndexInput : public IndexInput { public: /// Construct BufferedIndexInput with a specific bufferSize. BufferedIndexInput(int32_t bufferSize = BUFFER_SIZE); virtual ~BufferedIndexInput(); LUCENE_CLASS(BufferedIndexInput); public: /// Default buffer size. static const int32_t BUFFER_SIZE; protected: int32_t bufferSize; int64_t bufferStart; // position in file of buffer int32_t bufferLength; // end of valid bytes int32_t bufferPosition; // next byte to read ByteArray buffer; decltype(buffer.get()) __buffer; public: /// Reads and returns a single byte. 
/// @see IndexOutput#writeByte(uint8_t) virtual uint8_t readByte(); /// Reads an int stored in variable-length format. Reads between one and five /// bytes. Smaller values take fewer bytes. Negative numbers are not supported. /// @see IndexOutput#writeVInt(int32_t) virtual int32_t readVInt(); /// Change the buffer size used by this IndexInput. void setBufferSize(int32_t newSize); /// Returns buffer size. /// @see #setBufferSize int32_t getBufferSize(); /// Reads a specified number of bytes into an array at the specified offset. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. /// @see IndexOutput#writeBytes(const uint8_t*,int) /// @see #readInternal(uint8_t*, int32_t, int32_t) virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); /// Reads a specified number of bytes into an array at the specified offset with control over whether the /// read should be buffered (callers who have their own buffer should pass in "false" for useBuffer). /// Currently only {@link BufferedIndexInput} respects this parameter. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. /// @param useBuffer set to false if the caller will handle buffering. /// @see IndexOutput#writeBytes(const uint8_t*,int) /// @see #readInternal(uint8_t*, int32_t, int32_t) virtual void readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer); /// Closes the stream to further operations. virtual void close(); /// Returns the current position in this file, where the next read will occur. /// @see #seek(int64_t) virtual int64_t getFilePointer(); /// Sets current position in this file, where the next read will occur. /// @see #getFilePointer() /// @see #seekInternal(int64_t) virtual void seek(int64_t pos); /// Returns a clone of this stream. 
virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); protected: virtual void newBuffer(ByteArray newBuffer); void checkBufferSize(int32_t bufferSize); /// Refill buffer in preparation for reading. /// @see #readInternal(uint8_t*, int32_t, int32_t) /// @see #seekInternal(int64_t) virtual void refill(); /// Implements buffer refill. Reads bytes from the current position in the input. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. virtual void readInternal(uint8_t* b, int32_t offset, int32_t length) = 0; /// Implements seek. Sets current position in this file, where the next {@link /// #readInternal(uint8_t*, int32_t, int32_t)} will occur. /// @param pos position to set next write. /// @see #readInternal(uint8_t*, int32_t, int32_t) virtual void seekInternal(int64_t pos) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/BufferedIndexOutput.h000066400000000000000000000044511456444476200244640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BUFFEREDINDEXOUTPUT_H #define BUFFEREDINDEXOUTPUT_H #include "IndexOutput.h" namespace Lucene { /// Base implementation class for buffered {@link IndexOutput}. class LPPAPI BufferedIndexOutput : public IndexOutput { public: BufferedIndexOutput(); virtual ~BufferedIndexOutput(); LUCENE_CLASS(BufferedIndexOutput); public: static const int32_t BUFFER_SIZE; protected: int64_t bufferStart; // position in file of buffer int32_t bufferPosition; // position in buffer ByteArray buffer; public: /// Writes a single byte. 
/// @see IndexInput#readByte() virtual void writeByte(uint8_t b); /// Writes an array of bytes. /// @param b the bytes to write. /// @param length the number of bytes to write. /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length); /// Forces any buffered output to be written. virtual void flush(); /// Implements buffer write. Writes bytes at the current /// position in the output. /// @param b the bytes to write. /// @param offset the offset in the byte array. /// @param length the number of bytes to write. virtual void flushBuffer(const uint8_t* b, int32_t offset, int32_t length); /// Closes this stream to further operations. virtual void close(); /// Returns the current position in this file, where the next write will occur. /// @see #seek(long) virtual int64_t getFilePointer(); /// Sets current position in this file, where the next write will occur. /// @see #getFilePointer() virtual void seek(int64_t pos); /// The number of bytes in the file. virtual int64_t length() = 0; protected: /// Implements buffer write. Writes bytes at the current /// position in the output. /// @param b the bytes to write. /// @param length the number of bytes to write. void flushBuffer(const uint8_t* b, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/BufferedReader.h000066400000000000000000000032361456444476200233760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef BUFFEREDREADER_H #define BUFFEREDREADER_H #include "Reader.h" namespace Lucene { /// Read text from a character-input stream, buffering characters so as to provide /// for the efficient reading of characters, arrays, and lines. class LPPAPI BufferedReader : public Reader { public: /// Create a buffering character-input stream. BufferedReader(const ReaderPtr& reader, int32_t size = READER_BUFFER); virtual ~BufferedReader(); LUCENE_CLASS(BufferedReader); protected: ReaderPtr reader; int32_t bufferSize; int32_t bufferLength; // end of valid bytes int32_t bufferPosition; // next byte to read CharArray buffer; public: static const int32_t READER_BUFFER; public: /// Read a single character. virtual int32_t read(); /// Read characters into a portion of an array. virtual int32_t read(wchar_t* b, int32_t offset, int32_t length); /// Read a line of text. virtual bool readLine(String& line); /// Close the stream. virtual void close(); /// Tell whether this stream supports the mark() operation virtual bool markSupported(); /// Reset the stream. virtual void reset(); protected: /// Refill buffer in preparation for reading. int32_t refill(); /// Read a single character without moving position. int32_t peek(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ByteBlockPool.h000066400000000000000000000044471456444476200232460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BYTEBLOCKPOOL_H #define BYTEBLOCKPOOL_H #include "LuceneObject.h" namespace Lucene { /// Class that Posting and PostingVector use to write byte streams into shared fixed-size byte[] arrays. 
/// The idea is to allocate slices of increasing lengths. For example, the first slice is 5 bytes, the /// next slice is 14, etc. We start by writing our bytes into the first 5 bytes. When we hit the end of /// the slice, we allocate the next slice and then write the address of the new slice into the last 4 /// bytes of the previous slice (the "forwarding address"). /// /// Each slice is filled with 0's initially, and we mark the end with a non-zero byte. This way the methods /// that are writing into the slice don't need to record its length and instead allocate a new slice once /// they hit a non-zero byte. class LPPAPI ByteBlockPool : public LuceneObject { public: ByteBlockPool(const ByteBlockPoolAllocatorBasePtr& allocator, bool trackAllocations); virtual ~ByteBlockPool(); LUCENE_CLASS(ByteBlockPool); public: Collection buffers; int32_t bufferUpto; // Which buffer we are up to int32_t byteUpto; // Where we are in head buffer ByteArray buffer; int32_t byteOffset; static const int32_t nextLevelArray[]; static const int32_t levelSizeArray[]; protected: bool trackAllocations; ByteBlockPoolAllocatorBasePtr allocator; public: static int32_t FIRST_LEVEL_SIZE(); void reset(); void nextBuffer(); int32_t newSlice(int32_t size); int32_t allocSlice(ByteArray slice, int32_t upto); }; class LPPAPI ByteBlockPoolAllocatorBase : public LuceneObject { public: virtual ~ByteBlockPoolAllocatorBase(); LUCENE_CLASS(ByteBlockPoolAllocatorBase); public: virtual void recycleByteBlocks(Collection blocks, int32_t start, int32_t end) = 0; virtual void recycleByteBlocks(Collection blocks) = 0; virtual ByteArray getByteBlock(bool trackAllocations) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ByteFieldSource.h000066400000000000000000000034261456444476200235620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BYTEFIELDSOURCE_H #define BYTEFIELDSOURCE_H #include "FieldCacheSource.h" namespace Lucene { /// Obtains byte field values from the {@link FieldCache} using getBytes() and makes those values available /// as other numeric types, casting as needed. /// /// @see FieldCacheSource for requirements on the field. /// /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's /// best to switch your application to pass only atomic (single segment) readers to this API. Alternatively, /// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU /// per lookup but will not consume double the FieldCache RAM. class LPPAPI ByteFieldSource : public FieldCacheSource { public: /// Create a cached byte field source with a specific string-to-byte parser. ByteFieldSource(const String& field, const ByteParserPtr& parser = ByteParserPtr()); virtual ~ByteFieldSource(); LUCENE_CLASS(ByteFieldSource); protected: ByteParserPtr parser; public: virtual String description(); virtual DocValuesPtr getCachedFieldValues(const FieldCachePtr& cache, const String& field, const IndexReaderPtr& reader); virtual bool cachedFieldSourceEquals(const FieldCacheSourcePtr& other); virtual int32_t cachedFieldSourceHashCode(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ByteSliceReader.h000066400000000000000000000032121456444476200235310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BYTESLICEREADER_H #define BYTESLICEREADER_H #include "IndexInput.h" namespace Lucene { /// IndexInput that knows how to read the byte slices written by Posting and PostingVector. We read the bytes in each slice /// until we hit the end of that slice at which point we read the forwarding address of the next slice and then jump to it. class LPPAPI ByteSliceReader : public IndexInput { public: ByteSliceReader(); virtual ~ByteSliceReader(); LUCENE_CLASS(ByteSliceReader); public: ByteBlockPoolPtr pool; int32_t bufferUpto; ByteArray buffer; int32_t upto; int32_t limit; int32_t level; int32_t bufferOffset; int32_t endIndex; public: void init(const ByteBlockPoolPtr& pool, int32_t startIndex, int32_t endIndex); bool eof(); /// Reads and returns a single byte. virtual uint8_t readByte(); int64_t writeTo(const IndexOutputPtr& out); void nextSlice(); /// Reads a specified number of bytes into an array at the specified offset. virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); /// Not implemented virtual int64_t getFilePointer(); /// Not implemented virtual int64_t length(); /// Not implemented virtual void seek(int64_t pos); /// Not implemented virtual void close(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ByteSliceWriter.h000066400000000000000000000022331456444476200236050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef BYTESLICEWRITER_H #define BYTESLICEWRITER_H #include "LuceneObject.h" namespace Lucene { /// Class to write byte streams into slices of shared byte[]. This is used by DocumentsWriter to hold /// the posting list for many terms in RAM. class LPPAPI ByteSliceWriter : public LuceneObject { public: ByteSliceWriter(const ByteBlockPoolPtr& pool); virtual ~ByteSliceWriter(); LUCENE_CLASS(ByteSliceWriter); protected: ByteArray slice; int32_t upto; ByteBlockPoolPtr pool; public: int32_t offset0; public: /// Set up the writer to write at address. void init(int32_t address); /// Write byte into byte slice stream void writeByte(uint8_t b); void writeBytes(const uint8_t* b, int32_t offset, int32_t length); int32_t getAddress(); void writeVInt(int32_t i); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CMakeLists.txt000066400000000000000000000003511456444476200231130ustar00rootroot00000000000000#################################### # install headers #################################### file(GLOB_RECURSE lucene_headers "${CMAKE_CURRENT_SOURCE_DIR}/*.h" ) install( FILES ${lucene_headers} DESTINATION include/lucene++ ) LucenePlusPlus-rel_3.0.9/include/lucene++/CachingSpanFilter.h000066400000000000000000000027411456444476200240550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CACHINGSPANFILTER_H #define CACHINGSPANFILTER_H #include "SpanFilter.h" #include "CachingWrapperFilter.h" namespace Lucene { /// Wraps another SpanFilter's result and caches it. The purpose is to allow filters to simply filter, /// and then wrap with this class to add caching. 
class LPPAPI CachingSpanFilter : public SpanFilter { public: /// New deletions always result in a cache miss, by default ({@link CachingWrapperFilter#RECACHE}. CachingSpanFilter(const SpanFilterPtr& filter, CachingWrapperFilter::DeletesMode deletesMode = CachingWrapperFilter::DELETES_RECACHE); virtual ~CachingSpanFilter(); LUCENE_CLASS(CachingSpanFilter); protected: SpanFilterPtr filter; FilterCachePtr cache; public: // for testing int32_t hitCount; int32_t missCount; public: virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); virtual SpanFilterResultPtr bitSpans(const IndexReaderPtr& reader); virtual String toString(); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); protected: SpanFilterResultPtr getCachedResult(const IndexReaderPtr& reader); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CachingTokenFilter.h000066400000000000000000000023371456444476200242350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CACHINGTOKENFILTER_H #define CACHINGTOKENFILTER_H #include "TokenFilter.h" namespace Lucene { /// This class can be used if the token attributes of a TokenStream are intended to be consumed more than once. /// It caches all token attribute states locally in a List. /// /// CachingTokenFilter implements the optional method {@link TokenStream#reset()}, which repositions the stream /// to the first Token. 
class LPPAPI CachingTokenFilter : public TokenFilter { public: CachingTokenFilter(const TokenStreamPtr& input); virtual ~CachingTokenFilter(); LUCENE_CLASS(CachingTokenFilter); protected: Collection cache; Collection::iterator iterator; AttributeSourceStatePtr finalState; public: virtual bool incrementToken(); virtual void end(); virtual void reset(); protected: void fillCache(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CachingWrapperFilter.h000066400000000000000000000056331456444476200245770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CACHINGWRAPPERFILTER_H #define CACHINGWRAPPERFILTER_H #include "Filter.h" namespace Lucene { /// Wraps another filter's result and caches it. The purpose is to allow filters to simply filter, and /// then wrap with this class to add caching. class LPPAPI CachingWrapperFilter : public Filter { public: /// Specifies how new deletions against a reopened reader should be handled. /// /// The default is IGNORE, which means the cache entry will be re-used for a given segment, even when /// that segment has been reopened due to changes in deletions. This is a big performance gain, /// especially with near-real-timer readers, since you don't hit a cache miss on every reopened reader /// for prior segments. /// /// However, in some cases this can cause invalid query results, allowing deleted documents to be /// returned. This only happens if the main query does not rule out deleted documents on its own, /// such as a toplevel ConstantScoreQuery. 
To fix this, use RECACHE to re-create the cached filter /// (at a higher per-reopen cost, but at faster subsequent search performance), or use DYNAMIC to /// dynamically intersect deleted docs (fast reopen time but some hit to search performance). enum DeletesMode { DELETES_IGNORE, DELETES_RECACHE, DELETES_DYNAMIC }; /// New deletes are ignored by default, which gives higher cache hit rate on reopened readers. /// Most of the time this is safe, because the filter will be AND'd with a Query that fully enforces /// deletions. If instead you need this filter to always enforce deletions, pass either {@link /// DeletesMode#RECACHE} or {@link DeletesMode#DYNAMIC}. CachingWrapperFilter(const FilterPtr& filter, DeletesMode deletesMode = DELETES_IGNORE); virtual ~CachingWrapperFilter(); LUCENE_CLASS(CachingWrapperFilter); INTERNAL: FilterPtr filter; // for testing int32_t hitCount; int32_t missCount; protected: /// A Filter cache FilterCachePtr cache; /// Provide the DocIdSet to be cached, using the DocIdSet provided by the wrapped Filter. /// /// This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable} returns /// true, else it copies the {@link DocIdSetIterator} into an {@link OpenBitSetDISI}. DocIdSetPtr docIdSetToCache(const DocIdSetPtr& docIdSet, const IndexReaderPtr& reader); public: virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); virtual String toString(); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CharArraySet.h000066400000000000000000000033041456444476200230550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef CHARARRAYSET_H #define CHARARRAYSET_H #include "LuceneObject.h" namespace Lucene { /// A simple class that stores Strings as char[]'s in a hash table. Note that this is not a general purpose class. /// For example, it cannot remove items from the set, nor does it resize its hash table to be smaller, etc. It is /// designed to be quick to test if a char[] is in the set without the necessity of converting it to a String first. class LPPAPI CharArraySet : public LuceneObject { public: CharArraySet(bool ignoreCase); /// Create set from a set of strings. CharArraySet(HashSet entries, bool ignoreCase); /// Create set from a collection of strings. CharArraySet(Collection entries, bool ignoreCase); virtual ~CharArraySet(); LUCENE_CLASS(CharArraySet); protected: HashSet entries; bool ignoreCase; public: virtual bool contains(const String& text); /// True if the length chars of text starting at offset are in the set virtual bool contains(const wchar_t* text, int32_t offset, int32_t length); /// Add this String into the set virtual bool add(const String& text); /// Add this char[] into the set. virtual bool add(CharArray text); virtual int32_t size(); virtual bool isEmpty(); HashSet::iterator begin(); HashSet::iterator end(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CharBlockPool.h000066400000000000000000000017341456444476200232140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef CHARBLOCKPOOL_H #define CHARBLOCKPOOL_H #include "LuceneObject.h" namespace Lucene { class CharBlockPool : public LuceneObject { public: CharBlockPool(const DocumentsWriterPtr& docWriter); virtual ~CharBlockPool(); LUCENE_CLASS(CharBlockPool); public: Collection buffers; int32_t numBuffer; int32_t bufferUpto; // Which buffer we are up to int32_t charUpto; // Where we are in head buffer CharArray buffer; // Current head buffer int32_t charOffset; // Current head offset protected: DocumentsWriterWeakPtr _docWriter; public: void reset(); void nextBuffer(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CharFilter.h000066400000000000000000000026301456444476200225510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHARFILTER_H #define CHARFILTER_H #include "CharStream.h" namespace Lucene { /// Subclasses of CharFilter can be chained to filter CharStream. They can be used as {@link Reader} with /// additional offset correction. {@link Tokenizer}s will automatically use {@link #correctOffset} if a /// CharFilter/CharStream subclass is used. class LPPAPI CharFilter : public CharStream { protected: CharFilter(const CharStreamPtr& in); public: virtual ~CharFilter(); LUCENE_CLASS(CharFilter); protected: CharStreamPtr input; protected: /// Subclass may want to override to correct the current offset. /// @param currentOff current offset /// @return corrected offset virtual int32_t correct(int32_t currentOff); /// Chains the corrected offset through the input CharFilter. 
virtual int32_t correctOffset(int32_t currentOff); virtual void close(); virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); virtual bool markSupported(); virtual void mark(int32_t readAheadLimit); virtual void reset(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CharFolder.h000066400000000000000000000024031456444476200225350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHARFOLDER_H #define CHARFOLDER_H #include "LuceneObject.h" namespace Lucene { /// Utility class for folding character case. class LPPAPI CharFolder : public LuceneObject { public: virtual ~CharFolder(); LUCENE_CLASS(CharFolder); protected: static bool lowerCache; static bool upperCache; static wchar_t lowerChars[CHAR_MAX - CHAR_MIN + 1]; static wchar_t upperChars[CHAR_MAX - CHAR_MIN + 1]; public: static wchar_t toLower(wchar_t ch); static wchar_t toUpper(wchar_t ch); template static void toLower(ITER first, ITER last) { for (; first != last; ++first) { *first = toLower(*first); } } template static void toUpper(ITER first, ITER last) { for (; first != last; ++first) { *first = toUpper(*first); } } protected: static bool fillLower(); static bool fillUpper(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CharReader.h000066400000000000000000000022341456444476200225260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef CHARREADER_H #define CHARREADER_H #include "CharStream.h" namespace Lucene { /// CharReader is a Reader wrapper. It reads chars from Reader and outputs {@link CharStream}, defining an /// identify function {@link #correctOffset} method that simply returns the provided offset. class LPPAPI CharReader : public CharStream { public: CharReader(const ReaderPtr& in); virtual ~CharReader(); LUCENE_CLASS(CharReader); protected: ReaderPtr input; public: using CharStream::read; static CharStreamPtr get(const ReaderPtr& input); virtual int32_t correctOffset(int32_t currentOff); virtual void close(); virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); virtual bool markSupported(); virtual void mark(int32_t readAheadLimit); virtual void reset(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CharStream.h000066400000000000000000000023111456444476200225530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHARSTREAM_H #define CHARSTREAM_H #include "Reader.h" namespace Lucene { /// CharStream adds {@link #correctOffset} functionality over {@link Reader}. All Tokenizers accept a CharStream /// instead of {@link Reader} as input, which enables arbitrary character based filtering before tokenization. /// The {@link #correctOffset} method fixed offsets to account for removal or insertion of characters, so that the /// offsets reported in the tokens match the character offsets of the original Reader. 
class LPPAPI CharStream : public Reader { public: virtual ~CharStream(); LUCENE_CLASS(CharStream); public: /// Called by CharFilter(s) and Tokenizer to correct token offset. /// /// @param currentOff offset as seen in the output /// @return corrected offset based on the input virtual int32_t correctOffset(int32_t currentOff) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CharTokenizer.h000066400000000000000000000033571456444476200233050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHARTOKENIZER_H #define CHARTOKENIZER_H #include "Tokenizer.h" namespace Lucene { /// An abstract base class for simple, character-oriented tokenizers. class LPPAPI CharTokenizer : public Tokenizer { public: CharTokenizer(const ReaderPtr& input); CharTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); CharTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); virtual ~CharTokenizer(); LUCENE_CLASS(CharTokenizer); protected: int32_t offset; int32_t bufferIndex; int32_t dataLen; static const int32_t MAX_WORD_LEN; static const int32_t IO_BUFFER_SIZE; CharArray ioBuffer; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken(); virtual void end(); virtual void reset(const ReaderPtr& input); protected: /// Returns true if a character should be included in a token. This tokenizer generates as tokens adjacent /// sequences of characters which satisfy this predicate. Characters for which this is false are used to /// define token boundaries and are not included in tokens. 
virtual bool isTokenChar(wchar_t c) = 0; /// Called on each token character to normalize it before it is added to the token. The default implementation /// does nothing. Subclasses may use this to, eg., lowercase tokens. virtual wchar_t normalize(wchar_t c); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CheckIndex.h000066400000000000000000000243451456444476200225420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHECKINDEX_H #define CHECKINDEX_H #include "SegmentTermDocs.h" namespace Lucene { /// Basic tool and API to check the health of an index and write a new segments file that removes reference to /// problematic segments. /// /// As this tool checks every byte in the index, on a large index it can take quite a long time to run. /// /// WARNING: Please make a complete backup of your index before using this to fix your index! class LPPAPI CheckIndex : public LuceneObject { public: /// Create a new CheckIndex on the directory. CheckIndex(const DirectoryPtr& dir); virtual ~CheckIndex(); LUCENE_CLASS(CheckIndex); protected: InfoStreamPtr infoStream; DirectoryPtr dir; static bool _assertsOn; public: /// Set infoStream where messages should go. If null, no messages are printed void setInfoStream(const InfoStreamPtr& out); /// Returns a {@link IndexStatus} instance detailing the state of the index. /// /// As this method checks every byte in the index, on a large index it can take quite a long time to run. /// /// WARNING: make sure you only call this when the index is not opened by any writer. IndexStatusPtr checkIndex(); /// Returns a {@link IndexStatus} instance detailing the state of the index. 
/// /// @param onlySegments list of specific segment names to check /// /// As this method checks every byte in the specified segments, on a large index it can take quite a long /// time to run. /// /// WARNING: make sure you only call this when the index is not opened by any writer. IndexStatusPtr checkIndex(Collection onlySegments); /// Repairs the index using previously returned result from {@link #checkIndex}. Note that this does not /// remove any of the unreferenced files after it's done; you must separately open an {@link IndexWriter}, /// which deletes unreferenced files when it's created. /// /// WARNING: this writes a new segments file into the index, effectively removing all documents in broken /// segments from the index. BE CAREFUL. /// /// WARNING: Make sure you only call this when the index is not opened by any writer. void fixIndex(const IndexStatusPtr& result); static bool testAsserts(); static bool assertsOn(); /// Command-line interface to check and fix an index. /// /// Run it like this: /// CheckIndex pathToIndex [-fix] [-segment X] [-segment Y] /// /// -fix: actually write a new segments_N file, removing any problematic segments /// /// -segment X: only check the specified segment(s). This can be specified multiple times, /// to check more than one segment, eg -segment _2 -segment _a. /// You can't use this with the -fix option. /// /// WARNING: -fix should only be used on an emergency basis as it will cause documents (perhaps many) /// to be permanently removed from the index. Always make a backup copy of your index before running /// this! Do not run this tool on an index that is actively being written to. You have been warned! /// /// Run without -fix, this tool will open the index, report version information and report any exceptions /// it hits and what action it would take if -fix were specified. With -fix, this tool will remove any /// segments that have issues and write a new segments_N file. 
This means all documents contained in the /// affected segments will be removed. /// /// This tool exits with exit code 1 if the index cannot be opened or has any corruption, else 0. static int main(Collection args); protected: void msg(const String& msg); /// Test field norms. FieldNormStatusPtr testFieldNorms(Collection fieldNames, const SegmentReaderPtr& reader); /// Test the term index. TermIndexStatusPtr testTermIndex(const SegmentInfoPtr& info, const SegmentReaderPtr& reader); /// Test stored fields for a segment. StoredFieldStatusPtr testStoredFields(const SegmentInfoPtr& info, const SegmentReaderPtr& reader); /// Test term vectors for a segment. TermVectorStatusPtr testTermVectors(const SegmentInfoPtr& info, const SegmentReaderPtr& reader); }; /// Returned from {@link #checkIndex()} detailing the health and status of the index. class LPPAPI IndexStatus : public LuceneObject { public: IndexStatus(); virtual ~IndexStatus(); LUCENE_CLASS(IndexStatus); public: /// True if no problems were found with the index. bool clean; /// True if we were unable to locate and load the segments_N file. bool missingSegments; /// True if we were unable to open the segments_N file. bool cantOpenSegments; /// True if we were unable to read the version number from segments_N file. bool missingSegmentVersion; /// Name of latest segments_N file in the index. String segmentsFileName; /// Number of segments in the index. int32_t numSegments; /// String description of the version of the index. String segmentFormat; /// Empty unless you passed specific segments list to check as optional 3rd argument. /// @see CheckIndex#checkIndex(List) Collection segmentsChecked; /// True if the index was created with a newer version of Lucene than the CheckIndex tool. bool toolOutOfDate; /// List of {@link SegmentInfoStatus} instances, detailing status of each segment. Collection segmentInfos; /// Directory index is in. 
DirectoryPtr dir; /// SegmentInfos instance containing only segments that had no problems (this is used with the /// {@link CheckIndex#fixIndex} method to repair the index. SegmentInfosPtr newSegments; /// How many documents will be lost to bad segments. int32_t totLoseDocCount; /// How many bad segments were found. int32_t numBadSegments; /// True if we checked only specific segments ({@link #checkIndex(List)}) was called with non-null argument). bool partial; /// Holds the userData of the last commit in the index MapStringString userData; }; /// Holds the status of each segment in the index. See {@link #segmentInfos}. class LPPAPI SegmentInfoStatus : public LuceneObject { public: SegmentInfoStatus(); virtual ~SegmentInfoStatus(); LUCENE_CLASS(SegmentInfoStatus); public: /// Name of the segment. String name; /// Document count (does not take deletions into account). int32_t docCount; /// True if segment is compound file format. bool compound; /// Number of files referenced by this segment. int32_t numFiles; /// Net size (MB) of the files referenced by this segment. double sizeMB; /// Doc store offset, if this segment shares the doc store files (stored fields and term vectors) with /// other segments. This is -1 if it does not share. int32_t docStoreOffset; /// String of the shared doc store segment, or null if this segment does not share the doc store files. String docStoreSegment; /// True if the shared doc store files are compound file format. bool docStoreCompoundFile; /// True if this segment has pending deletions. bool hasDeletions; /// Name of the current deletions file name. String deletionsFileName; /// Number of deleted documents. int32_t numDeleted; /// True if we were able to open a SegmentReader on this segment. bool openReaderPassed; /// Number of fields in this segment. int32_t numFields; /// True if at least one of the fields in this segment does not omitTermFreqAndPositions. 
/// @see AbstractField#setOmitTermFreqAndPositions bool hasProx; /// Map that includes certain debugging details that IndexWriter records into each segment it creates MapStringString diagnostics; /// Status for testing of field norms (null if field norms could not be tested). FieldNormStatusPtr fieldNormStatus; /// Status for testing of indexed terms (null if indexed terms could not be tested). TermIndexStatusPtr termIndexStatus; /// Status for testing of stored fields (null if stored fields could not be tested). StoredFieldStatusPtr storedFieldStatus; /// Status for testing of term vectors (null if term vectors could not be tested). TermVectorStatusPtr termVectorStatus; }; /// Status from testing field norms. class LPPAPI FieldNormStatus : public LuceneObject { public: FieldNormStatus(); virtual ~FieldNormStatus(); LUCENE_CLASS(FieldNormStatus); public: /// Number of fields successfully tested int64_t totFields; /// Exception thrown during term index test (null on success) LuceneException error; }; /// Status from testing term index. class LPPAPI TermIndexStatus : public LuceneObject { public: TermIndexStatus(); virtual ~TermIndexStatus(); LUCENE_CLASS(TermIndexStatus); public: /// Total term count int64_t termCount; /// Total frequency across all terms. int64_t totFreq; /// Total number of positions. int64_t totPos; /// Exception thrown during term index test (null on success) LuceneException error; }; /// Status from testing stored fields. class LPPAPI StoredFieldStatus : public LuceneObject { public: StoredFieldStatus(); virtual ~StoredFieldStatus(); LUCENE_CLASS(StoredFieldStatus); public: /// Number of documents tested. int32_t docCount; /// Total number of stored fields tested. int64_t totFields; /// Exception thrown during stored fields test (null on success) LuceneException error; }; /// Status from testing stored fields. 
class LPPAPI TermVectorStatus : public LuceneObject { public: TermVectorStatus(); virtual ~TermVectorStatus(); LUCENE_CLASS(TermVectorStatus); public: /// Number of documents tested. int32_t docCount; /// Total number of term vectors tested. int64_t totVectors; /// Exception thrown during term vector test (null on success) LuceneException error; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ChecksumIndexInput.h000066400000000000000000000037101456444476200243000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHECKSUMINDEXINPUT_H #define CHECKSUMINDEXINPUT_H #include #include "IndexInput.h" namespace Lucene { /// Writes bytes through to a primary IndexInput, computing checksum as it goes. /// Note that you cannot use seek(). class LPPAPI ChecksumIndexInput : public IndexInput { public: ChecksumIndexInput(const IndexInputPtr& main); virtual ~ChecksumIndexInput(); LUCENE_CLASS(ChecksumIndexInput); protected: IndexInputPtr main; boost::crc_32_type checksum; public: /// Reads and returns a single byte. /// @see IndexOutput#writeByte(uint8_t) virtual uint8_t readByte(); /// Reads a specified number of bytes into an array at the specified offset. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. /// @see IndexOutput#writeBytes(const uint8_t*,int) virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); /// Return calculated checksum. int64_t getChecksum(); /// Closes the stream to further operations. virtual void close(); /// Returns the current position in this file, where the next read will occur. 
/// @see #seek(int64_t) virtual int64_t getFilePointer(); /// Sets current position in this file, where the next read will occur. /// @see #getFilePointer() virtual void seek(int64_t pos); /// The number of bytes in the file. virtual int64_t length(); /// Returns a clone of this stream. virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ChecksumIndexOutput.h000066400000000000000000000041241456444476200245010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHECKSUMINDEXOUTPUT_H #define CHECKSUMINDEXOUTPUT_H #include #include "IndexOutput.h" namespace Lucene { /// Writes bytes through to a primary IndexOutput, computing /// checksum. Note that you cannot use seek(). class LPPAPI ChecksumIndexOutput : public IndexOutput { public: ChecksumIndexOutput(const IndexOutputPtr& main); virtual ~ChecksumIndexOutput(); LUCENE_CLASS(ChecksumIndexOutput); protected: IndexOutputPtr main; boost::crc_32_type checksum; public: /// Writes a single byte. /// @see IndexInput#readByte() virtual void writeByte(uint8_t b); /// Writes an array of bytes. /// @param b the bytes to write. /// @param length the number of bytes to write. /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length); /// Return calculated checksum. int64_t getChecksum(); /// Forces any buffered output to be written. virtual void flush(); /// Closes the stream to further operations. virtual void close(); /// Returns the current position in this file, where the next write will occur. 
/// @see #seek(int64_t) virtual int64_t getFilePointer(); /// Sets current position in this file, where the next write will occur. /// @see #getFilePointer() virtual void seek(int64_t pos); /// Starts but does not complete the commit of this file (= writing of /// the final checksum at the end). After this is called must call /// {@link #finishCommit} and the {@link #close} to complete the commit. void prepareCommit(); /// See {@link #prepareCommit} void finishCommit(); /// The number of bytes in the file. virtual int64_t length(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CloseableThreadLocal.h000066400000000000000000000030401456444476200245160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CLOSEABLETHREADLOCAL_H #define CLOSEABLETHREADLOCAL_H #include "LuceneThread.h" namespace Lucene { /// General purpose thread-local map. 
template class CloseableThreadLocal : public LuceneObject { public: typedef boost::shared_ptr localDataPtr; typedef Map MapLocalData; CloseableThreadLocal() { localData = MapLocalData::newInstance(); } public: localDataPtr get() { SyncLock syncLock(this); typename MapLocalData::iterator local = localData.find(LuceneThread::currentId()); if (local != localData.end()) { return local->second; } localDataPtr initial(initialValue()); if (initial) { localData.put(LuceneThread::currentId(), initial); } return initial; } void set(const localDataPtr& data) { SyncLock syncLock(this); localData.put(LuceneThread::currentId(), data); } void close() { SyncLock syncLock(this); localData.remove(LuceneThread::currentId()); } protected: MapLocalData localData; virtual localDataPtr initialValue() { return localDataPtr(); // override } }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Collator.h000066400000000000000000000014641456444476200223110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef COLLATOR_H #define COLLATOR_H #include "LuceneObject.h" namespace Lucene { /// Convenience class for storing collate objects. class LPPAPI Collator : public LuceneObject { public: /// Creates a new Collator, given the file to read from. 
Collator(std::locale locale); virtual ~Collator(); LUCENE_CLASS(Collator); protected: const std::collate& collate; public: int32_t compare(const String& first, const String& second); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Collection.h000066400000000000000000000166151456444476200226310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef COLLECTION_H #define COLLECTION_H #include #include "LuceneSync.h" namespace Lucene { /// Utility template class to handle collections that can be safely copied and shared template class Collection : public LuceneSync { public: typedef Collection this_type; typedef boost::shared_ptr shared_ptr; typedef std::vector collection_type; typedef typename collection_type::iterator iterator; typedef typename collection_type::const_iterator const_iterator; typedef TYPE value_type; virtual ~Collection() { } protected: boost::shared_ptr container; public: static this_type newInstance(int32_t size = 0) { this_type instance; instance.container = Lucene::newInstance(size); return instance; } template static this_type newInstance(ITER first, ITER last) { this_type instance; instance.container = Lucene::newInstance(first, last); return instance; } void reset() { resize(0); } void resize(int32_t size) { if (size == 0) { container.reset(); } else { container->resize(size); } } int32_t size() const { return (int32_t)container->size(); } bool empty() const { return container->empty(); } void clear() { container->clear(); } iterator begin() { return container->begin(); } iterator end() { return container->end(); } const_iterator begin() const { return container->begin(); } const_iterator end() const { return 
container->end(); } void add(const TYPE& type) { container->push_back(type); } void add(int32_t pos, const TYPE& type) { container->insert(container->begin() + pos, type); } template void addAll(ITER first, ITER last) { container->insert(container->end(), first, last); } template void insert(ITER pos, const TYPE& type) { container->insert(pos, type); } template ITER remove(ITER pos) { return container->erase(pos); } template ITER remove(ITER first, ITER last) { return container->erase(first, last); } void remove(const TYPE& type) { container->erase(std::remove(container->begin(), container->end(), type), container->end()); } template void remove_if(PRED comp) { container->erase(std::remove_if(container->begin(), container->end(), comp), container->end()); } TYPE removeFirst() { TYPE front = container->front(); container->erase(container->begin()); return front; } TYPE removeLast() { TYPE back = container->back(); container->pop_back(); return back; } iterator find(const TYPE& type) { return std::find(container->begin(), container->end(), type); } template iterator find_if(PRED comp) { return std::find_if(container->begin(), container->end(), comp); } bool contains(const TYPE& type) const { return (std::find(container->begin(), container->end(), type) != container->end()); } template bool contains_if(PRED comp) const { return (std::find_if(container->begin(), container->end(), comp) != container->end()); } bool equals(const this_type& other) const { return equals(other, std::equal_to()); } template bool equals(const this_type& other, PRED comp) const { if (container->size() != other.container->size()) { return false; } return std::equal(container->begin(), container->end(), other.container->begin(), comp); } int32_t hashCode() { return (int32_t)(int64_t)container.get(); } void swap(this_type& other) { container.swap(other->container); } TYPE& operator[] (int32_t pos) { return (*container)[pos]; } const TYPE& operator[] (int32_t pos) const { return (*container)[pos]; 
} operator bool() const { return container.get() != NULL; } bool operator! () const { return !container; } bool operator== (const this_type& other) { return (container == other.container); } bool operator!= (const this_type& other) { return (container != other.container); } collection_type* get() { return container.get(); } }; template Collection newCollection(const TYPE& a1) { Collection result = Collection::newInstance(); result.add(a1); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2) { Collection result = newCollection(a1); result.add(a2); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3) { Collection result = newCollection(a1, a2); result.add(a3); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4) { Collection result = newCollection(a1, a2, a3); result.add(a4); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5) { Collection result = newCollection(a1, a2, a3, a4); result.add(a5); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6) { Collection result = newCollection(a1, a2, a3, a4, a5); result.add(a6); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7) { Collection result = newCollection(a1, a2, a3, a4, a5, a6); result.add(a7); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7, const TYPE& a8) { Collection result = newCollection(a1, a2, a3, a4, a5, a6, a7); result.add(a8); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const 
TYPE& a7, const TYPE& a8, const TYPE& a9) { Collection result = newCollection(a1, a2, a3, a4, a5, a6, a7, a8); result.add(a9); return result; } template Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7, const TYPE& a8, const TYPE& a9, const TYPE& a10) { Collection result = newCollection(a1, a2, a3, a4, a5, a6, a7, a8, a9); result.add(a10); return result; } } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Collector.h000066400000000000000000000135641456444476200224640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef COLLECTOR_H #define COLLECTOR_H #include "LuceneObject.h" namespace Lucene { /// Collectors are primarily meant to be used to gather raw results from a search, and implement sorting /// or custom result filtering, collation, etc. /// /// Lucene's core collectors are derived from Collector. Likely your application can use one of these /// classes, or subclass {@link TopDocsCollector}, instead of implementing Collector directly: /// ///
    ///
  • {@link TopDocsCollector} is an abstract base class that assumes you will retrieve the top N docs, /// according to some criteria, after collection is done. /// ///
  • {@link TopScoreDocCollector} is a concrete subclass {@link TopDocsCollector} and sorts according /// to score + docID. This is used internally by the {@link IndexSearcher} search methods that do not take /// an explicit {@link Sort}. It is likely the most frequently used collector. /// ///
  • {@link TopFieldCollector} subclasses {@link TopDocsCollector} and sorts according to a specified /// {@link Sort} object (sort by field). This is used internally by the {@link IndexSearcher} search methods /// that take an explicit {@link Sort}. /// ///
  • {@link TimeLimitingCollector}, which wraps any other Collector and aborts the search if it's taken too /// much time. /// ///
  • {@link PositiveScoresOnlyCollector} wraps any other Collector and prevents collection of hits whose /// score is <= 0.0 /// ///
/// /// Collector decouples the score from the collected doc: the score computation is skipped entirely if it's not /// needed. Collectors that do need the score should implement the {@link #setScorer} method, to hold onto the /// passed {@link Scorer} instance, and call {@link Scorer#score()} within the collect method to compute the /// current hit's score. If your collector may request the score for a single hit multiple times, you should use /// {@link ScoreCachingWrappingScorer}. /// /// NOTE: The doc that is passed to the collect method is relative to the current reader. If your collector needs /// to resolve this to the docID space of the Multi*Reader, you must re-base it by recording the docBase from the /// most recent setNextReader call. Here's a simple example showing how to collect docIDs into a BitSet: /// ///
/// class MyCollector : public Collector
/// {
/// public:
///     MyCollector(const BitSetPtr& bits)
///     {
///         this->bits = bits;
///         this->docBase = 0;
///     }
///
/// protected:
///     BitSetPtr bits;
///     int32_t docBase;
///
/// public:
///     virtual void setScorer(const ScorerPtr& scorer)
///     {
///         // ignore scorer
///     }
///
///     virtual void collect(int32_t doc)
///     {
///         bits->set(doc + docBase);
///     }
///
///     virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase)
///     {
///         this->docBase = docBase;
///     }
///
///     virtual bool acceptsDocsOutOfOrder()
///     {
///         return true; // accept docs out of order (for a BitSet it doesn't matter)
///     }
/// };
///
/// ...
///
/// SearcherPtr searcher = newLucene(indexReader);
/// BitSetPtr bits = newLucene(indexReader->maxDoc());
/// searcher->search(query, newLucene(bits));
///
/// 
/// Not all collectors will need to rebase the docID. For example, a collector that simply counts the /// total number of hits would skip it. /// /// NOTE: Prior to 2.9, Lucene silently filtered out hits with score <= 0. As of 2.9, the core Collectors /// no longer do that. It's very unusual to have such hits (a negative query boost, or function query /// returning negative custom scores, could cause it to happen). If you need that behavior, use {@link /// PositiveScoresOnlyCollector}. class LPPAPI Collector : public LuceneObject { public: virtual ~Collector(); LUCENE_CLASS(Collector); public: /// Called before successive calls to {@link #collect(int32_t)}. Implementations that need the score /// of the current document (passed-in to {@link #collect(int32_t)}), should save the passed-in Scorer /// and call scorer.score() when needed. virtual void setScorer(const ScorerPtr& scorer) = 0; /// Called once for every document matching a query, with the unbased document number. /// /// Note: This is called in an inner search loop. For good search performance, implementations of this /// method should not call {@link Searcher#doc(int32_t)} or {@link IndexReader#document(int32_t)} on /// every hit. Doing so can slow searches by an order of magnitude or more. virtual void collect(int32_t doc) = 0; /// Called before collecting from each IndexReader. All doc ids in {@link #collect(int32_t)} will /// correspond to reader. Add docBase to the current IndexReaders internal document id to re-base ids /// in {@link #collect(int32_t)}. /// @param reader next IndexReader /// @param docBase virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase) = 0; /// Return true if this collector does not require the matching docIDs to be delivered in int sort /// order (smallest to largest) to {@link #collect}. /// /// Most Lucene Query implementations will visit matching docIDs in order. 
However, some queries /// (currently limited to certain cases of {@link BooleanQuery}) can achieve faster searching if the /// Collector allows them to deliver the docIDs out of order. /// /// Many collectors don't mind getting docIDs out of order, so it's important to return true here. virtual bool acceptsDocsOutOfOrder() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ComplexExplanation.h000066400000000000000000000024571456444476200243470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef COMPLEXEXPLANATION_H #define COMPLEXEXPLANATION_H #include "Explanation.h" namespace Lucene { /// Describes the score computation for document and query, and can distinguish a match independent /// of a positive value. class LPPAPI ComplexExplanation : public Explanation { public: ComplexExplanation(bool match = false, double value = 0, const String& description = EmptyString); virtual ~ComplexExplanation(); LUCENE_CLASS(ComplexExplanation); protected: bool match; public: /// The match status of this explanation node. bool getMatch(); /// Sets the match status assigned to this explanation node. void setMatch(bool match); /// Indicates whether or not this Explanation models a good match. /// /// If the match status is explicitly set this method uses it; otherwise it defers to the /// superclass. /// /// @see #getMatch virtual bool isMatch(); protected: virtual String getSummary(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CompoundFileReader.h000066400000000000000000000075431456444476200242450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef COMPOUNDFILEREADER_H #define COMPOUNDFILEREADER_H #include "Directory.h" #include "BufferedIndexInput.h" namespace Lucene { /// Class for accessing a compound stream. /// This class implements a directory, but is limited to only read operations. /// Directory methods that would normally modify data throw an exception. class LPPAPI CompoundFileReader : public Directory { public: CompoundFileReader(const DirectoryPtr& dir, const String& name); CompoundFileReader(const DirectoryPtr& dir, const String& name, int32_t readBufferSize); virtual ~CompoundFileReader(); LUCENE_CLASS(CompoundFileReader); protected: struct FileEntry { FileEntry(int64_t offset = 0, int64_t length = 0) { this->offset = offset; this->length = length; } int64_t offset; int64_t length; }; typedef boost::shared_ptr FileEntryPtr; typedef HashMap MapStringFileEntryPtr; DirectoryPtr directory; String fileName; int32_t readBufferSize; IndexInputPtr stream; MapStringFileEntryPtr entries; protected: void ConstructReader(const DirectoryPtr& dir, const String& name, int32_t readBufferSize); public: DirectoryPtr getDirectory(); String getName(); virtual void close(); virtual IndexInputPtr openInput(const String& name); virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); /// Returns an array of strings, one for each file in the directory. virtual HashSet listAll(); /// Returns true if a file with the given name exists. virtual bool fileExists(const String& name); /// Returns the time the compound file was last modified. virtual uint64_t fileModified(const String& name); /// Set the modified time of the compound file to now. 
virtual void touchFile(const String& name); /// Not implemented virtual void deleteFile(const String& name); /// Not implemented virtual void renameFile(const String& from, const String& to); /// Returns the length of a file in the directory. virtual int64_t fileLength(const String& name); /// Not implemented virtual IndexOutputPtr createOutput(const String& name); /// Not implemented virtual LockPtr makeLock(const String& name); }; /// Implementation of an IndexInput that reads from a portion of the compound file. class LPPAPI CSIndexInput : public BufferedIndexInput { public: CSIndexInput(); CSIndexInput(const IndexInputPtr& base, int64_t fileOffset, int64_t length); CSIndexInput(const IndexInputPtr& base, int64_t fileOffset, int64_t length, int32_t readBufferSize); virtual ~CSIndexInput(); LUCENE_CLASS(CSIndexInput); public: IndexInputPtr base; int64_t fileOffset; int64_t _length; public: /// Closes the stream to further operations. virtual void close(); virtual int64_t length(); /// Returns a clone of this stream. virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); protected: /// Implements buffer refill. Reads bytes from the current position in the input. /// @param b the array to read bytes into /// @param offset the offset in the array to start storing bytes /// @param len the number of bytes to read virtual void readInternal(uint8_t* b, int32_t offset, int32_t length); /// Implements seek. Sets current position in this file, where the next {@link /// #readInternal(byte[],int,int)} will occur. virtual void seekInternal(int64_t pos); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CompoundFileWriter.h000066400000000000000000000050421456444476200243070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef COMPOUNDFILEWRITER_H #define COMPOUNDFILEWRITER_H #include "LuceneObject.h" namespace Lucene { /// Combines multiple files into a single compound file. /// The file format: /// VInt fileCount /// {Directory} /// fileCount entries with the following structure: /// int64_t dataOffset /// String fileName /// {File Data} /// fileCount entries with the raw data of the corresponding file /// /// The fileCount integer indicates how many files are contained in this compound file. The {directory} /// that follows has that many entries. Each directory entry contains a long pointer to the start of /// this file's data section, and a string with that file's name. class LPPAPI CompoundFileWriter : public LuceneObject { public: CompoundFileWriter(const DirectoryPtr& dir, const String& name, const CheckAbortPtr& checkAbort = CheckAbortPtr()); virtual ~CompoundFileWriter(); LUCENE_CLASS(CompoundFileWriter); protected: struct FileEntry { /// source file String file; /// temporary holder for the start of directory entry for this file int64_t directoryOffset; /// temporary holder for the start of this file's data section int64_t dataOffset; }; DirectoryWeakPtr _directory; String fileName; HashSet ids; Collection entries; bool merged; CheckAbortPtr checkAbort; public: /// Returns the directory of the compound file. DirectoryPtr getDirectory(); /// Returns the name of the compound file. String getName(); /// Add a source stream. file is the string by which the sub-stream will be known in the /// compound stream. void addFile(const String& file); /// Merge files with the extensions added up to now. All files with these extensions are /// combined sequentially into the compound stream. 
After successful merge, the source /// are deleted.files void close(); protected: /// Copy the contents of the file with specified extension into the provided output stream. /// Use the provided buffer for moving data to reduce memory allocation. void copyFile(const FileEntry& source, const IndexOutputPtr& os, ByteArray buffer); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CompressionTools.h000066400000000000000000000033711456444476200240530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef COMPRESSIONTOOLS_H #define COMPRESSIONTOOLS_H #include "LuceneObject.h" namespace Lucene { /// Simple utility class providing static methods to compress and decompress binary data for stored fields. 
class LPPAPI CompressionTools : public LuceneObject { public: virtual ~CompressionTools(); LUCENE_CLASS(CompressionTools); public: /// Compresses the specified byte range using the specified compressionLevel static ByteArray compress(uint8_t* value, int32_t offset, int32_t length, int32_t compressionLevel); /// Compresses the specified byte range, with default BEST_COMPRESSION level static ByteArray compress(uint8_t* value, int32_t offset, int32_t length); /// Compresses all bytes in the array, with default BEST_COMPRESSION level static ByteArray compress(ByteArray value); /// Compresses the String value, with default BEST_COMPRESSION level static ByteArray compressString(const String& value); /// Compresses the String value using the specified compressionLevel static ByteArray compressString(const String& value, int32_t compressionLevel); /// Decompress the byte array previously returned by compress static ByteArray decompress(ByteArray value); /// Decompress the byte array previously returned by compressString back into a String static String decompressString(ByteArray value); protected: static const int32_t COMPRESS_BUFFER; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ConcurrentMergeScheduler.h000066400000000000000000000062261456444476200254740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef CONCURRENTMERGESCHEDULER_H #define CONCURRENTMERGESCHEDULER_H #include "MergeScheduler.h" namespace Lucene { /// A {@link MergeScheduler} that runs each merge using a separate thread, up until a /// maximum number of threads ({@link #setMaxThreadCount}) at which when a merge is needed, /// the thread(s) that are updating the index will pause until one or more merges completes. /// This is a simple way to use concurrency in the indexing process without having to create /// and manage application level threads. class LPPAPI ConcurrentMergeScheduler : public MergeScheduler { public: ConcurrentMergeScheduler(); virtual ~ConcurrentMergeScheduler(); LUCENE_CLASS(ConcurrentMergeScheduler); protected: int32_t mergeThreadPriority; SetMergeThread mergeThreads; /// Max number of threads allowed to be merging at once int32_t maxThreadCount; DirectoryPtr dir; bool closed; IndexWriterWeakPtr _writer; static Collection allInstances; bool suppressExceptions; static bool anyExceptions; public: virtual void initialize(); /// Sets the max # simultaneous threads that may be running. If a merge is necessary yet /// we already have this many threads running, the incoming thread (that is calling /// add/updateDocument) will block until a merge thread has completed. virtual void setMaxThreadCount(int32_t count); /// Get the max # simultaneous threads that may be running. @see #setMaxThreadCount. virtual int32_t getMaxThreadCount(); /// Return the priority that merge threads run at. By default the priority is 1 plus the /// priority of (ie, slightly higher priority than) the first thread that calls merge. virtual int32_t getMergeThreadPriority(); /// Set the priority that merge threads run at. 
virtual void setMergeThreadPriority(int32_t pri); virtual void close(); virtual void sync(); virtual void merge(const IndexWriterPtr& writer); /// Used for testing static bool anyUnhandledExceptions(); static void clearUnhandledExceptions(); /// Used for testing void setSuppressExceptions(); void clearSuppressExceptions(); /// Used for testing static void setTestMode(); protected: virtual bool verbose(); virtual void message(const String& message); virtual void initMergeThreadPriority(); virtual int32_t mergeThreadCount(); /// Does the actual merge, by calling {@link IndexWriter#merge} virtual void doMerge(const OneMergePtr& merge); virtual MergeThreadPtr getMergeThread(const IndexWriterPtr& writer, const OneMergePtr& merge); /// Called when an exception is hit in a background merge thread virtual void handleMergeException(const LuceneException& exc); virtual void addMyself(); friend class MergeThread; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ConjunctionScorer.h000066400000000000000000000017431456444476200242010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CONJUNCTIONSCORER_H #define CONJUNCTIONSCORER_H #include "Scorer.h" namespace Lucene { /// Scorer for conjunctions, sets of queries, all of which are required. 
class ConjunctionScorer : public Scorer { public: ConjunctionScorer(const SimilarityPtr& similarity, Collection scorers); virtual ~ConjunctionScorer(); LUCENE_CLASS(ConjunctionScorer); protected: Collection scorers; double coord; int32_t lastDoc; public: virtual int32_t advance(int32_t target); virtual int32_t docID(); virtual int32_t nextDoc(); virtual double score(); protected: int32_t doNext(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ConstantScoreQuery.h000066400000000000000000000026631456444476200243470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CONSTANTSCOREQUERY_H #define CONSTANTSCOREQUERY_H #include "Query.h" #include "Weight.h" #include "Scorer.h" namespace Lucene { /// A query that wraps a filter and simply returns a constant score equal to the query boost for every /// document in the filter. class LPPAPI ConstantScoreQuery : public Query { public: ConstantScoreQuery(const FilterPtr& filter); virtual ~ConstantScoreQuery(); LUCENE_CLASS(ConstantScoreQuery); protected: FilterPtr filter; public: using Query::toString; /// Returns the encapsulated filter FilterPtr getFilter(); virtual QueryPtr rewrite(const IndexReaderPtr& reader); virtual void extractTerms(SetTerm terms); virtual WeightPtr createWeight(const SearcherPtr& searcher); /// Prints a user-readable version of this query. 
virtual String toString(const String& field); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); friend class ConstantWeight; friend class ConstantScorer; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Constants.h000066400000000000000000000051751456444476200225110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CONSTANTS_H #define CONSTANTS_H #include "Lucene.h" namespace Lucene { /// Some useful Lucene constants. class LPPAPI Constants { private: Constants(); public: virtual ~Constants(); public: static String OS_NAME; static String LUCENE_MAIN_VERSION; static String LUCENE_VERSION; }; /// Use by certain classes to match version compatibility across releases of Lucene. /// /// WARNING: When changing the version parameter that you supply to components in Lucene, do not simply /// change the version at search-time, but instead also adjust your indexing code to match, and re-index. class LPPAPI LuceneVersion { private: LuceneVersion(); public: virtual ~LuceneVersion(); public: enum Version { /// Match settings and bugs in Lucene's 2.0 release. LUCENE_20 = 0, /// Match settings and bugs in Lucene's 2.1 release. LUCENE_21, /// Match settings and bugs in Lucene's 2.2 release. LUCENE_22, /// Match settings and bugs in Lucene's 2.3 release. LUCENE_23, /// Match settings and bugs in Lucene's 2.4 release. LUCENE_24, /// Match settings and bugs in Lucene's 2.9 release. LUCENE_29, /// Match settings and bugs in Lucene's 3.0 release. /// /// Use this to get the latest & greatest settings, bug fixes, etc, for Lucene. 
LUCENE_30, /// Add new constants for later versions **here** to respect order! /// Warning: If you use this setting, and then upgrade to a newer release of Lucene, /// sizable changes may happen. If backwards compatibility is important then you /// should instead explicitly specify an actual version. /// /// If you use this constant then you may need to re-index all of your documents /// when upgrading Lucene, as the way text is indexed may have changed. Additionally, /// you may need to re-test your entire application to ensure it behaves as /// expected, as some defaults may have changed and may break functionality in your /// application. /// /// Deprecated: Use an actual version instead. LUCENE_CURRENT }; public: static bool onOrAfter(LuceneVersion::Version first, LuceneVersion::Version second); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CustomScoreProvider.h000066400000000000000000000104641456444476200245130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CUSTOMSCOREPROVIDER_H #define CUSTOMSCOREPROVIDER_H #include "LuceneObject.h" namespace Lucene { /// An instance of this subclass should be returned by {@link CustomScoreQuery#getCustomScoreProvider}, /// if you want to modify the custom score calculation of a {@link CustomScoreQuery}. /// /// Since Lucene 2.9, queries operate on each segment of an Index separately, so overriding the similar /// (now deprecated) methods in {@link CustomScoreQuery} is no longer suitable, as the supplied doc ID /// is per-segment and without knowledge of the IndexReader you cannot access the document or {@link /// FieldCache}. 
class LPPAPI CustomScoreProvider : public LuceneObject { public: /// Creates a new instance of the provider class for the given {@link IndexReader}. CustomScoreProvider(const IndexReaderPtr& reader); virtual ~CustomScoreProvider(); LUCENE_CLASS(CustomScoreProvider); protected: IndexReaderPtr reader; public: /// Compute a custom score by the subQuery score and a number of ValueSourceQuery scores. /// /// Subclasses can override this method to modify the custom score. /// /// If your custom scoring is different than the default herein you should override at least one of /// the two customScore() methods. If the number of ValueSourceQueries is always < 2 it is /// sufficient to override the other {@link #customScore(int32_t, double, double) customScore()} /// method, which is simpler. /// /// The default computation herein is a multiplication of given scores: ///
    /// ModifiedScore = valSrcScore * valSrcScores[0] * valSrcScores[1] * ...
    /// 
/// /// @param doc id of scored doc. /// @param subQueryScore score of that doc by the subQuery. /// @param valSrcScores scores of that doc by the ValueSourceQuery. /// @return custom score. virtual double customScore(int32_t doc, double subQueryScore, Collection valSrcScores); /// Compute a custom score by the subQuery score and the ValueSourceQuery score. /// /// Subclasses can override this method to modify the custom score. /// /// If your custom scoring is different than the default herein you should override at least one of the /// two customScore() methods. If the number of ValueSourceQueries is always < 2 it is sufficient to /// override this customScore() method, which is simpler. /// /// The default computation herein is a multiplication of the two scores: ///
    /// ModifiedScore = subQueryScore * valSrcScore
    /// 
/// /// @param doc id of scored doc. /// @param subQueryScore score of that doc by the subQuery. /// @param valSrcScore score of that doc by the ValueSourceQuery. /// @return custom score. virtual double customScore(int32_t doc, double subQueryScore, double valSrcScore); /// Explain the custom score. Whenever overriding {@link #customScore(int32_t, double, Collection)}, /// this method should also be overridden to provide the correct explanation for the part of the custom scoring. /// /// @param doc doc being explained. /// @param subQueryExpl explanation for the sub-query part. /// @param valSrcExpls explanation for the value source part. /// @return an explanation for the custom score virtual ExplanationPtr customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, Collection valSrcExpls); /// Explain the custom score. Whenever overriding {@link #customScore(int32_t, double, double)}, /// this method should also be overridden to provide the correct explanation for the part of the custom scoring. /// @param doc doc being explained. /// @param subQueryExpl explanation for the sub-query part. /// @param valSrcExpl explanation for the value source part. /// @return an explanation for the custom score virtual ExplanationPtr customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, const ExplanationPtr& valSrcExpl); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CustomScoreQuery.h000066400000000000000000000142231456444476200240230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef CUSTOMSCOREQUERY_H #define CUSTOMSCOREQUERY_H #include "Query.h" namespace Lucene { /// Query that sets document score as a programmatic function of several (sub) scores: ///
    ///
  1. the score of its subQuery (any query) ///
  2. (optional) the score of its ValueSourceQuery (or queries). For most simple/convenient use cases /// this query is likely to be a {@link FieldScoreQuery} ///
/// Subclasses can modify the computation by overriding {@link #getCustomScoreProvider}. class LPPAPI CustomScoreQuery : public Query { public: /// Create a CustomScoreQuery over input subQuery. /// @param subQuery the sub query whose scored is being customed. Must not be null. CustomScoreQuery(const QueryPtr& subQuery); /// Create a CustomScoreQuery over input subQuery and a {@link ValueSourceQuery}. /// @param subQuery the sub query whose score is being customized. Must not be null. /// @param valSrcQuery a value source query whose scores are used in the custom score computation. For /// most simple/convenient use case this would be a {@link FieldScoreQuery}. This parameter is /// optional - it can be null. CustomScoreQuery(const QueryPtr& subQuery, const ValueSourceQueryPtr& valSrcQuery); /// Create a CustomScoreQuery over input subQuery and a {@link ValueSourceQuery}. /// @param subQuery the sub query whose score is being customized. Must not be null. /// @param valSrcQueries value source queries whose scores are used in the custom score computation. /// For most simple/convenient use case these would be {@link FieldScoreQueries}. This parameter is /// optional - it can be null or even an empty array. CustomScoreQuery(const QueryPtr& subQuery, Collection valSrcQueries); virtual ~CustomScoreQuery(); LUCENE_CLASS(CustomScoreQuery); protected: QueryPtr subQuery; Collection valSrcQueries; // never null (empty array if there are no valSrcQueries). bool strict; // if true, valueSource part of query does not take part in weights normalization. public: using Query::toString; virtual QueryPtr rewrite(const IndexReaderPtr& reader); virtual void extractTerms(SetTerm terms); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual String toString(const String& field); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); /// Compute a custom score by the subQuery score and a number of ValueSourceQuery scores. 
/// /// Deprecated: Will be removed in Lucene 3.1. /// /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment /// search (since Lucene 2.9). /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} /// for the given {@link IndexReader}. virtual double customScore(int32_t doc, double subQueryScore, Collection valSrcScores); /// Compute a custom score by the subQuery score and the ValueSourceQuery score. /// /// Deprecated: Will be removed in Lucene 3.1. /// /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment /// search (since Lucene 2.9). /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} /// for the given {@link IndexReader}. virtual double customScore(int32_t doc, double subQueryScore, double valSrcScore); /// Explain the custom score. /// /// Deprecated: Will be removed in Lucene 3.1. /// /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment /// search (since Lucene 2.9). /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} /// for the given {@link IndexReader}. virtual ExplanationPtr customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, Collection valSrcExpls); /// Explain the custom score. /// /// Deprecated Will be removed in Lucene 3.1. /// /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment /// search (since Lucene 2.9). /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} /// for the given {@link IndexReader}. virtual ExplanationPtr customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, const ExplanationPtr& valSrcExpl); virtual WeightPtr createWeight(const SearcherPtr& searcher); /// Checks if this is strict custom scoring. 
In strict custom scoring, the ValueSource part does not /// participate in weight normalization. This may be useful when one wants full control over how scores /// are modified, and does not care about normalizing by the ValueSource part. One particular case where /// this is useful if for testing this query. /// /// Note: only has effect when the ValueSource part is not null. virtual bool isStrict(); /// Set the strict mode of this query. /// @param strict The strict mode to set. /// @see #isStrict() virtual void setStrict(bool strict); /// A short name of this query, used in {@link #toString(String)}. virtual String name(); protected: void ConstructQuery(const QueryPtr& subQuery, Collection valSrcQueries); /// Returns a {@link CustomScoreProvider} that calculates the custom scores for the given {@link /// IndexReader}. The default implementation returns a default implementation as specified in /// the docs of {@link CustomScoreProvider}. virtual CustomScoreProviderPtr getCustomScoreProvider(const IndexReaderPtr& reader); friend class CustomWeight; friend class CustomScorer; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/CycleCheck.h000066400000000000000000000022151456444476200225220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CYCLECHECK_H #define CYCLECHECK_H #include "Lucene.h" namespace Lucene { /// Debug utility to track shared_ptr utilization. 
class LPPAPI CycleCheck { public: virtual ~CycleCheck(); protected: static MapStringInt cycleMap; static Set staticRefs; protected: void addRef(const String& className, int32_t ref); static void addStatic(LuceneObjectPtr* staticRef); public: template static void addStatic(TYPE& staticRef) { addStatic(reinterpret_cast(&staticRef)); } static void dumpRefs(); }; template class CycleCheckT : public CycleCheck { public: CycleCheckT() { addRef(TYPE::_getClassName(), 1); } virtual ~CycleCheckT() { addRef(TYPE::_getClassName(), -1); } }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DateField.h000066400000000000000000000045311456444476200223510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DATEFIELD_H #define DATEFIELD_H #include "LuceneObject.h" namespace Lucene { /// Provides support for converting dates to strings and vice-versa. The strings are structured so that /// lexicographic sorting orders by date, which makes them suitable for use as field values and search terms. /// /// Note that this class saves dates with millisecond granularity, which is bad for {@link TermRangeQuery} and /// {@link PrefixQuery}, as those queries are expanded to a BooleanQuery with a potentially large number of terms /// when searching. Thus you might want to use {@link DateTools} instead. /// /// Note: dates before 1970 cannot be used, and therefore cannot be indexed when using this class. See {@link /// DateTools} for an alternative without such a limitation. /// /// Another approach is {@link NumericUtils}, which provides a sortable binary representation (prefix encoded) /// of numeric values, which date/time are. 
For indexing a {@link Date} or {@link Calendar}, just get the unix /// timestamp as long using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and index this as a numeric /// value with {@link NumericField} and use {@link NumericRangeQuery} to query it. /// /// @deprecated If you build a new index, use {@link DateTools} or {@link NumericField} instead. This class is /// included for use with existing indices and will be removed in a future release (possibly Lucene 4.0). class LPPAPI DateField : public LuceneObject { public: virtual ~DateField(); LUCENE_CLASS(DateField); protected: static int32_t DATE_LEN(); public: static const String& MIN_DATE_STRING(); static const String& MAX_DATE_STRING(); /// Converts a Date to a string suitable for indexing. static String dateToString(const boost::posix_time::ptime& date); /// Converts a millisecond time to a string suitable for indexing. static String timeToString(int64_t time); /// Converts a string-encoded date into a millisecond time. static int64_t stringToTime(const String& s); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DateTools.h000066400000000000000000000115701456444476200224270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DATETOOLS_H #define DATETOOLS_H #include "LuceneObject.h" namespace Lucene { /// Provides support for converting dates to strings and vice-versa. The strings are structured so that /// lexicographic sorting orders them by date, which makes them suitable for use as field values and search /// terms. /// /// This class also helps you to limit the resolution of your dates. 
Do not save dates with a finer resolution /// than you really need, as then RangeQuery and PrefixQuery will require more memory and become slower. /// /// Compared to {@link DateField} the strings generated by the methods in this class take slightly more space, /// unless your selected resolution is set to Resolution.DAY or lower. /// /// Another approach is {@link NumericUtils}, which provides a sortable binary representation (prefix encoded) /// of numeric values, which date/time are. For indexing a {@link Date} or {@link Calendar}, just get the unix /// timestamp as long using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and index this as a numeric /// value with {@link NumericField} and use {@link NumericRangeQuery} to query it. class LPPAPI DateTools : public LuceneObject { public: virtual ~DateTools(); LUCENE_CLASS(DateTools); public: enum Resolution { RESOLUTION_NULL, RESOLUTION_YEAR, RESOLUTION_MONTH, RESOLUTION_DAY, RESOLUTION_HOUR, RESOLUTION_MINUTE, RESOLUTION_SECOND, RESOLUTION_MILLISECOND }; enum DateOrder { DATEORDER_LOCALE, DATEORDER_YMD, DATEORDER_DMY, DATEORDER_MDY }; protected: static DateOrder dateOrder; public: /// Converts a Date to a string suitable for indexing. /// @param date the date to be converted /// @param resolution the desired resolution /// @return a string in format yyyyMMddHHmmssSSS or shorter, depending on resolution; using GMT as timezone static String dateToString(const boost::posix_time::ptime& date, Resolution resolution); /// Converts a millisecond time to a string suitable for indexing. 
/// @param time the date expressed as milliseconds since January 1, 1970, 00:00:00 GMT /// @param resolution the desired resolution /// @return a string in format yyyyMMddHHmmssSSS or shorter, depending on resolution; using GMT as timezone static String timeToString(int64_t time, Resolution resolution); /// Converts a string produced by timeToString or dateToString back to a time, represented as the number of /// milliseconds since January 1, 1970, 00:00:00 GMT. /// @param dateString the date string to be converted /// @return the number of milliseconds since January 1, 1970, 00:00:00 GMT static int64_t stringToTime(const String& dateString); /// Converts a string produced by timeToString or dateToString back to a time, represented as a ptime object. /// @param dateString the date string to be converted /// @return the parsed time as a ptime object static boost::posix_time::ptime stringToDate(const String& dateString); /// Limit a date's resolution. For example, the date 2004-09-21 13:50:11 will be changed to 2004-09-01 00:00:00 /// when using Resolution.MONTH. /// @param resolution The desired resolution of the date to be returned /// @return the date with all values more precise than resolution set to 0 or 1 static boost::posix_time::ptime round(const boost::posix_time::ptime& date, Resolution resolution); /// Limit a date's resolution. For example, the date 1095767411000 (which represents 2004-09-21 13:50:11) will /// be changed to 1093989600000 (2004-09-01 00:00:00) when using Resolution.MONTH. /// @param resolution The desired resolution of the date to be returned /// @return the date with all values more precise than resolution set to 0 or 1, expressed as milliseconds /// since January 1, 1970, 00:00:00 GMT static int64_t round(int64_t time, Resolution resolution); /// Allow overriding of date ordering. 
static void setDateOrder(DateTools::DateOrder order); /// Return date ordering based on given locale (or overridden in {@link #setDateOrder(DateTools::DateOrder)}). static DateTools::DateOrder getDateOrder(std::locale locale = std::locale()); /// Parse a given date using locale date format /// @param dateString the date string to be converted /// @param locale the locale to use for parsing /// @return the parsed time as a ptime object static boost::posix_time::ptime parseDate(const String& dateString, std::locale locale = std::locale()); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DefaultSimilarity.h000066400000000000000000000037661456444476200241740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DEFAULTSIMILARITY_H #define DEFAULTSIMILARITY_H #include "Similarity.h" namespace Lucene { /// Default scoring implementation. class LPPAPI DefaultSimilarity : public Similarity { public: DefaultSimilarity(); virtual ~DefaultSimilarity(); LUCENE_CLASS(DefaultSimilarity); protected: bool discountOverlaps; // Default false public: /// Implemented as state->getBoost() * lengthNorm(numTerms), where numTerms is {@link /// FieldInvertState#getLength()} if {@link #setDiscountOverlaps} is false, else it's {@link /// FieldInvertState#getLength()} - {@link FieldInvertState#getNumOverlap()}. virtual double computeNorm(const String& fieldName, const FieldInvertStatePtr& state); /// Implemented as 1 / sqrt(numTerms). virtual double lengthNorm(const String& fieldName, int32_t numTokens); /// Implemented as 1 / sqrt(sumOfSquaredWeights). virtual double queryNorm(double sumOfSquaredWeights); /// Implemented as sqrt(freq). 
virtual double tf(double freq); /// Implemented as 1 / (distance + 1). virtual double sloppyFreq(int32_t distance); /// Implemented as log(numDocs / (docFreq + 1)) + 1. virtual double idf(int32_t docFreq, int32_t numDocs); /// Implemented as overlap / maxOverlap. virtual double coord(int32_t overlap, int32_t maxOverlap); /// Determines whether overlap tokens (Tokens with 0 position increment) are ignored when computing /// norm. By default this is false, meaning overlap tokens are counted just like non-overlap tokens. /// @see #computeNorm void setDiscountOverlaps(bool v); /// @see #setDiscountOverlaps bool getDiscountOverlaps(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DefaultSkipListReader.h000066400000000000000000000041361456444476200247230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DEFAULTSKIPLISTREADER_H #define DEFAULTSKIPLISTREADER_H #include "MultiLevelSkipListReader.h" namespace Lucene { /// Implements the skip list reader for the default posting list format that stores positions and payloads. 
class DefaultSkipListReader : public MultiLevelSkipListReader { public: DefaultSkipListReader(const IndexInputPtr& skipStream, int32_t maxSkipLevels, int32_t skipInterval); virtual ~DefaultSkipListReader(); LUCENE_CLASS(DefaultSkipListReader); protected: bool currentFieldStoresPayloads; Collection freqPointer; Collection proxPointer; Collection payloadLength; int64_t lastFreqPointer; int64_t lastProxPointer; int32_t lastPayloadLength; public: void init(int64_t skipPointer, int64_t freqBasePointer, int64_t proxBasePointer, int32_t df, bool storesPayloads); /// Returns the freq pointer of the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} /// has skipped. int64_t getFreqPointer(); /// Returns the prox pointer of the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} /// has skipped. int64_t getProxPointer(); /// Returns the payload length of the payload stored just before the doc to which the last call of {@link /// MultiLevelSkipListReader#skipTo(int)} has skipped. int32_t getPayloadLength(); protected: /// Seeks the skip entry on the given level virtual void seekChild(int32_t level); /// Copies the values of the last read skip entry on this level virtual void setLastSkipData(int32_t level); /// Subclasses must implement the actual skip data encoding in this method. virtual int32_t readSkipData(int32_t level, const IndexInputPtr& skipStream); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DefaultSkipListWriter.h000066400000000000000000000032521456444476200247730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef DEFAULTSKIPLISTWRITER_H #define DEFAULTSKIPLISTWRITER_H #include "MultiLevelSkipListWriter.h" namespace Lucene { /// Implements the skip list writer for the default posting list format that stores positions and payloads. class DefaultSkipListWriter : public MultiLevelSkipListWriter { public: DefaultSkipListWriter(int32_t skipInterval, int32_t numberOfSkipLevels, int32_t docCount, const IndexOutputPtr& freqOutput, const IndexOutputPtr& proxOutput); virtual ~DefaultSkipListWriter(); LUCENE_CLASS(DefaultSkipListWriter); protected: Collection lastSkipDoc; Collection lastSkipPayloadLength; Collection lastSkipFreqPointer; Collection lastSkipProxPointer; IndexOutputPtr freqOutput; IndexOutputPtr proxOutput; int32_t curDoc; bool curStorePayloads; int32_t curPayloadLength; int64_t curFreqPointer; int64_t curProxPointer; public: void setFreqOutput(const IndexOutputPtr& freqOutput); void setProxOutput(const IndexOutputPtr& proxOutput); /// Sets the values for the current skip data. void setSkipData(int32_t doc, bool storePayloads, int32_t payloadLength); protected: virtual void resetSkip(); virtual void writeSkipData(int32_t level, const IndexOutputPtr& skipBuffer); friend class FormatPostingsTermsWriter; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Directory.h000066400000000000000000000112001456444476200224630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DIRECTORY_H #define DIRECTORY_H #include "LuceneObject.h" namespace Lucene { /// A Directory is a flat list of files. Files may be written once, when they are created. 
Once a file /// is created it may only be opened for read, or deleted. Random access is permitted both when reading /// and writing. Directory locking is implemented by an instance of {@link LockFactory}, and can be changed /// for each Directory instance using {@link #setLockFactory}. class LPPAPI Directory : public LuceneObject { public: Directory(); virtual ~Directory(); LUCENE_CLASS(Directory); protected: bool isOpen; /// Holds the LockFactory instance (implements locking for this Directory instance). LockFactoryPtr lockFactory; public: /// Returns an array of strings, one for each file in the directory. virtual HashSet listAll() = 0; /// Returns true if a file with the given name exists. virtual bool fileExists(const String& name) = 0; /// Returns the time the named file was last modified. virtual uint64_t fileModified(const String& name) = 0; /// Set the modified time of an existing file to now. virtual void touchFile(const String& name) = 0; /// Removes an existing file in the directory. virtual void deleteFile(const String& name) = 0; /// Returns the length of a file in the directory. virtual int64_t fileLength(const String& name) = 0; /// Creates a new, empty file in the directory with the given name. /// Returns a stream writing this file. virtual IndexOutputPtr createOutput(const String& name) = 0; /// Returns a stream reading an existing file. virtual IndexInputPtr openInput(const String& name) = 0; /// Closes the store. virtual void close() = 0; /// Ensure that any writes to this file are moved to stable storage. Lucene uses this to properly commit /// changes to the index, to prevent a machine/OS crash from corrupting the index. virtual void sync(const String& name); /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory /// implementation may ignore the buffer size. Currently the only Directory implementations that respect /// this parameter are {@link FSDirectory} and {@link CompoundFileReader}. 
virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); /// Construct a {@link Lock}. /// @param name the name of the lock file. virtual LockPtr makeLock(const String& name); /// Attempt to clear (forcefully unlock and remove) the specified lock. Only call this at a time when you /// are certain this lock is no longer in use. /// @param name name of the lock to be cleared. void clearLock(const String& name); /// Set the LockFactory that this Directory instance should use for its locking implementation. Each * instance /// of LockFactory should only be used for one directory (ie, do not share a single instance across multiple /// Directories). /// @param lockFactory instance of {@link LockFactory}. void setLockFactory(const LockFactoryPtr& lockFactory); /// Get the LockFactory that this Directory instance is using for its locking implementation. Note that this /// may be null for Directory implementations that provide their own locking implementation. LockFactoryPtr getLockFactory(); /// Return a string identifier that uniquely differentiates this Directory instance from other Directory /// instances. This ID should be the same if two Directory instances are considered "the same index". /// This is how locking "scopes" to the right index. virtual String getLockID(); virtual String toString(); /// Copy contents of a directory src to a directory dest. If a file in src already exists in dest then the one /// in dest will be blindly overwritten. NOTE: the source directory cannot change while this method is running. /// Otherwise the results are undefined. /// @param src source directory. /// @param dest destination directory. /// @param closeDirSrc if true, call {@link #close()} method on source directory. static void copy(const DirectoryPtr& src, const DirectoryPtr& dest, bool closeDirSrc); protected: /// @throws AlreadyClosed if this Directory is closed. 
void ensureOpen(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DirectoryReader.h000066400000000000000000000304711456444476200236210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DIRECTORYREADER_H #define DIRECTORYREADER_H #include "IndexReader.h" #include "TermEnum.h" #include "TermPositions.h" #include "IndexCommit.h" #include "SegmentMergeQueue.h" namespace Lucene { /// An IndexReader which reads indexes with multiple segments. class LPPAPI DirectoryReader : public IndexReader { public: /// Construct reading the named set of readers. DirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& sis, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor); /// Used by near real-time search. 
DirectoryReader(const IndexWriterPtr& writer, const SegmentInfosPtr& infos, int32_t termInfosIndexDivisor); /// This constructor is only used for {@link #reopen()} DirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& infos, Collection oldReaders, Collection oldStarts, MapStringByteArray oldNormsCache, bool readOnly, bool doClone, int32_t termInfosIndexDivisor); virtual ~DirectoryReader(); LUCENE_CLASS(DirectoryReader); protected: DirectoryPtr _directory; bool readOnly; IndexWriterWeakPtr _writer; IndexDeletionPolicyPtr deletionPolicy; HashSet synced; LockPtr writeLock; SegmentInfosPtr segmentInfos; SegmentInfosPtr segmentInfosStart; bool stale; int32_t termInfosIndexDivisor; bool rollbackHasChanges; Collection subReaders; Collection starts; // 1st docno for each segment MapStringByteArray normsCache; int32_t _maxDoc; int32_t _numDocs; bool _hasDeletions; // Max version in index as of when we opened; this can be > our current segmentInfos version // in case we were opened on a past IndexCommit int64_t maxIndexVersion; public: void _initialize(Collection subReaders); static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& commit, bool readOnly, int32_t termInfosIndexDivisor); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr& other = LuceneObjectPtr()); virtual IndexReaderPtr reopen(); virtual IndexReaderPtr reopen(bool openReadOnly); virtual IndexReaderPtr reopen(const IndexCommitPtr& commit); /// Version number when this IndexReader was opened. virtual int64_t getVersion(); /// Return an array of term frequency vectors for the specified document. virtual Collection getTermFreqVectors(int32_t docNumber); /// Return a term frequency vector for the specified document and field. 
virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of the {@link TermFreqVector}. virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper); /// Map all the term vectors for all fields in a Document virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper); /// Checks is the index is optimized (if it has a single segment and no deletions). Not implemented in the IndexReader base class. /// @return true if the index is optimized; false otherwise virtual bool isOptimized(); /// Returns the number of documents in this index. virtual int32_t numDocs(); /// Returns one greater than the largest possible document number. virtual int32_t maxDoc(); /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine what {@link Field}s to load and how they should be loaded. virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector); /// Returns true if document n has been deleted virtual bool isDeleted(int32_t n); /// Returns true if any documents have been deleted virtual bool hasDeletions(); /// Find reader for doc n static int32_t readerIndex(int32_t n, Collection starts, int32_t numSubReaders); /// Returns true if there are norms stored for this field. virtual bool hasNorms(const String& field); /// Returns the byte-encoded normalization factor for the named field of every document. virtual ByteArray norms(const String& field); /// Reads the byte-encoded normalization factor for the named field of every document. virtual void norms(const String& field, ByteArray norms, int32_t offset); /// Returns an enumeration of all the terms in the index. virtual TermEnumPtr terms(); /// Returns an enumeration of all terms starting at a given term. 
virtual TermEnumPtr terms(const TermPtr& t); /// Returns the number of documents containing the term t. virtual int32_t docFreq(const TermPtr& t); /// Returns an unpositioned {@link TermDocs} enumerator. virtual TermDocsPtr termDocs(); /// Returns an unpositioned {@link TermPositions} enumerator. virtual TermPositionsPtr termPositions(); /// Tries to acquire the WriteLock on this directory. this method is only valid if this /// IndexReader is directory owner. virtual void acquireWriteLock(); void startCommit(); void rollbackCommit(); /// Retrieve the String userData optionally passed to IndexWriter#commit. virtual MapStringString getCommitUserData(); /// Check whether any new changes have occurred to the index since this reader was opened. virtual bool isCurrent(); /// Get a list of unique field names that exist in this index and have the specified field /// option information. virtual HashSet getFieldNames(FieldOption fieldOption); static HashSet getFieldNames(FieldOption fieldOption, Collection subReaders); /// Returns the sequential sub readers that this reader is logically composed of. virtual Collection getSequentialSubReaders(); /// Returns the directory this index resides in. virtual DirectoryPtr directory(); virtual int32_t getTermInfosIndexDivisor(); /// Return the IndexCommit that this reader has opened. virtual IndexCommitPtr getIndexCommit(); /// Returns all commit points that exist in the Directory. static Collection listCommits(const DirectoryPtr& dir); protected: IndexReaderPtr doReopenFromWriter(bool openReadOnly, const IndexCommitPtr& commit); IndexReaderPtr doReopen(bool openReadOnly, const IndexCommitPtr& commit); IndexReaderPtr doReopenNoWriter(bool openReadOnly, const IndexCommitPtr& commit); DirectoryReaderPtr doReopen(const SegmentInfosPtr& infos, bool doClone, bool openReadOnly); /// Implements deletion of the document numbered docNum. virtual void doDelete(int32_t docNum); /// Implements actual undeleteAll() in subclass. 
virtual void doUndeleteAll(); int32_t readerIndex(int32_t n); /// Implements setNorm in subclass. virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); /// Commit changes resulting from delete, undeleteAll, or setNorm operations /// /// If an exception is hit, then either no changes or all changes will have been committed to the index (transactional semantics). virtual void doCommit(MapStringString commitUserData); /// Implements close. virtual void doClose(); friend class FindSegmentsReopen; }; class MultiTermEnum : public TermEnum { public: MultiTermEnum(const IndexReaderPtr& topReader, Collection readers, Collection starts, const TermPtr& t); virtual ~MultiTermEnum(); LUCENE_CLASS(MultiTermEnum); protected: SegmentMergeQueuePtr queue; TermPtr _term; int32_t _docFreq; public: IndexReaderWeakPtr _topReader; Collection matchingSegments; // null terminated array of matching segments public: /// Increments the enumeration to the next element. True if one exists. virtual bool next(); /// Returns the current Term in the enumeration. virtual TermPtr term(); /// Returns the docFreq of the current Term in the enumeration. virtual int32_t docFreq(); /// Closes the enumeration to further activity, freeing resources. virtual void close(); }; class MultiTermDocs : public TermPositions, public LuceneObject { public: MultiTermDocs(const IndexReaderPtr& topReader, Collection r, Collection s); virtual ~MultiTermDocs(); LUCENE_CLASS(MultiTermDocs); protected: IndexReaderWeakPtr _topReader; // used for matching TermEnum to TermDocs Collection readers; Collection starts; TermPtr term; int32_t base; int32_t pointer; Collection readerTermDocs; TermDocsPtr current; MultiTermEnumPtr tenum; // the term enum used for seeking int32_t matchingSegmentPos; // position into the matching segments from tenum SegmentMergeInfoPtr smi; // current segment mere info public: /// Returns the current document number. 
virtual int32_t doc(); /// Returns the frequency of the term within the current document. virtual int32_t freq(); /// Sets this to the data for a term. virtual void seek(const TermPtr& term); /// Sets this to the data for the current term in a {@link TermEnum}. virtual void seek(const TermEnumPtr& termEnum); /// Moves to the next pair in the enumeration. virtual bool next(); /// Attempts to read multiple entries from the enumeration, up to length of docs. /// Optimized implementation. virtual int32_t read(Collection& docs, Collection& freqs); /// Skips entries to the first beyond the current whose document number is greater than or equal to target. virtual bool skipTo(int32_t target); /// Frees associated resources. virtual void close(); protected: virtual TermDocsPtr termDocs(int32_t i); virtual TermDocsPtr termDocs(const IndexReaderPtr& reader); }; class MultiTermPositions : public MultiTermDocs { public: MultiTermPositions(const IndexReaderPtr& topReader, Collection r, Collection s); virtual ~MultiTermPositions(); LUCENE_CLASS(MultiTermPositions); public: /// Returns next position in the current document. virtual int32_t nextPosition(); /// Returns the length of the payload at the current term position. virtual int32_t getPayloadLength(); /// Returns the payload data at the current term position. virtual ByteArray getPayload(ByteArray data, int32_t offset); /// Checks if a payload can be loaded at this position. virtual bool isPayloadAvailable(); protected: virtual TermDocsPtr termDocs(const IndexReaderPtr& reader); }; class ReaderCommit : public IndexCommit { public: ReaderCommit(const SegmentInfosPtr& infos, const DirectoryPtr& dir); virtual ~ReaderCommit(); LUCENE_CLASS(ReaderCommit); protected: String segmentsFileName; HashSet files; DirectoryPtr dir; int64_t generation; int64_t version; bool _isOptimized; MapStringString userData; public: virtual String toString(); /// Returns true if this commit is an optimized index. 
virtual bool isOptimized(); /// Two IndexCommits are equal if both their Directory and versions are equal. virtual String getSegmentsFileName(); /// Returns all index files referenced by this commit point. virtual HashSet getFileNames(); /// Returns the {@link Directory} for the index. virtual DirectoryPtr getDirectory(); /// Returns the version for this IndexCommit. virtual int64_t getVersion(); /// Returns the generation (the _N in segments_N) for this IndexCommit. virtual int64_t getGeneration(); virtual bool isDeleted(); /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. virtual MapStringString getUserData(); virtual void deleteCommit(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DisjunctionMaxQuery.h000066400000000000000000000107501456444476200245150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DISJUNCTIONMAXQUERY_H #define DISJUNCTIONMAXQUERY_H #include "Query.h" namespace Lucene { /// A query that generates the union of documents produced by its subqueries, and that scores each /// document with the maximum score for that document as produced by any subquery, plus a tie breaking /// increment for any additional matching subqueries. This is useful when searching for a word in /// multiple fields with different boost factors (so that the fields cannot be combined equivalently /// into a single search field). We want the primary score to be the one associated with the highest /// boost, not the sum of the field scores (as BooleanQuery would give). 
If the query is "albino /// elephant" this ensures that "albino" matching one field and "elephant" matching another gets a /// higher score than "albino" matching both fields. To get this result, use both BooleanQuery and /// DisjunctionMaxQuery: for each term a DisjunctionMaxQuery searches for it in each field, while the /// set of these DisjunctionMaxQuery's is combined into a BooleanQuery. The tie breaker capability /// allows results that include the same term in multiple fields to be judged better than results that /// include this term in only the best of those multiple fields, without confusing this with the better /// case of two different terms in the multiple fields. class LPPAPI DisjunctionMaxQuery : public Query { public: /// Creates a new empty DisjunctionMaxQuery. Use add() to add the subqueries. /// @param tieBreakerMultiplier the score of each non-maximum disjunct for a document is multiplied /// by this weight and added into the final score. If non-zero, the value should be small, on the /// order of 0.1, which says that 10 occurrences of word in a lower-scored field that is also in a /// higher scored field is just as good as a unique word in the lower scored field (ie., one that is /// not in any higher scored field. DisjunctionMaxQuery(double tieBreakerMultiplier = 0.0); /// Creates a new DisjunctionMaxQuery /// @param disjuncts A Collection of all the disjuncts to add /// @param tieBreakerMultiplier The weight to give to each matching non-maximum disjunct DisjunctionMaxQuery(Collection disjuncts, double tieBreakerMultiplier); virtual ~DisjunctionMaxQuery(); LUCENE_CLASS(DisjunctionMaxQuery); protected: /// The subqueries Collection disjuncts; /// Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. 
double tieBreakerMultiplier; public: using Query::toString; /// Add a subquery to this disjunction /// @param query the disjunct added void add(const QueryPtr& query); /// Add a collection of disjuncts to this disjunction void add(Collection disjuncts); /// An iterator over the disjuncts Collection::iterator begin(); Collection::iterator end(); /// Create the Weight used to score us virtual WeightPtr createWeight(const SearcherPtr& searcher); /// Optimize our representation and our subqueries representations /// @param reader the IndexReader we query /// @return an optimized copy of us (which may not be a copy if there is nothing to optimize) virtual QueryPtr rewrite(const IndexReaderPtr& reader); /// Create a shallow copy of us - used in rewriting if necessary /// @return a copy of us (but reuse, don't copy, our subqueries) virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); /// Adds all terms occurring in this query to the terms set. virtual void extractTerms(SetTerm terms); /// Pretty print us. /// @param field the field to which we are applied /// @return a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost" virtual String toString(const String& field); /// @return true if other is a DisjunctionMaxQuery with the same boost and the same subqueries, in the /// same order, as us virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); friend class DisjunctionMaxWeight; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DisjunctionMaxScorer.h000066400000000000000000000045461456444476200246530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef DISJUNCTIONMAXSCORER_H #define DISJUNCTIONMAXSCORER_H #include "Scorer.h" namespace Lucene { /// The Scorer for DisjunctionMaxQuery. The union of all documents generated by the the subquery scorers /// is generated in document number order. The score for each document is the maximum of the scores computed /// by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores /// for the other subqueries that generate the document. class DisjunctionMaxScorer : public Scorer { public: DisjunctionMaxScorer(double tieBreakerMultiplier, const SimilarityPtr& similarity, Collection subScorers, int32_t numScorers); virtual ~DisjunctionMaxScorer(); LUCENE_CLASS(DisjunctionMaxScorer); protected: /// The scorers for subqueries that have remaining docs, kept as a min heap by number of next doc. Collection subScorers; int32_t numScorers; /// Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. double tieBreakerMultiplier; int32_t doc; public: virtual int32_t nextDoc(); virtual int32_t docID(); /// Determine the current document score. Initially invalid, until {@link #next()} is called the first time. /// @return the score of the current generated document virtual double score(); virtual int32_t advance(int32_t target); protected: /// Recursively iterate all subScorers that generated last doc computing sum and max void scoreAll(int32_t root, int32_t size, int32_t doc, Collection sum, Collection max); /// Organize subScorers into a min heap with scorers generating the earliest document on top. void heapify(); /// The subtree of subScorers at root is a min heap except possibly for its root element. Bubble the root /// down as required to make the subtree a heap. 
void heapAdjust(int32_t root); /// Remove the root Scorer from subScorers and re-establish it as a heap void heapRemoveRoot(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DisjunctionSumScorer.h000066400000000000000000000076721456444476200246750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DISJUNCTIONSUMSCORER_H #define DISJUNCTIONSUMSCORER_H #include "Scorer.h" namespace Lucene { /// A Scorer for OR like queries, counterpart of ConjunctionScorer. This Scorer implements {@link /// Scorer#skipTo(int32_t)} and uses skipTo() on the given Scorers. class DisjunctionSumScorer : public Scorer { public: DisjunctionSumScorer(Collection subScorers, int32_t minimumNrMatchers = 1); virtual ~DisjunctionSumScorer(); LUCENE_CLASS(DisjunctionSumScorer); protected: /// The number of subscorers. int32_t nrScorers; /// The subscorers. Collection subScorers; /// The minimum number of scorers that should match. int32_t minimumNrMatchers; /// The scorerDocQueue contains all subscorers ordered by their current doc(), with the minimum at /// the top. The scorerDocQueue is initialized the first time next() or skipTo() is called. An exhausted /// scorer is immediately removed from the scorerDocQueue. If less than the minimumNrMatchers scorers /// remain in the scorerDocQueue next() and skipTo() return false. /// /// After each to call to next() or skipTo() currentSumScore is the total score of the current matching doc, /// nrMatchers is the number of matching scorers, and all scorers are after the matching doc, or are exhausted. ScorerDocQueuePtr scorerDocQueue; /// The document number of the current match. 
int32_t currentDoc; /// The number of subscorers that provide the current match. int32_t _nrMatchers; double currentScore; public: virtual void initialize(); virtual void score(const CollectorPtr& collector); virtual int32_t nextDoc(); /// Returns the score of the current document matching the query. Initially invalid, until {@link #next()} /// is called the first time. virtual double score(); virtual int32_t docID(); /// Returns the number of subscorers matching the current document. Initially invalid, until {@link #next()} /// is called the first time. int32_t nrMatchers(); /// Advances to the first match beyond the current whose document number is greater than or equal to a given /// target. The implementation uses the skipTo() method on the subscorers. /// /// @param target The target document number. /// @return the document whose number is greater than or equal to the given target, or -1 if none exist. virtual int32_t advance(int32_t target); protected: /// Called the first time next() or skipTo() is called to initialize scorerDocQueue. void initScorerDocQueue(); /// Collects matching documents in a range. Hook for optimization. Note that {@link #next()} must be /// called once before this method is called for the first time. /// @param collector The collector to which all matching documents are passed through. /// @param max Do not score documents past this. /// @return true if more matching documents may remain. virtual bool score(const CollectorPtr& collector, int32_t max, int32_t firstDocID); /// Advance all subscorers after the current document determined by the top of the scorerDocQueue. Repeat /// until at least the minimum number of subscorers match on the same document and all subscorers are after /// that document or are exhausted. On entry the scorerDocQueue has at least minimumNrMatchers available. /// At least the scorer with the minimum document number will be advanced. /// @return true if there is a match. 
In case there is a match, currentDoc, currentSumScore and nrMatchers /// describe the match. bool advanceAfterCurrent(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocConsumer.h000066400000000000000000000016101456444476200227440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCCONSUMER_H #define DOCCONSUMER_H #include "LuceneObject.h" namespace Lucene { class DocConsumer : public LuceneObject { public: virtual ~DocConsumer(); LUCENE_CLASS(DocConsumer); public: virtual DocConsumerPerThreadPtr addThread(const DocumentsWriterThreadStatePtr& perThread) = 0; virtual void flush(Collection threads, const SegmentWriteStatePtr& state) = 0; virtual void closeDocStore(const SegmentWriteStatePtr& state) = 0; virtual void abort() = 0; virtual bool freeRAM() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocConsumerPerThread.h000066400000000000000000000016421456444476200245500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCCONSUMERPERTHREAD_H #define DOCCONSUMERPERTHREAD_H #include "LuceneObject.h" namespace Lucene { class DocConsumerPerThread : public LuceneObject { public: virtual ~DocConsumerPerThread(); LUCENE_CLASS(DocConsumerPerThread); public: /// Process the document. If there is something for this document to be done in docID order, /// you should encapsulate that as a DocWriter and return it. 
/// DocumentsWriter then calls finish() on this object when it's its turn. virtual DocWriterPtr processDocument() = 0; virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocFieldConsumer.h000066400000000000000000000026711456444476200237200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDCONSUMER_H #define DOCFIELDCONSUMER_H #include "LuceneObject.h" namespace Lucene { class DocFieldConsumer : public LuceneObject { public: virtual ~DocFieldConsumer(); LUCENE_CLASS(DocFieldConsumer); protected: FieldInfosPtr fieldInfos; public: /// Called when DocumentsWriter decides to create a new segment virtual void flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) = 0; /// Called when DocumentsWriter decides to close the doc stores virtual void closeDocStore(const SegmentWriteStatePtr& state) = 0; /// Called when an aborting exception is hit virtual void abort() = 0; /// Add a new thread virtual DocFieldConsumerPerThreadPtr addThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread) = 0; /// Called when DocumentsWriter is using too much RAM. The consumer should free RAM, if possible, returning /// true if any RAM was in fact freed. virtual bool freeRAM() = 0; virtual void setFieldInfos(const FieldInfosPtr& fieldInfos); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocFieldConsumerPerField.h000066400000000000000000000014341456444476200253270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDCONSUMERPERFIELD_H #define DOCFIELDCONSUMERPERFIELD_H #include "LuceneObject.h" namespace Lucene { class DocFieldConsumerPerField : public LuceneObject { public: virtual ~DocFieldConsumerPerField(); LUCENE_CLASS(DocFieldConsumerPerField); public: /// Processes all occurrences of a single field virtual void processFields(Collection fields, int32_t count) = 0; virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocFieldConsumerPerThread.h000066400000000000000000000014731456444476200255160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDCONSUMERPERTHREAD_H #define DOCFIELDCONSUMERPERTHREAD_H #include "LuceneObject.h" namespace Lucene { class DocFieldConsumerPerThread : public LuceneObject { public: virtual ~DocFieldConsumerPerThread(); LUCENE_CLASS(DocFieldConsumerPerThread); public: virtual void startDocument() = 0; virtual DocWriterPtr finishDocument() = 0; virtual DocFieldConsumerPerFieldPtr addField(const FieldInfoPtr& fi) = 0; virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocFieldConsumers.h000066400000000000000000000041751456444476200241040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDCONSUMERS_H #define DOCFIELDCONSUMERS_H #include "DocFieldConsumer.h" #include "DocumentsWriter.h" namespace Lucene { /// This is just a "splitter" class: it lets you wrap two DocFieldConsumer instances as a single consumer. class DocFieldConsumers : public DocFieldConsumer { public: DocFieldConsumers(const DocFieldConsumerPtr& one, const DocFieldConsumerPtr& two); virtual ~DocFieldConsumers(); LUCENE_CLASS(DocFieldConsumers); public: DocFieldConsumerPtr one; DocFieldConsumerPtr two; Collection docFreeList; int32_t freeCount; int32_t allocCount; public: virtual void setFieldInfos(const FieldInfosPtr& fieldInfos); /// Called when DocumentsWriter decides to create a new segment virtual void flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state); /// Called when DocumentsWriter decides to close the doc stores virtual void closeDocStore(const SegmentWriteStatePtr& state); /// Called when DocumentsWriter is using too much RAM. 
virtual bool freeRAM(); /// Add a new thread virtual DocFieldConsumerPerThreadPtr addThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread); DocFieldConsumersPerDocPtr getPerDoc(); void freePerDoc(const DocFieldConsumersPerDocPtr& perDoc); }; class DocFieldConsumersPerDoc : public DocWriter { public: DocFieldConsumersPerDoc(const DocFieldConsumersPtr& fieldConsumers); virtual ~DocFieldConsumersPerDoc(); LUCENE_CLASS(DocFieldConsumersPerDoc); protected: DocFieldConsumersWeakPtr _fieldConsumers; public: DocWriterPtr one; DocWriterPtr two; public: virtual int64_t sizeInBytes(); virtual void finish(); virtual void abort(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocFieldConsumersPerField.h000066400000000000000000000021251456444476200255100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDCONSUMERSPERFIELD_H #define DOCFIELDCONSUMERSPERFIELD_H #include "DocFieldConsumerPerField.h" namespace Lucene { class DocFieldConsumersPerField : public DocFieldConsumerPerField { public: DocFieldConsumersPerField(const DocFieldConsumersPerThreadPtr& perThread, const DocFieldConsumerPerFieldPtr& one, const DocFieldConsumerPerFieldPtr& two); virtual ~DocFieldConsumersPerField(); LUCENE_CLASS(DocFieldConsumersPerField); public: DocFieldConsumerPerFieldPtr one; DocFieldConsumerPerFieldPtr two; DocFieldConsumersPerThreadWeakPtr _perThread; public: /// Processes all occurrences of a single field virtual void processFields(Collection fields, int32_t count); virtual void abort(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocFieldConsumersPerThread.h000066400000000000000000000023251456444476200256760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDCONSUMERSPERTHREAD_H #define DOCFIELDCONSUMERSPERTHREAD_H #include "DocFieldConsumerPerThread.h" namespace Lucene { class DocFieldConsumersPerThread : public DocFieldConsumerPerThread { public: DocFieldConsumersPerThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread, const DocFieldConsumersPtr& parent, const DocFieldConsumerPerThreadPtr& one, const DocFieldConsumerPerThreadPtr& two); virtual ~DocFieldConsumersPerThread(); LUCENE_CLASS(DocFieldConsumersPerThread); public: DocFieldConsumerPerThreadPtr one; DocFieldConsumerPerThreadPtr two; DocFieldConsumersWeakPtr _parent; DocStatePtr docState; public: virtual void startDocument(); virtual void abort(); virtual DocWriterPtr finishDocument(); virtual DocFieldConsumerPerFieldPtr addField(const FieldInfoPtr& fi); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocFieldProcessor.h000066400000000000000000000025771456444476200241110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDPROCESSOR_H #define DOCFIELDPROCESSOR_H #include "DocConsumer.h" namespace Lucene { /// This is a DocConsumer that gathers all fields under the same name, and calls per-field consumers to process /// field by field. This class doesn't doesn't do any "real" work of its own: it just forwards the fields to a /// DocFieldConsumer. 
class DocFieldProcessor : public DocConsumer { public: DocFieldProcessor(const DocumentsWriterPtr& docWriter, const DocFieldConsumerPtr& consumer); virtual ~DocFieldProcessor(); LUCENE_CLASS(DocFieldProcessor); public: DocumentsWriterWeakPtr _docWriter; FieldInfosPtr fieldInfos; DocFieldConsumerPtr consumer; StoredFieldsWriterPtr fieldsWriter; public: virtual void closeDocStore(const SegmentWriteStatePtr& state); virtual void flush(Collection threads, const SegmentWriteStatePtr& state); virtual void abort(); virtual bool freeRAM(); virtual DocConsumerPerThreadPtr addThread(const DocumentsWriterThreadStatePtr& perThread); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocFieldProcessorPerField.h000066400000000000000000000017671456444476200255240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDPROCESSORPERFIELD_H #define DOCFIELDPROCESSORPERFIELD_H #include "LuceneObject.h" namespace Lucene { /// Holds all per thread, per field state. 
class DocFieldProcessorPerField : public LuceneObject { public: DocFieldProcessorPerField(const DocFieldProcessorPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo); virtual ~DocFieldProcessorPerField(); LUCENE_CLASS(DocFieldProcessorPerField); public: DocFieldConsumerPerFieldPtr consumer; FieldInfoPtr fieldInfo; DocFieldProcessorPerFieldPtr next; int32_t lastGen; int32_t fieldCount; Collection fields; public: virtual void abort(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocFieldProcessorPerThread.h000066400000000000000000000050721456444476200257010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCFIELDPROCESSORPERTHREAD_H #define DOCFIELDPROCESSORPERTHREAD_H #include "DocConsumerPerThread.h" #include "DocumentsWriter.h" namespace Lucene { /// Gathers all Fieldables for a document under the same name, updates FieldInfos, and calls per-field /// consumers to process field by field. /// /// Currently, only a single thread visits the fields, sequentially, for processing. 
class DocFieldProcessorPerThread : public DocConsumerPerThread { public: DocFieldProcessorPerThread(const DocumentsWriterThreadStatePtr& threadState, const DocFieldProcessorPtr& docFieldProcessor); virtual ~DocFieldProcessorPerThread(); LUCENE_CLASS(DocFieldProcessorPerThread); public: double docBoost; int32_t fieldGen; DocFieldProcessorWeakPtr _docFieldProcessor; FieldInfosPtr fieldInfos; DocFieldConsumerPerThreadPtr consumer; Collection _fields; // Holds all fields seen in current doc int32_t fieldCount; Collection fieldHash; // Hash table for all fields ever seen int32_t hashMask; int32_t totalFieldCount; StoredFieldsWriterPerThreadPtr fieldsWriter; DocStatePtr docState; Collection docFreeList; int32_t freeCount; int32_t allocCount; public: virtual void initialize(); virtual void abort(); Collection fields(); // If there are fields we've seen but did not see again in the last run, then free them up. void trimFields(const SegmentWriteStatePtr& state); virtual DocWriterPtr processDocument(); DocFieldProcessorPerThreadPerDocPtr getPerDoc(); void freePerDoc(const DocFieldProcessorPerThreadPerDocPtr& perDoc); protected: void rehash(); }; class DocFieldProcessorPerThreadPerDoc : public DocWriter { public: DocFieldProcessorPerThreadPerDoc(const DocFieldProcessorPerThreadPtr& docProcessor); virtual ~DocFieldProcessorPerThreadPerDoc(); LUCENE_CLASS(DocFieldProcessorPerThreadPerDoc); public: DocWriterPtr one; DocWriterPtr two; protected: DocFieldProcessorPerThreadWeakPtr _docProcessor; public: virtual int64_t sizeInBytes(); virtual void finish(); virtual void abort(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocIdBitSet.h000066400000000000000000000020771456444476200226300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef DOCIDBITSET_H #define DOCIDBITSET_H #include "DocIdSet.h" namespace Lucene { /// Simple DocIdSet and DocIdSetIterator backed by a BitSet class LPPAPI DocIdBitSet : public DocIdSet { public: DocIdBitSet(); DocIdBitSet(const BitSetPtr& bitSet); virtual ~DocIdBitSet(); LUCENE_CLASS(DocIdBitSet); protected: BitSetPtr bitSet; public: virtual DocIdSetIteratorPtr iterator(); /// This DocIdSet implementation is cacheable. virtual bool isCacheable(); /// Returns the underlying BitSet. BitSetPtr getBitSet(); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocIdSet.h000066400000000000000000000026311456444476200221650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCIDSET_H #define DOCIDSET_H #include "DocIdSetIterator.h" namespace Lucene { /// A DocIdSet contains a set of doc ids. Implementing classes must only implement {@link #iterator} to /// provide access to the set. class LPPAPI DocIdSet : public LuceneObject { public: virtual ~DocIdSet(); LUCENE_CLASS(DocIdSet); public: /// Provides a {@link DocIdSetIterator} to access the set. This implementation can return null or /// {@link #EmptyDocIdSet}.iterator() if there are no docs that match. virtual DocIdSetIteratorPtr iterator() = 0; /// This method is a hint for {@link CachingWrapperFilter}, if this DocIdSet should be cached without /// copying it into a BitSet. The default is to return false. 
If you have an own DocIdSet implementation /// that does its iteration very effective and fast without doing disk I/O, override this method and /// return true. virtual bool isCacheable(); /// An empty {@code DocIdSet} instance for easy use, eg. in Filters that hit no documents. static DocIdSetPtr EMPTY_DOCIDSET(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocIdSetIterator.h000066400000000000000000000056221456444476200237020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCIDSETITERATOR_H #define DOCIDSETITERATOR_H #include "LuceneObject.h" namespace Lucene { /// This abstract class defines methods to iterate over a set of non-decreasing doc ids. Note that this class /// assumes it iterates on doc Ids, and therefore {@link #NO_MORE_DOCS} is set to {@value #NO_MORE_DOCS} in order to /// be used as a sentinel object. Implementations of this class are expected to consider INT_MAX as an invalid value. class LPPAPI DocIdSetIterator : public LuceneObject { public: virtual ~DocIdSetIterator(); LUCENE_CLASS(DocIdSetIterator); public: /// When returned by {@link #nextDoc()}, {@link #advance(int)} and {@link #docID()} it means there are no more /// docs in the iterator. static const int32_t NO_MORE_DOCS; public: /// Returns the following: ///
    ///
  • -1 or {@link #NO_MORE_DOCS} if {@link #nextDoc()} or {@link #advance(int)} were not called yet. ///
  • {@link #NO_MORE_DOCS} if the iterator has exhausted. ///
  • Otherwise it should return the doc ID it is currently on. ///
virtual int32_t docID() = 0; /// Advances to the next document in the set and returns the doc it is currently on, or {@link #NO_MORE_DOCS} /// if there are no more docs in the set. /// /// NOTE: after the iterator has exhausted you should not call this method, as it may result in unpredicted /// behaviour. virtual int32_t nextDoc() = 0; /// Advances to the first beyond the current whose document number is greater than or equal to target. Returns /// the current document number or {@link #NO_MORE_DOCS} if there are no more docs in the set. /// /// Behaves as if written: /// ///
    /// int32_t advance(int32_t target)
    /// {
    ///     int32_t doc;
    ///     while ((doc = nextDoc()) < target)
    ///     { }
    ///     return doc;
    /// }
    /// 
/// /// Some implementations are considerably more efficient than that. /// /// NOTE: certain implementations may return a different value (each time) if called several times in a row /// with the same target. /// /// NOTE: this method may be called with {@value #NO_MORE_DOCS} for efficiency by some Scorers. If your /// implementation cannot efficiently determine that it should exhaust, it is recommended that you check for /// that value in each call to this method. /// /// NOTE: after the iterator has exhausted you should not call this method, as it may result in unpredicted /// behaviour. virtual int32_t advance(int32_t target) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocInverter.h000066400000000000000000000031361456444476200227540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCINVERTER_H #define DOCINVERTER_H #include "DocFieldConsumer.h" namespace Lucene { /// This is a DocFieldConsumer that inverts each field, separately, from a Document, and accepts a /// InvertedTermsConsumer to process those terms. 
class DocInverter : public DocFieldConsumer { public: DocInverter(const InvertedDocConsumerPtr& consumer, const InvertedDocEndConsumerPtr& endConsumer); virtual ~DocInverter(); LUCENE_CLASS(DocInverter); public: InvertedDocConsumerPtr consumer; InvertedDocEndConsumerPtr endConsumer; public: virtual void setFieldInfos(const FieldInfosPtr& fieldInfos); /// Called when DocumentsWriter decides to create a new segment virtual void flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state); /// Called when DocumentsWriter decides to close the doc stores virtual void closeDocStore(const SegmentWriteStatePtr& state); /// Called when an aborting exception is hit virtual void abort(); /// Called when DocumentsWriter is using too much RAM. virtual bool freeRAM(); /// Add a new thread virtual DocFieldConsumerPerThreadPtr addThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocInverterPerField.h000066400000000000000000000027011456444476200243640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCINVERTERPERFIELD_H #define DOCINVERTERPERFIELD_H #include "DocFieldConsumerPerField.h" namespace Lucene { /// Holds state for inverting all occurrences of a single field in the document. This class doesn't do /// anything itself; instead, it forwards the tokens produced by analysis to its own consumer /// (InvertedDocConsumerPerField). It also interacts with an endConsumer (InvertedDocEndConsumerPerField). 
class DocInverterPerField : public DocFieldConsumerPerField { public: DocInverterPerField(const DocInverterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo); virtual ~DocInverterPerField(); LUCENE_CLASS(DocInverterPerField); protected: DocInverterPerThreadWeakPtr _perThread; FieldInfoPtr fieldInfo; public: InvertedDocConsumerPerFieldPtr consumer; InvertedDocEndConsumerPerFieldPtr endConsumer; DocStatePtr docState; FieldInvertStatePtr fieldState; public: virtual void initialize(); virtual void abort(); /// Processes all occurrences of a single field virtual void processFields(Collection fields, int32_t count); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocInverterPerThread.h000066400000000000000000000035121456444476200245510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCINVERTERPERTHREAD_H #define DOCINVERTERPERTHREAD_H #include "DocFieldConsumerPerThread.h" #include "AttributeSource.h" namespace Lucene { /// This is a DocFieldConsumer that inverts each field, separately, from a Document, and accepts a /// InvertedTermsConsumer to process those terms. 
class DocInverterPerThread : public DocFieldConsumerPerThread { public: DocInverterPerThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread, const DocInverterPtr& docInverter); virtual ~DocInverterPerThread(); LUCENE_CLASS(DocInverterPerThread); public: DocInverterWeakPtr _docInverter; InvertedDocConsumerPerThreadPtr consumer; InvertedDocEndConsumerPerThreadPtr endConsumer; SingleTokenAttributeSourcePtr singleToken; DocStatePtr docState; FieldInvertStatePtr fieldState; /// Used to read a string value for a field ReusableStringReaderPtr stringReader; public: virtual void initialize(); virtual void startDocument(); virtual DocWriterPtr finishDocument(); virtual void abort(); virtual DocFieldConsumerPerFieldPtr addField(const FieldInfoPtr& fi); }; class SingleTokenAttributeSource : public AttributeSource { public: SingleTokenAttributeSource(); virtual ~SingleTokenAttributeSource(); LUCENE_CLASS(SingleTokenAttributeSource); public: TermAttributePtr termAttribute; OffsetAttributePtr offsetAttribute; public: void reinit(const String& stringValue, int32_t startOffset, int32_t endOffset); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocValues.h000066400000000000000000000072161456444476200224200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCVALUES_H #define DOCVALUES_H #include "LuceneObject.h" namespace Lucene { /// Represents field values as different types. Normally created via a {@link ValueSuorce} for a /// particular field and reader. 
/// /// DocValues is distinct from ValueSource because there needs to be an object created at query /// evaluation time that is not referenced by the query itself because: /// - Query objects should be MT safe /// - For caching, Query objects are often used as keys... you don't want the Query carrying around /// big objects class LPPAPI DocValues : public LuceneObject { public: DocValues(); virtual ~DocValues(); LUCENE_CLASS(DocValues); protected: double minVal; double maxVal; double avgVal; bool computed; public: using LuceneObject::toString; /// Return doc value as a double. /// Mandatory: every DocValues implementation must implement at least this method. /// @param doc document whose double value is requested. virtual double doubleVal(int32_t doc) = 0; /// Return doc value as an int. /// Optional: DocValues implementation can (but don't have to) override this method. /// @param doc document whose int value is requested. virtual int32_t intVal(int32_t doc); /// Return doc value as a long. /// Optional: DocValues implementation can (but don't have to) override this method. /// @param doc document whose long value is requested. virtual int64_t longVal(int32_t doc); /// Return doc value as a string. /// Optional: DocValues implementation can (but don't have to) override this method. /// @param doc document whose string value is requested. virtual String strVal(int32_t doc); /// Return a string representation of a doc value, as required for Explanations. virtual String toString(int32_t doc) = 0; /// Explain the scoring value for the input doc. virtual ExplanationPtr explain(int32_t doc); /// For test purposes only, return the inner array of values, or null if not applicable. /// /// Allows tests to verify that loaded values are: ///
    ///
  1. indeed cached/reused. ///
  2. stored in the expected size/type (byte/short/int/float). ///
/// /// Note: implementations of DocValues must override this method for these test elements to be tested, /// Otherwise the test would not fail, just print a warning. virtual CollectionValue getInnerArray(); /// Returns the minimum of all values or NaN if this DocValues instance does not contain any value. /// This operation is optional /// @return the minimum of all values or NaN if this DocValues instance does not contain any value. virtual double getMinValue(); /// Returns the maximum of all values or NaN if this DocValues instance does not contain any value. /// This operation is optional /// @return the maximum of all values or NaN if this DocValues instance does not contain any value. virtual double getMaxValue(); /// Returns the average of all values or NaN if this DocValues instance does not contain any value. /// This operation is optional /// @return the average of all values or NaN if this DocValues instance does not contain any value. virtual double getAverageValue(); protected: /// Compute optional values void compute(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Document.h000066400000000000000000000160611456444476200223070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCUMENT_H #define DOCUMENT_H #include "LuceneObject.h" namespace Lucene { /// Documents are the unit of indexing and search. /// /// A Document is a set of fields. Each field has a name and a textual value. A field may be {@link /// Fieldable#isStored() stored} with the document, in which case it is returned with search hits on the /// document. Thus each document should typically contain one or more stored fields which uniquely /// identify it. 
/// /// Note that fields which are not {@link Fieldable#isStored() stored} are not available in documents /// retrieved from the index, eg. with {@link ScoreDoc#doc}, {@link Searcher#doc(int)} or {@link /// IndexReader#document(int)}. class LPPAPI Document : public LuceneObject { public: /// Constructs a new document with no fields. Document(); virtual ~Document(); LUCENE_CLASS(Document); protected: Collection fields; double boost; public: /// Sets a boost factor for hits on any field of this document. This value will be multiplied into the /// score of all hits on this document. /// /// The default value is 1.0. /// /// Values are multiplied into the value of {@link Fieldable#getBoost()} of each field in this document. /// Thus, this method in effect sets a default boost for the fields of this document. /// /// @see Fieldable#setBoost(double) void setBoost(double boost); /// Returns, at indexing time, the boost factor as set by {@link #setBoost(double)}. /// /// Note that once a document is indexed this value is no longer available from the index. At search time, /// for retrieved documents, this method always returns 1. This however does not mean that the boost value /// set at indexing time was ignored - it was just combined with other indexing time factors and stored /// elsewhere, for better indexing and search performance. (For more information see the "norm(t,d)" part /// of the scoring formula in {@link Similarity}.) /// /// @see #setBoost(double) double getBoost(); /// Adds a field to a document. Several fields may be added with the same name. In this case, if the fields /// are indexed, their text is treated as though appended for the purposes of search. /// /// Note that add like the removeField(s) methods only makes sense prior to adding a document to an index. /// These methods cannot be used to change the content of an existing index! 
In order to achieve this, a /// document has to be deleted from an index and a new changed version of that document has to be added. void add(const FieldablePtr& field); /// Removes field with the specified name from the document. If multiple fields exist with this name, this /// method removes the first field that has been added. If there is no field with the specified name, the /// document remains unchanged. /// /// Note that the removeField(s) methods like the add method only make sense prior to adding a document to /// an index. These methods cannot be used to change the content of an existing index! In order to achieve /// this, a document has to be deleted from an index and a new changed version of that document has to be added. void removeField(const String& name); /// Removes all fields with the given name from the document. If there is no field with the specified name, /// the document remains unchanged. /// /// Note that the removeField(s) methods like the add method only make sense prior to adding a document to an /// index. These methods cannot be used to change the content of an existing index! In order to achieve this, /// a document has to be deleted from an index and a new changed version of that document has to be added. void removeFields(const String& name); /// Returns a field with the given name if any exist in this document, or null. If multiple fields exists with /// this name, this method returns the first value added. /// Do not use this method with lazy loaded fields. FieldPtr getField(const String& name); /// Returns a field with the given name if any exist in this document, or null. If multiple fields exists with /// this name, this method returns the first value added. FieldablePtr getFieldable(const String& name); /// Returns the string value of the field with the given name if any exist in this document, or null. If multiple /// fields exist with this name, this method returns the first value added. 
If only binary fields with this name /// exist, returns null. String get(const String& name); /// Returns a List of all the fields in a document. /// /// Note that fields which are not {@link Fieldable#isStored() stored} are not available in documents /// retrieved from the index, eg. {@link Searcher#doc(int)} or {@link IndexReader#document(int)}. Collection getFields(); /// Returns an array of {@link Field}s with the given name. Do not use with lazy loaded fields. This method /// returns an empty array when there are no matching fields. It never returns null. /// @param name the name of the field /// @return a Field[] array Collection getFields(const String& name); /// Returns an array of {@link Fieldable}s with the given name. /// This method returns an empty array when there are no matching fields. It never returns null. /// @param name the name of the field /// @return a Fieldable[] array Collection getFieldables(const String& name); /// Returns an array of values of the field specified as the method parameter. /// This method returns an empty array when there are no matching fields. It never returns null. /// @param name the name of the field /// @return a String[] of field values Collection getValues(const String& name); /// Returns an array of byte arrays for of the fields that have the name specified as the method parameter. /// This method returns an empty array when there are no matching fields. It never returns null. /// @param name the name of the field /// @return a byte[][] of binary field values Collection getBinaryValues(const String& name); /// Returns an array of bytes for the first (or only) field that has the name specified as the method parameter. /// This method will return null if no binary fields with the specified name are available. There may be /// non-binary fields with the same name. /// @param name the name of the field. 
/// @return a byte[] containing the binary field value or null ByteArray getBinaryValue(const String& name); /// Returns a string representation of the object virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocumentsWriter.h000066400000000000000000000454341456444476200236750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOCUMENTSWRITER_H #define DOCUMENTSWRITER_H #include "ByteBlockPool.h" #include "RAMFile.h" namespace Lucene { /// This class accepts multiple added documents and directly writes a single segment file. It does this more /// efficiently than creating a single segment per document (with DocumentWriter) and doing standard merges on /// those segments. /// /// Each added document is passed to the {@link DocConsumer}, which in turn processes the document and interacts /// with other consumers in the indexing chain. Certain consumers, like {@link StoredFieldsWriter} and {@link /// TermVectorsTermsWriter}, digest a document and immediately write bytes to the "doc store" files (ie, /// they do not consume RAM per document, except while they are processing the document). /// /// Other consumers, eg {@link FreqProxTermsWriter} and {@link NormsWriter}, buffer bytes in RAM and flush only /// when a new segment is produced. /// /// Once we have used our allowed RAM buffer, or the number of added docs is large enough (in the case we are /// flushing by doc count instead of RAM usage), we create a real segment and flush it to the Directory. /// /// Threads: /// Multiple threads are allowed into addDocument at once. 
There is an initial synchronized call to /// getThreadState which allocates a ThreadState for this thread. The same thread will get the same ThreadState /// over time (thread affinity) so that if there are consistent patterns (for example each thread is indexing a /// different content source) then we make better use of RAM. Then processDocument is called on that ThreadState /// without synchronization (most of the "heavy lifting" is in this call). Finally the synchronized /// "finishDocument" is called to flush changes to the directory. /// /// When flush is called by IndexWriter we forcefully idle all threads and flush only once they are all idle. /// This means you can call flush with a given thread even while other threads are actively adding/deleting /// documents. /// /// Exceptions: /// Because this class directly updates in-memory posting lists, and flushes stored fields and term vectors /// directly to files in the directory, there are certain limited times when an exception can corrupt this state. /// For example, a disk full while flushing stored fields leaves this file in a corrupt state. Or, an /// std::bad_alloc exception while appending to the in-memory posting lists can corrupt that posting list. /// We call such exceptions "aborting exceptions". In these cases we must call abort() to discard all docs added /// since the last flush. /// /// All other exceptions ("non-aborting exceptions") can still partially update the index structures. These /// updates are consistent, but, they represent only a part of the document seen up until the exception was hit. /// When this happens, we immediately mark the document as deleted so that the document is always atomically /// ("all or none") added to the index. 
class LPPAPI DocumentsWriter : public LuceneObject { public: DocumentsWriter(const DirectoryPtr& directory, const IndexWriterPtr& writer, const IndexingChainPtr& indexingChain); virtual ~DocumentsWriter(); LUCENE_CLASS(DocumentsWriter); protected: String docStoreSegment; // Current doc-store segment we are writing int32_t docStoreOffset; // Current starting doc-store offset of current segment int32_t nextDocID; // Next docID to be added int32_t numDocsInRAM; // # docs buffered in RAM /// Max # ThreadState instances; if there are more threads than this they share ThreadStates static const int32_t MAX_THREAD_STATE; Collection threadStates; MapThreadDocumentsWriterThreadState threadBindings; int32_t pauseThreads; // Non-zero when we need all threads to pause (eg to flush) bool aborting; // True if an abort is pending DocFieldProcessorPtr docFieldProcessor; /// Deletes done after the last flush; these are discarded on abort BufferedDeletesPtr deletesInRAM; /// Deletes done before the last flush; these are still kept on abort BufferedDeletesPtr deletesFlushed; /// The max number of delete terms that can be buffered before they must be flushed to disk. int32_t maxBufferedDeleteTerms; /// How much RAM we can use before flushing. This is 0 if we are flushing by doc count instead. int64_t ramBufferSize; int64_t waitQueuePauseBytes; int64_t waitQueueResumeBytes; /// If we've allocated 5% over our RAM budget, we then free down to 95% int64_t freeTrigger; int64_t freeLevel; /// Flush @ this number of docs. If ramBufferSize is non-zero we will flush by RAM usage instead. 
int32_t maxBufferedDocs; /// How many docs already flushed to index int32_t flushedDocCount; bool closed; /// List of files that were written before last abort() HashSet _abortedFiles; SegmentWriteStatePtr flushState; Collection freeIntBlocks; Collection freeCharBlocks; public: /// Coarse estimates used to measure RAM usage of buffered deletes static const int32_t OBJECT_HEADER_BYTES; static const int32_t POINTER_NUM_BYTE; static const int32_t INT_NUM_BYTE; static const int32_t CHAR_NUM_BYTE; /// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object /// with Term key, BufferedDeletes.Num val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Term is /// object with String field and String text (OBJ_HEADER + 2*POINTER). We don't count Term's field since /// it's interned. Term's text is String (OBJ_HEADER + 4*INT + POINTER + OBJ_HEADER + string.length*CHAR). /// BufferedDeletes.num is OBJ_HEADER + INT. static const int32_t BYTES_PER_DEL_TERM; /// Rough logic: del docIDs are List. Say list allocates ~2X size (2*POINTER). Integer is /// OBJ_HEADER + int static const int32_t BYTES_PER_DEL_DOCID; /// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object /// with Query key, Integer val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Query we often undercount /// (say 24 bytes). Integer is OBJ_HEADER + INT. 
static const int32_t BYTES_PER_DEL_QUERY; /// Initial chunks size of the shared byte[] blocks used to store postings data static const int32_t BYTE_BLOCK_SHIFT; static const int32_t BYTE_BLOCK_SIZE; static const int32_t BYTE_BLOCK_MASK; static const int32_t BYTE_BLOCK_NOT_MASK; /// Initial chunk size of the shared char[] blocks used to store term text static const int32_t CHAR_BLOCK_SHIFT; static const int32_t CHAR_BLOCK_SIZE; static const int32_t CHAR_BLOCK_MASK; static const int32_t MAX_TERM_LENGTH; /// Initial chunks size of the shared int[] blocks used to store postings data static const int32_t INT_BLOCK_SHIFT; static const int32_t INT_BLOCK_SIZE; static const int32_t INT_BLOCK_MASK; static const int32_t PER_DOC_BLOCK_SIZE; INTERNAL: IndexWriterWeakPtr _writer; DirectoryPtr directory; IndexingChainPtr indexingChain; String segment; // Current segment we are working on int32_t numDocsInStore; // # docs written to doc stores bool flushPending; // True when a thread has decided to flush bool bufferIsFull; // True when it's time to write segment InfoStreamPtr infoStream; int32_t maxFieldLength; SimilarityPtr similarity; DocConsumerPtr consumer; HashSet _openFiles; HashSet _closedFiles; WaitQueuePtr waitQueue; SkipDocWriterPtr skipDocWriter; ByteBlockAllocatorPtr byteBlockAllocator; ByteBlockAllocatorPtr perDocAllocator; int64_t numBytesAlloc; int64_t numBytesUsed; // used only by assert TermPtr lastDeleteTerm; public: virtual void initialize(); /// Create and return a new DocWriterBuffer. PerDocBufferPtr newPerDocBuffer(); static IndexingChainPtr getDefaultIndexingChain(); void updateFlushedDocCount(int32_t n); int32_t getFlushedDocCount(); void setFlushedDocCount(int32_t n); /// Returns true if any of the fields in the current buffered docs have omitTermFreqAndPositions==false bool hasProx(); /// If non-null, various details of indexing are printed here. 
void setInfoStream(const InfoStreamPtr& infoStream); void setMaxFieldLength(int32_t maxFieldLength); void setSimilarity(const SimilarityPtr& similarity); /// Set how much RAM we can use before flushing. void setRAMBufferSizeMB(double mb); double getRAMBufferSizeMB(); /// Set max buffered docs, which means we will flush by doc count instead of by RAM usage. void setMaxBufferedDocs(int32_t count); int32_t getMaxBufferedDocs(); /// Get current segment name we are writing. String getSegment(); /// Returns how many docs are currently buffered in RAM. int32_t getNumDocsInRAM(); /// Returns the current doc store segment we are writing to. String getDocStoreSegment(); /// Returns the doc offset into the shared doc store for the current buffered docs. int32_t getDocStoreOffset(); /// Closes the current open doc stores an returns the doc store segment name. This returns null if there /// are no buffered documents. String closeDocStore(); HashSet abortedFiles(); void message(const String& message); /// Returns Collection of files in use by this instance, including any flushed segments. HashSet openFiles(); HashSet closedFiles(); void addOpenFile(const String& name); void removeOpenFile(const String& name); void setAborting(); /// Called if we hit an exception at a bad time (when updating the index files) and must discard all /// currently buffered docs. This resets our state, discarding any docs added since last flush. void abort(); /// Returns true if an abort is in progress bool pauseAllThreads(); void resumeAllThreads(); bool anyChanges(); void initFlushState(bool onlyDocStore); /// Flush all pending docs to a new segment int32_t flush(bool _closeDocStore); HashSet getFlushedFiles(); /// Build compound file for the segment we just flushed void createCompoundFile(const String& segment); /// Set flushPending if it is not already set and returns whether it was set. This is used by IndexWriter /// to trigger a single flush even when multiple threads are trying to do so. 
bool setFlushPending(); void clearFlushPending(); void pushDeletes(); void close(); void initSegmentName(bool onlyDocStore); /// Returns a free (idle) ThreadState that may be used for indexing this one document. This call also /// pauses if a flush is pending. If delTerm is non-null then we buffer this deleted term after the /// thread state has been acquired. DocumentsWriterThreadStatePtr getThreadState(const DocumentPtr& doc, const TermPtr& delTerm); /// Returns true if the caller (IndexWriter) should now flush. bool addDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer); bool updateDocument(const TermPtr& t, const DocumentPtr& doc, const AnalyzerPtr& analyzer); bool updateDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer, const TermPtr& delTerm); int32_t getNumBufferedDeleteTerms(); // for testing MapTermNum getBufferedDeleteTerms(); // for testing /// Called whenever a merge has completed and the merged segments had deletions void remapDeletes(const SegmentInfosPtr& infos, Collection< Collection > docMaps, Collection delCounts, const OneMergePtr& merge, int32_t mergeDocCount); bool bufferDeleteTerms(Collection terms); bool bufferDeleteTerm(const TermPtr& term); bool bufferDeleteQueries(Collection queries); bool bufferDeleteQuery(const QueryPtr& query); bool deletesFull(); bool doApplyDeletes(); void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms); int32_t getMaxBufferedDeleteTerms(); bool hasDeletes(); bool applyDeletes(const SegmentInfosPtr& infos); bool doBalanceRAM(); void waitForWaitQueue(); int64_t getRAMUsed(); IntArray getIntBlock(bool trackAllocations); void bytesAllocated(int64_t numBytes); void bytesUsed(int64_t numBytes); void recycleIntBlocks(Collection blocks, int32_t start, int32_t end); CharArray getCharBlock(); void recycleCharBlocks(Collection blocks, int32_t numBlocks); String toMB(int64_t v); /// We have four pools of RAM: Postings, byte blocks (holds freq/prox posting data), char blocks (holds /// characters in 
the term) and per-doc buffers (stored fields/term vectors). Different docs require /// varying amount of storage from these four classes. /// /// For example, docs with many unique single-occurrence short terms will use up the Postings /// RAM and hardly any of the other two. Whereas docs with very large terms will use alot of char blocks /// RAM and relatively less of the other two. This method just frees allocations from the pools once we /// are over-budget, which balances the pools to match the current docs. void balanceRAM(); protected: /// Reset after a flush void doAfterFlush(); bool allThreadsIdle(); void waitReady(const DocumentsWriterThreadStatePtr& state); bool timeToFlushDeletes(); // used only by assert bool checkDeleteTerm(const TermPtr& term); bool applyDeletes(const IndexReaderPtr& reader, int32_t docIDStart); void addDeleteTerm(const TermPtr& term, int32_t docCount); /// Buffer a specific docID for deletion. Currently only used when we hit a exception when adding a document void addDeleteDocID(int32_t docID); void addDeleteQuery(const QueryPtr& query, int32_t docID); /// Does the synchronized work to finish/flush the inverted document. void finishDocument(const DocumentsWriterThreadStatePtr& perThread, const DocWriterPtr& docWriter); friend class WaitQueue; }; class DocState : public LuceneObject { public: DocState(); virtual ~DocState(); LUCENE_CLASS(DocState); public: DocumentsWriterWeakPtr _docWriter; AnalyzerPtr analyzer; int32_t maxFieldLength; InfoStreamPtr infoStream; SimilarityPtr similarity; int32_t docID; DocumentPtr doc; String maxTermPrefix; public: /// Only called by asserts virtual bool testPoint(const String& name); void clear(); }; /// RAMFile buffer for DocWriters. class PerDocBuffer : public RAMFile { public: PerDocBuffer(const DocumentsWriterPtr& docWriter); virtual ~PerDocBuffer(); LUCENE_CLASS(PerDocBuffer); protected: DocumentsWriterWeakPtr _docWriter; public: /// Recycle the bytes used. 
void recycle(); protected: /// Allocate bytes used from shared pool. virtual ByteArray newBuffer(int32_t size); }; /// Consumer returns this on each doc. This holds any state that must be flushed synchronized /// "in docID order". We gather these and flush them in order. class DocWriter : public LuceneObject { public: DocWriter(); virtual ~DocWriter(); LUCENE_CLASS(DocWriter); public: DocWriterPtr next; int32_t docID; public: virtual void finish() = 0; virtual void abort() = 0; virtual int64_t sizeInBytes() = 0; virtual void setNext(const DocWriterPtr& next); }; /// The IndexingChain must define the {@link #getChain(DocumentsWriter)} method which returns the DocConsumer /// that the DocumentsWriter calls to process the documents. class IndexingChain : public LuceneObject { public: virtual ~IndexingChain(); LUCENE_CLASS(IndexingChain); public: virtual DocConsumerPtr getChain(const DocumentsWriterPtr& documentsWriter) = 0; }; /// This is the current indexing chain: /// DocConsumer / DocConsumerPerThread /// --> code: DocFieldProcessor / DocFieldProcessorPerThread /// --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField /// --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField /// --> code: DocInverter / DocInverterPerThread / DocInverterPerField /// --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField /// --> code: TermsHash / TermsHashPerThread / TermsHashPerField /// --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField /// --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField /// --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField /// --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField /// --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField /// --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / 
StoredFieldsWriterPerField class DefaultIndexingChain : public IndexingChain { public: virtual ~DefaultIndexingChain(); LUCENE_CLASS(DefaultIndexingChain); public: virtual DocConsumerPtr getChain(const DocumentsWriterPtr& documentsWriter); }; class SkipDocWriter : public DocWriter { public: virtual ~SkipDocWriter(); LUCENE_CLASS(SkipDocWriter); public: virtual void finish(); virtual void abort(); virtual int64_t sizeInBytes(); }; class WaitQueue : public LuceneObject { public: WaitQueue(const DocumentsWriterPtr& docWriter); virtual ~WaitQueue(); LUCENE_CLASS(WaitQueue); protected: DocumentsWriterWeakPtr _docWriter; public: Collection waiting; int32_t nextWriteDocID; int32_t nextWriteLoc; int32_t numWaiting; int64_t waitingBytes; public: void reset(); bool doResume(); bool doPause(); void abort(); bool add(const DocWriterPtr& doc); protected: void writeDocument(const DocWriterPtr& doc); }; class ByteBlockAllocator : public ByteBlockPoolAllocatorBase { public: ByteBlockAllocator(const DocumentsWriterPtr& docWriter, int32_t blockSize); virtual ~ByteBlockAllocator(); LUCENE_CLASS(ByteBlockAllocator); protected: DocumentsWriterWeakPtr _docWriter; public: int32_t blockSize; Collection freeByteBlocks; public: /// Allocate another byte[] from the shared pool virtual ByteArray getByteBlock(bool trackAllocations); /// Return byte[]'s to the pool virtual void recycleByteBlocks(Collection blocks, int32_t start, int32_t end); virtual void recycleByteBlocks(Collection blocks); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DocumentsWriterThreadState.h000066400000000000000000000024171456444476200260200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef DOCUMENTSWRITERTHREADSTATE_H #define DOCUMENTSWRITERTHREADSTATE_H #include "LuceneObject.h" namespace Lucene { /// Used by DocumentsWriter to maintain per-thread state. /// We keep a separate Posting hash and other state for each thread and then merge postings /// hashes from all threads when writing the segment. class DocumentsWriterThreadState : public LuceneObject { public: DocumentsWriterThreadState(const DocumentsWriterPtr& docWriter); virtual ~DocumentsWriterThreadState(); LUCENE_CLASS(DocumentsWriterThreadState); public: bool isIdle; // false if this is currently in use by a thread int32_t numThreads; // Number of threads that share this instance bool doFlushAfter; // true if we should flush after processing current doc DocConsumerPerThreadPtr consumer; DocStatePtr docState; DocumentsWriterWeakPtr _docWriter; public: virtual void initialize(); void doAfterFlush(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/DoubleFieldSource.h000066400000000000000000000043451456444476200240720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DOUBLEFIELDSOURCE_H #define DOUBLEFIELDSOURCE_H #include "FieldCacheSource.h" #include "DocValues.h" namespace Lucene { /// Obtains double field values from the {@link FieldCache} using getDoubles() and makes those values available /// as other numeric types, casting as needed. /// /// @see FieldCacheSource for requirements on the field. 
/// /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's /// best to switch your application to pass only atomic (single segment) readers to this API. Alternatively, /// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU /// per lookup but will not consume double the FieldCache RAM. class DoubleFieldSource : public FieldCacheSource { public: /// Create a cached double field source with a specific string-to-double parser. DoubleFieldSource(const String& field, const DoubleParserPtr& parser = DoubleParserPtr()); virtual ~DoubleFieldSource(); LUCENE_CLASS(DoubleFieldSource); protected: DoubleParserPtr parser; public: virtual String description(); virtual DocValuesPtr getCachedFieldValues(const FieldCachePtr& cache, const String& field, const IndexReaderPtr& reader); virtual bool cachedFieldSourceEquals(const FieldCacheSourcePtr& other); virtual int32_t cachedFieldSourceHashCode(); }; class DoubleDocValues : public DocValues { public: DoubleDocValues(const DoubleFieldSourcePtr& source, Collection arr); virtual ~DoubleDocValues(); LUCENE_CLASS(DoubleDocValues); protected: DoubleFieldSourceWeakPtr _source; Collection arr; public: virtual double doubleVal(int32_t doc); virtual String toString(int32_t doc); virtual CollectionValue getInnerArray(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ExactPhraseScorer.h000066400000000000000000000014271456444476200241160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef EXACTPHRASESCORER_H #define EXACTPHRASESCORER_H #include "PhraseScorer.h" namespace Lucene { class ExactPhraseScorer : public PhraseScorer { public: ExactPhraseScorer(const WeightPtr& weight, Collection tps, Collection offsets, const SimilarityPtr& similarity, ByteArray norms); virtual ~ExactPhraseScorer(); LUCENE_CLASS(ExactPhraseScorer); protected: virtual double phraseFreq(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Explanation.h000066400000000000000000000050441456444476200230120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef EXPLANATION_H #define EXPLANATION_H #include "LuceneObject.h" namespace Lucene { /// Describes the score computation for document and query. class LPPAPI Explanation : public LuceneObject { public: Explanation(double value = 0, const String& description = EmptyString); virtual ~Explanation(); LUCENE_CLASS(Explanation); protected: double value; // the value of this node String description; // what it represents Collection details; // sub-explanations public: /// Indicates whether or not this Explanation models a good match. /// /// By default, an Explanation represents a "match" if the value is positive. /// /// @see #getValue virtual bool isMatch(); /// The value assigned to this explanation node. virtual double getValue(); /// Sets the value assigned to this explanation node. virtual void setValue(double value); /// A description of this explanation node. virtual String getDescription(); /// Sets the description of this explanation node. 
virtual void setDescription(const String& description); /// The sub-nodes of this explanation node. virtual Collection getDetails(); /// Adds a sub-node to this explanation node. virtual void addDetail(const ExplanationPtr& detail); /// Render an explanation as text. virtual String toString(); /// Render an explanation as HTML. virtual String toHtml(); protected: /// A short one line summary which should contain all high level information about this Explanation, /// without the "Details" virtual String getSummary(); virtual String toString(int32_t depth); }; /// Small Util class used to pass both an idf factor as well as an explanation for that factor. /// /// This class will likely be held on a {@link Weight}, so be aware before storing any large fields. class LPPAPI IDFExplanation : public LuceneObject { public: virtual ~IDFExplanation(); LUCENE_CLASS(IDFExplanation); public: /// @return the idf factor virtual double getIdf() = 0; /// This should be calculated lazily if possible. /// @return the explanation for the idf factor. virtual String explain() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FSDirectory.h000066400000000000000000000126641456444476200227330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FSDIRECTORY_H #define FSDIRECTORY_H #include "Directory.h" namespace Lucene { /// Base class for Directory implementations that store index files in the file system. There are currently three /// core subclasses: /// /// {@link SimpleFSDirectory} is a straightforward implementation using std::ofstream and std::ifstream. /// /// {@link MMapDirectory} uses memory-mapped IO when reading. 
This is a good choice if you have plenty of virtual /// memory relative to your index size, eg if you are running on a 64 bit operating system, oryour index sizes are /// small enough to fit into the virtual memory space. /// /// For users who have no reason to prefer a specific implementation, it's best to simply use {@link #open}. For /// all others, you should instantiate the desired implementation directly. /// /// The locking implementation is by default {@link NativeFSLockFactory}, but can be changed by passing in a custom /// {@link LockFactory} instance. /// @see Directory class LPPAPI FSDirectory : public Directory { protected: /// Create a new FSDirectory for the named location (ctor for subclasses). /// @param path the path of the directory. /// @param lockFactory the lock factory to use, or null for the default ({@link NativeFSLockFactory}) FSDirectory(const String& path, const LockFactoryPtr& lockFactory); public: virtual ~FSDirectory(); LUCENE_CLASS(FSDirectory); public: /// Default read chunk size. This is a conditional default based on operating system. /// @see #setReadChunkSize static const int32_t DEFAULT_READ_CHUNK_SIZE; protected: bool checked; /// The underlying filesystem directory. String directory; /// @see #DEFAULT_READ_CHUNK_SIZE int32_t chunkSize; public: /// Creates an FSDirectory instance. static FSDirectoryPtr open(const String& path); /// Just like {@link #open(File)}, but allows you to also specify a custom {@link LockFactory}. static FSDirectoryPtr open(const String& path, const LockFactoryPtr& lockFactory); /// Lists all files (not subdirectories) in the directory. /// @throws NoSuchDirectoryException if the directory does not exist, or does exist but is not a directory. static HashSet listAll(const String& dir); /// Returns the time the named file was last modified. static uint64_t fileModified(const String& directory, const String& name); /// Create file system directory. void createDir(); /// Return file system directory. 
String getFile(); /// Sets the maximum number of bytes read at once from the underlying file during {@link IndexInput#readBytes}. /// The default value is {@link #DEFAULT_READ_CHUNK_SIZE}. Changes to this value will not impact any already-opened /// {@link IndexInput}s. You should call this before attempting to open an index on the directory. This value should /// be as large as possible to reduce any possible performance impact. void setReadChunkSize(int32_t chunkSize); /// The maximum number of bytes to read at once from the underlying file during {@link IndexInput#readBytes}. /// @see #setReadChunkSize int32_t getReadChunkSize(); /// Lists all files (not subdirectories) in the directory. /// @see #listAll(const String&) virtual HashSet listAll(); /// Returns true if a file with the given name exists. virtual bool fileExists(const String& name); /// Returns the time the named file was last modified. virtual uint64_t fileModified(const String& name); /// Set the modified time of an existing file to now. virtual void touchFile(const String& name); /// Removes an existing file in the directory. virtual void deleteFile(const String& name); /// Returns the length in bytes of a file in the directory. virtual int64_t fileLength(const String& name); /// Ensure that any writes to this file are moved to stable storage. Lucene uses this to properly commit changes to /// the index, to prevent a machine/OS crash from corrupting the index. virtual void sync(const String& name); /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory /// implementation may ignore the buffer size. virtual IndexInputPtr openInput(const String& name); /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory /// implementation may ignore the buffer size. Currently the only Directory implementations that respect this parameter /// are {@link FSDirectory} and {@link CompoundFileReader}. 
virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); /// Return a string identifier that uniquely differentiates this Directory instance from other Directory instances. virtual String getLockID(); /// Closes the store to future operations. virtual void close(); /// For debug output. virtual String toString(); protected: /// Initializes the directory to create a new file with the given name. This method should be used in {@link #createOutput}. void initOutput(const String& name); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FSLockFactory.h000066400000000000000000000021451456444476200232000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FSLOCKFACTORY_H #define FSLOCKFACTORY_H #include "LockFactory.h" namespace Lucene { /// Base class for file system based locking implementation. class LPPAPI FSLockFactory : public LockFactory { protected: FSLockFactory(); public: virtual ~FSLockFactory(); LUCENE_CLASS(FSLockFactory); protected: /// Directory for the lock files. String lockDir; public: /// Set the lock directory. This method can be only called once to /// initialize the lock directory. It is used by {@link FSDirectory} /// to set the lock directory to itself. Subclasses can also use /// this method to set the directory in the constructor. void setLockDir(const String& lockDir); /// Retrieve the lock directory. String getLockDir(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FastCharStream.h000066400000000000000000000032121456444476200233720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FASTCHARSTREAM_H #define FASTCHARSTREAM_H #include "QueryParserCharStream.h" namespace Lucene { /// An efficient implementation of QueryParserCharStream interface. /// /// Note that this does not do line-number counting, but instead keeps track of the character position of /// the token in the input, as required by Lucene's {@link Token} API. class LPPAPI FastCharStream : public QueryParserCharStream, public LuceneObject { public: /// Constructs from a Reader. FastCharStream(const ReaderPtr& reader); virtual ~FastCharStream(); LUCENE_CLASS(FastCharStream); public: CharArray buffer; int32_t bufferLength; // end of valid chars int32_t bufferPosition; // next char to read int32_t tokenStart; // offset in buffer int32_t bufferStart; // position in file of buffer ReaderPtr input; // source of chars public: virtual wchar_t readChar(); virtual wchar_t BeginToken(); virtual void backup(int32_t amount); virtual String GetImage(); virtual CharArray GetSuffix(int32_t length); virtual void Done(); virtual int32_t getColumn(); virtual int32_t getLine(); virtual int32_t getEndColumn(); virtual int32_t getEndLine(); virtual int32_t getBeginColumn(); virtual int32_t getBeginLine(); protected: void refill(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Field.h000066400000000000000000000165041456444476200215560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FIELD_H #define FIELD_H #include "AbstractField.h" namespace Lucene { class LPPAPI Field : public AbstractField { public: /// Create a field by specifying its name, value and how it will be saved in the index. Term vectors /// will not be stored in the index. /// /// @param name The name of the field /// @param value The string to process /// @param store Whether value should be stored in the index /// @param index Whether the field should be indexed, and if so, if it should be tokenized before indexing Field(const String& name, const String& value, Store store, Index index); /// Create a field by specifying its name, value and how it will be saved in the index. /// /// @param name The name of the field /// @param value The string to process /// @param store Whether value should be stored in the index /// @param index Whether the field should be indexed, and if so, if it should be tokenized before indexing /// @param termVector Whether term vector should be stored Field(const String& name, const String& value, Store store, Index index, TermVector termVector); /// Create a tokenized and indexed field that is not stored. Term vectors will not be stored. The Reader is /// read only when the Document is added to the index, ie. you may not close the Reader until {@link /// IndexWriter#addDocument(Document)} has been called. /// /// @param name The name of the field /// @param reader The reader with the content Field(const String& name, const ReaderPtr& reader); /// Create a tokenized and indexed field that is not stored, optionally with storing term vectors. The /// Reader is read only when the Document is added to the index, ie. you may not close the Reader until /// {@link IndexWriter#addDocument(Document)} has been called. 
/// /// @param name The name of the field /// @param reader The reader with the content /// @param termVector Whether term vector should be stored Field(const String& name, const ReaderPtr& reader, TermVector termVector); /// Create a tokenized and indexed field that is not stored. Term vectors will not be stored. This is useful /// for pre-analyzed fields. The TokenStream is read only when the Document is added to the index, ie. you /// may not close the TokenStream until {@link IndexWriter#addDocument(Document)} has been called. /// /// @param name The name of the field /// @param tokenStream The TokenStream with the content Field(const String& name, const TokenStreamPtr& tokenStream); /// Create a tokenized and indexed field that is not stored, optionally with storing term vectors. This is /// useful for pre-analyzed fields. The TokenStream is read only when the Document is added to the index, /// ie. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)} has been called. /// /// @param name The name of the field /// @param tokenStream The TokenStream with the content /// @param termVector Whether term vector should be stored Field(const String& name, const TokenStreamPtr& tokenStream, TermVector termVector); /// Create a stored field with binary value. Optionally the value may be compressed. /// /// @param name The name of the field /// @param value The binary value /// @param store How value should be stored (compressed or not) Field(const String& name, ByteArray value, Store store); /// Create a stored field with binary value. Optionally the value may be compressed. 
/// /// @param name The name of the field /// @param value The binary value /// @param offset Starting offset in value where this Field's bytes are /// @param length Number of bytes to use for this Field, starting at offset /// @param store How value should be stored (compressed or not) Field(const String& name, ByteArray value, int32_t offset, int32_t length, Store store); virtual ~Field(); LUCENE_CLASS(Field); public: using AbstractField::isStored; using AbstractField::isIndexed; /// Specifies whether and how a field should be stored. static bool isStored(Store store); /// Specifies whether and how a field should be indexed. static bool isIndexed(Index index); static bool isAnalyzed(Index index); static bool omitNorms(Index index); /// Get the best representation of the index given the flags. static Field::Index toIndex(bool indexed, bool analyzed); /// Get the best representation of the index given the flags. static Field::Index toIndex(bool indexed, bool analyzed, bool omitNorms); /// Specifies whether and how a field should have term vectors. static bool isStored(TermVector termVector); static bool withPositions(TermVector termVector); static bool withOffsets(TermVector termVector); /// Get the best representation of the index given the flags. static Field::TermVector toTermVector(bool stored, bool withOffsets, bool withPositions); /// The value of the field as a String, or null. If null, the Reader value or binary value is used. /// Exactly one of stringValue(), readerValue(), and getBinaryValue() must be set. virtual String stringValue(); /// The value of the field as a Reader, or null. If null, the String value or binary value is used. /// Exactly one of stringValue(), readerValue(), and getBinaryValue() must be set. virtual ReaderPtr readerValue(); /// The value of the field as a TokesStream, or null. If null, the Reader value or String value is /// analyzed to produce the indexed tokens. 
virtual TokenStreamPtr tokenStreamValue(); /// Change the value of this field. This can be used during indexing to re-use a single Field instance /// to improve indexing speed. Typically a single {@link Document} instance is re-used as well. This /// helps most on small documents. /// /// Each Field instance should only be used once within a single {@link Document} instance. virtual void setValue(const String& value); /// Change the value of this field. virtual void setValue(const ReaderPtr& value); /// Change the value of this field. virtual void setValue(ByteArray value); /// Change the value of this field. virtual void setValue(ByteArray value, int32_t offset, int32_t length); /// Sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return /// true. May be combined with stored values from stringValue() or getBinaryValue() virtual void setTokenStream(const TokenStreamPtr& tokenStream); protected: void ConstructField(const String& name, const String& value, Store store, Index index, TermVector termVector); void ConstructField(const String& name, const ReaderPtr& reader, TermVector termVector); void ConstructField(const String& name, const TokenStreamPtr& tokenStream, TermVector termVector); void ConstructField(const String& name, ByteArray value, int32_t offset, int32_t length, Store store); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldCache.h000066400000000000000000000266011456444476200225010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCACHE_H #define FIELDCACHE_H #include #include "LuceneObject.h" namespace Lucene { /// Maintains caches of term values. 
/// @see FieldCacheSanityChecker class LPPAPI FieldCache { public: virtual ~FieldCache(); LUCENE_INTERFACE(FieldCache); public: /// Specifies whether and how a field should be stored. enum CacheType { CACHE_BYTE = 1, CACHE_INT, CACHE_LONG, CACHE_DOUBLE, CACHE_STRING, CACHE_STRING_INDEX }; /// Indicator for StringIndex values in the cache. /// NOTE: the value assigned to this constant must not be the same as any of those in SortField static const int32_t STRING_INDEX; public: /// The cache used internally by sorting and range query classes. static FieldCachePtr DEFAULT(); /// The default parser for byte values, which are encoded by StringUtils::toInt static ByteParserPtr DEFAULT_BYTE_PARSER(); /// The default parser for int values, which are encoded by StringUtils::toInt static IntParserPtr DEFAULT_INT_PARSER(); /// The default parser for int values, which are encoded by StringUtils::toLong static LongParserPtr DEFAULT_LONG_PARSER(); /// The default parser for double values, which are encoded by StringUtils::toDouble static DoubleParserPtr DEFAULT_DOUBLE_PARSER(); /// A parser instance for int values encoded by {@link NumericUtils#prefixCodedToInt(String)}, /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}. static IntParserPtr NUMERIC_UTILS_INT_PARSER(); /// A parser instance for long values encoded by {@link NumericUtils#prefixCodedToLong(String)}, /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}. static LongParserPtr NUMERIC_UTILS_LONG_PARSER(); /// A parser instance for double values encoded by {@link NumericUtils}, /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}. static DoubleParserPtr NUMERIC_UTILS_DOUBLE_PARSER(); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as a single byte and returns an array of size reader.maxDoc() of the value each document /// has in the given field. /// @param reader Used to get field values. 
/// @param field Which field contains the single byte values. /// @return The values in the given field for each document. virtual Collection getBytes(const IndexReaderPtr& reader, const String& field); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as bytes and returns an array of size reader.maxDoc() of the value each document has in /// the given field. /// @param reader Used to get field values. /// @param field Which field contains the bytes. /// @param parser Computes byte for string values. /// @return The values in the given field for each document. virtual Collection getBytes(const IndexReaderPtr& reader, const String& field, const ByteParserPtr& parser); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as integers and returns an array of size reader.maxDoc() of the value each document has in /// the given field. /// @param reader Used to get field values. /// @param field Which field contains the integers. /// @return The values in the given field for each document. virtual Collection getInts(const IndexReaderPtr& reader, const String& field); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as integers and returns an array of size reader.maxDoc() of the value each document has in /// the given field. /// @param reader Used to get field values. /// @param field Which field contains the integers. /// @param parser Computes integer for string values. /// @return The values in the given field for each document. virtual Collection getInts(const IndexReaderPtr& reader, const String& field, const IntParserPtr& parser); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as longs and returns an array of size reader.maxDoc() of the value each document has in /// the given field. /// @param reader Used to get field values. 
/// @param field Which field contains the longs. /// @return The values in the given field for each document. virtual Collection getLongs(const IndexReaderPtr& reader, const String& field); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as longs and returns an array of size reader.maxDoc() of the value each document has in /// the given field. /// @param reader Used to get field values. /// @param field Which field contains the longs. /// @param parser Computes long for string values. /// @return The values in the given field for each document. virtual Collection getLongs(const IndexReaderPtr& reader, const String& field, const LongParserPtr& parser); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as integers and returns an array of size reader.maxDoc() of the value each document has in /// the given field. /// @param reader Used to get field values. /// @param field Which field contains the doubles. /// @return The values in the given field for each document. virtual Collection getDoubles(const IndexReaderPtr& reader, const String& field); /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in /// field as doubles and returns an array of size reader.maxDoc() of the value each document has in /// the given field. /// @param reader Used to get field values. /// @param field Which field contains the doubles. /// @param parser Computes double for string values. /// @return The values in the given field for each document. virtual Collection getDoubles(const IndexReaderPtr& reader, const String& field, const DoubleParserPtr& parser); /// Checks the internal cache for an appropriate entry, and if none are found, reads the term values in /// field and returns an array of size reader.maxDoc() containing the value each document has in /// the given field. /// @param reader Used to get field values. 
/// @param field Which field contains the strings. /// @return The values in the given field for each document. virtual Collection getStrings(const IndexReaderPtr& reader, const String& field); /// Checks the internal cache for an appropriate entry, and if none are found reads the term values in /// field and returns an array of them in natural order, along with an array telling which element in /// the term array each document uses. /// @param reader Used to get field values. /// @param field Which field contains the strings. /// @return Array of terms and index into the array for each document. virtual StringIndexPtr getStringIndex(const IndexReaderPtr& reader, const String& field); /// Generates an array of CacheEntry objects representing all items currently in the FieldCache. virtual Collection getCacheEntries() = 0; /// Instructs the FieldCache to forcibly expunge all entries from the underlying caches. This is intended /// only to be used for test methods as a way to ensure a known base state of the Cache. It should not be /// relied on for "Cache maintenance" in general application code. virtual void purgeAllCaches() = 0; /// Drops all cache entries associated with this reader. NOTE: this reader must precisely match the reader /// that the cache entry is keyed on. If you pass a top-level reader, it usually will have no effect as /// Lucene now caches at the segment reader level. virtual void purge(const IndexReaderPtr& r) = 0; /// If non-null, FieldCacheImpl will warn whenever entries are created that are not sane according to /// {@link FieldCacheSanityChecker}. virtual void setInfoStream(const InfoStreamPtr& stream); /// @see #setInfoStream virtual InfoStreamPtr getInfoStream(); }; class LPPAPI CreationPlaceholder : public LuceneObject { public: virtual ~CreationPlaceholder(); LUCENE_CLASS(CreationPlaceholder); public: boost::any value; }; /// Stores term text values and document ordering data. 
class LPPAPI StringIndex : public LuceneObject { public: StringIndex(Collection values, Collection lookup); virtual ~StringIndex(); LUCENE_CLASS(StringIndex); public: /// All the term values, in natural order. Collection lookup; /// For each document, an index into the lookup array. Collection order; public: int32_t binarySearchLookup(const String& key); }; /// Marker interface as super-interface to all parsers. It is used to specify a custom parser to {@link /// SortField#SortField(String, Parser)}. class LPPAPI Parser : public LuceneObject { public: virtual ~Parser(); LUCENE_CLASS(Parser); }; /// Interface to parse bytes from document fields. /// @see FieldCache#getBytes(IndexReaderPtr, String, ByteParserPtr) class LPPAPI ByteParser : public Parser { public: virtual ~ByteParser(); LUCENE_CLASS(ByteParser); public: /// Return a single Byte representation of this field's value. virtual uint8_t parseByte(const String& string); }; /// Interface to parse ints from document fields. /// @see FieldCache#getInts(IndexReaderPtr, String, IntParserPtr) class LPPAPI IntParser : public Parser { public: virtual ~IntParser(); LUCENE_CLASS(IntParser); public: /// Return a integer representation of this field's value. virtual int32_t parseInt(const String& string); }; /// Interface to parse longs from document fields. /// @see FieldCache#getLongs(IndexReaderPtr, String, LongParserPtr) class LPPAPI LongParser : public Parser { public: virtual ~LongParser(); LUCENE_CLASS(LongParser); public: /// Return a long representation of this field's value. virtual int64_t parseLong(const String& string); }; /// Interface to parse doubles from document fields. /// @see FieldCache#getDoubles(IndexReaderPtr, String, DoubleParserPtr) class LPPAPI DoubleParser : public Parser { public: virtual ~DoubleParser(); LUCENE_CLASS(DoubleParser); public: /// Return a double representation of this field's value. 
virtual double parseDouble(const String& string); }; /// A unique Identifier/Description for each item in the FieldCache. Can be useful for logging/debugging. class LPPAPI FieldCacheEntry : public LuceneObject { public: virtual ~FieldCacheEntry(); LUCENE_CLASS(FieldCacheEntry); public: virtual LuceneObjectPtr getReaderKey() = 0; virtual String getFieldName() = 0; virtual int32_t getCacheType() = 0; virtual boost::any getCustom() = 0; virtual boost::any getValue() = 0; virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldCacheImpl.h000066400000000000000000000127121456444476200233210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCACHEIMPL_H #define FIELDCACHEIMPL_H #include "FieldCache.h" namespace Lucene { /// The default cache implementation, storing all values in memory. A WeakHashMap is used for storage. 
class FieldCacheImpl : public FieldCache, public LuceneObject { public: FieldCacheImpl(); virtual ~FieldCacheImpl(); LUCENE_CLASS(FieldCacheImpl); protected: MapStringCache caches; InfoStreamPtr infoStream; public: virtual void initialize(); virtual void purgeAllCaches(); virtual void purge(const IndexReaderPtr& r); virtual Collection getCacheEntries(); virtual Collection getBytes(const IndexReaderPtr& reader, const String& field); virtual Collection getBytes(const IndexReaderPtr& reader, const String& field, const ByteParserPtr& parser); virtual Collection getInts(const IndexReaderPtr& reader, const String& field); virtual Collection getInts(const IndexReaderPtr& reader, const String& field, const IntParserPtr& parser); virtual Collection getLongs(const IndexReaderPtr& reader, const String& field); virtual Collection getLongs(const IndexReaderPtr& reader, const String& field, const LongParserPtr& parser); virtual Collection getDoubles(const IndexReaderPtr& reader, const String& field); virtual Collection getDoubles(const IndexReaderPtr& reader, const String& field, const DoubleParserPtr& parser); virtual Collection getStrings(const IndexReaderPtr& reader, const String& field); virtual StringIndexPtr getStringIndex(const IndexReaderPtr& reader, const String& field); virtual void setInfoStream(const InfoStreamPtr& stream); virtual InfoStreamPtr getInfoStream(); }; class Entry : public LuceneObject { public: /// Creates one of these objects for a custom comparator/parser. Entry(const String& field, const boost::any& custom); virtual ~Entry(); LUCENE_CLASS(Entry); public: String field; // which Fieldable boost::any custom; // which custom comparator or parser public: /// Two of these are equal if they reference the same field and type. virtual bool equals(const LuceneObjectPtr& other); /// Composes a hashcode based on the field and type. virtual int32_t hashCode(); }; /// Internal cache. 
class Cache : public LuceneObject { public: Cache(const FieldCachePtr& wrapper = FieldCachePtr()); virtual ~Cache(); LUCENE_CLASS(Cache); public: FieldCacheWeakPtr _wrapper; WeakMapLuceneObjectMapEntryAny readerCache; protected: virtual boost::any createValue(const IndexReaderPtr& reader, const EntryPtr& key) = 0; public: /// Remove this reader from the cache, if present. virtual void purge(const IndexReaderPtr& r); virtual boost::any get(const IndexReaderPtr& reader, const EntryPtr& key); virtual void printNewInsanity(const InfoStreamPtr& infoStream, const boost::any& value); }; class ByteCache : public Cache { public: ByteCache(const FieldCachePtr& wrapper = FieldCachePtr()); virtual ~ByteCache(); LUCENE_CLASS(ByteCache); protected: virtual boost::any createValue(const IndexReaderPtr& reader, const EntryPtr& key); }; class IntCache : public Cache { public: IntCache(const FieldCachePtr& wrapper = FieldCachePtr()); virtual ~IntCache(); LUCENE_CLASS(IntCache); protected: virtual boost::any createValue(const IndexReaderPtr& reader, const EntryPtr& key); }; class LongCache : public Cache { public: LongCache(const FieldCachePtr& wrapper = FieldCachePtr()); virtual ~LongCache(); LUCENE_CLASS(LongCache); protected: virtual boost::any createValue(const IndexReaderPtr& reader, const EntryPtr& key); }; class DoubleCache : public Cache { public: DoubleCache(const FieldCachePtr& wrapper = FieldCachePtr()); virtual ~DoubleCache(); LUCENE_CLASS(DoubleCache); protected: virtual boost::any createValue(const IndexReaderPtr& reader, const EntryPtr& key); }; class StringCache : public Cache { public: StringCache(const FieldCachePtr& wrapper = FieldCachePtr()); virtual ~StringCache(); LUCENE_CLASS(StringCache); protected: virtual boost::any createValue(const IndexReaderPtr& reader, const EntryPtr& key); }; class StringIndexCache : public Cache { public: StringIndexCache(const FieldCachePtr& wrapper = FieldCachePtr()); virtual ~StringIndexCache(); LUCENE_CLASS(StringIndexCache); 
protected: virtual boost::any createValue(const IndexReaderPtr& reader, const EntryPtr& key); }; class FieldCacheEntryImpl : public FieldCacheEntry { public: FieldCacheEntryImpl(const LuceneObjectPtr& readerKey, const String& fieldName, int32_t cacheType, const boost::any& custom, const boost::any& value); virtual ~FieldCacheEntryImpl(); LUCENE_CLASS(FieldCacheEntryImpl); protected: LuceneObjectPtr readerKey; String fieldName; int32_t cacheType; boost::any custom; boost::any value; public: virtual LuceneObjectPtr getReaderKey(); virtual String getFieldName(); virtual int32_t getCacheType(); virtual boost::any getCustom(); virtual boost::any getValue(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldCacheRangeFilter.h000066400000000000000000000151761456444476200246310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCACHERANGEFILTER_H #define FIELDCACHERANGEFILTER_H #include "Filter.h" #include "FieldCache.h" namespace Lucene { /// A range filter built on top of a cached single term field (in {@link FieldCache}). /// /// FieldCacheRangeFilter builds a single cache for the field the first time it is used. Each subsequent /// FieldCacheRangeFilter on the same field then reuses this cache, even if the range itself changes. /// /// This means that FieldCacheRangeFilter is much faster (sometimes more than 100x as fast) as building a /// {@link TermRangeFilter}, if using a {@link #newStringRange}. However, if the range never changes it is /// slower (around 2x as slow) than building a CachingWrapperFilter on top of a single {@link TermRangeFilter}. 
/// /// For numeric data types, this filter may be significantly faster than {@link NumericRangeFilter}. /// Furthermore, it does not need the numeric values encoded by {@link NumericField}. But it has the problem /// that it only works with exact one value/document (see below). /// /// As with all {@link FieldCache} based functionality, FieldCacheRangeFilter is only valid for fields which /// exact one term for each document (except for {@link #newStringRange} where 0 terms are also allowed). Due /// to a restriction of {@link FieldCache}, for numeric ranges all terms that do not have a numeric value, 0 /// is assumed. /// /// Thus it works on dates, prices and other single value fields but will not work on regular text fields. It /// is preferable to use a NOT_ANALYZED field to ensure that there is only a single term. /// /// Do not instantiate this template directly, use one of the static factory methods available, that create a /// correct instance for different data types supported by {@link FieldCache}. class LPPAPI FieldCacheRangeFilter : public Filter { public: FieldCacheRangeFilter(const String& field, const ParserPtr& parser, bool includeLower, bool includeUpper); virtual ~FieldCacheRangeFilter(); LUCENE_CLASS(FieldCacheRangeFilter); INTERNAL: String field; ParserPtr parser; bool includeLower; bool includeUpper; public: /// Creates a string range filter using {@link FieldCache#getStringIndex}. This works with all fields containing /// zero or one term in the field. The range can be half-open by setting one of the values to null. static FieldCacheRangeFilterPtr newStringRange(const String& field, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getBytes(IndexReaderPtr, String)}. This works with all /// byte fields containing exactly one numeric term in the field. The range can be half-open by setting one of the /// values to null. 
static FieldCacheRangeFilterPtr newByteRange(const String& field, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getBytes(IndexReaderPtr, String, ByteParserPtr)}. This /// works with all byte fields containing exactly one numeric term in the field. The range can be half-open by /// setting one of the values to null. static FieldCacheRangeFilterPtr newByteRange(const String& field, const ByteParserPtr& parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getInts(IndexReaderPtr, String)}. This works with all /// int fields containing exactly one numeric term in the field. The range can be half-open by setting one of the /// values to null. static FieldCacheRangeFilterPtr newIntRange(const String& field, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getInts(IndexReaderPtr, String, IntParserPtr)}. This /// works with all int fields containing exactly one numeric term in the field. The range can be half-open by /// setting one of the values to null. static FieldCacheRangeFilterPtr newIntRange(const String& field, const IntParserPtr& parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getLongs(IndexReaderPtr, String)}. This works with all /// long fields containing exactly one numeric term in the field. The range can be half-open by setting one of the /// values to null. static FieldCacheRangeFilterPtr newLongRange(const String& field, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getLongs(IndexReaderPtr, String, LongParserPtr)}. This /// works with all long fields containing exactly one numeric term in the field. 
The range can be half-open by /// setting one of the values to null. static FieldCacheRangeFilterPtr newLongRange(const String& field, const LongParserPtr& parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getDoubles(IndexReaderPtr, String)}. This works with all /// long fields containing exactly one numeric term in the field. The range can be half-open by setting one of the /// values to null. static FieldCacheRangeFilterPtr newDoubleRange(const String& field, double lowerVal, double upperVal, bool includeLower, bool includeUpper); /// Creates a numeric range filter using {@link FieldCache#getDoubles(IndexReaderPtr, String, DoubleParserPtr)}. This /// works with all long fields containing exactly one numeric term in the field. The range can be half-open by /// setting one of the values to null. static FieldCacheRangeFilterPtr newDoubleRange(const String& field, const DoubleParserPtr& parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper); virtual String toString() = 0; virtual bool equals(const LuceneObjectPtr& other) = 0; virtual int32_t hashCode() = 0; /// Returns the field name for this filter virtual String getField(); /// Returns true if the lower endpoint is inclusive virtual bool includesLower(); /// Returns true if the upper endpoint is inclusive virtual bool includesUpper(); /// Returns the current numeric parser virtual ParserPtr getParser(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldCacheSanityChecker.h000066400000000000000000000137741456444476200251650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCACHESANITYCHECKER_H #define FIELDCACHESANITYCHECKER_H #include "LuceneObject.h" #include "MapOfSets.h" namespace Lucene { /// Provides methods for sanity checking that entries in the FieldCache are not wasteful or inconsistent. /// /// Lucene 2.9 Introduced numerous enhancements into how the FieldCache is used by the low levels of Lucene /// searching (for Sorting and ValueSourceQueries) to improve both the speed for Sorting, as well as reopening /// of IndexReaders. But these changes have shifted the usage of FieldCache from "top level" IndexReaders /// (frequently a MultiReader or DirectoryReader) down to the leaf level SegmentReaders. As a result, /// existing applications that directly access the FieldCache may find RAM usage increase significantly when /// upgrading to 2.9 or later. This class provides an API for these applications (or their Unit tests) to /// check at run time if the FieldCache contains "insane" usages of the FieldCache. /// /// @see FieldCache /// @see FieldCacheSanityChecker.Insanity /// @see FieldCacheSanityChecker.InsanityType class LPPAPI FieldCacheSanityChecker : public LuceneObject { public: FieldCacheSanityChecker(); virtual ~FieldCacheSanityChecker(); LUCENE_CLASS(FieldCacheSanityChecker); public: typedef MapOfSets< int32_t, boost::hash, std::equal_to, FieldCacheEntryPtr, luceneHash, luceneEquals > MapSetIntFieldCacheEntry; typedef MapOfSets< ReaderFieldPtr, luceneHash, luceneEquals, int32_t, boost::hash, std::equal_to > MapSetReaderFieldInt; typedef MapOfSets< ReaderFieldPtr, luceneHash, luceneEquals, ReaderFieldPtr, luceneHash, luceneEquals > MapSetReaderFieldReaderField; /// An Enumeration of the different types of "insane" behaviour that may be detected in a FieldCache. enum InsanityType { /// Indicates an overlap in cache usage on a given field in sub/super readers. 
SUBREADER, /// Indicates entries have the same reader+fieldname but different cached values. This can happen /// if different datatypes, or parsers are used -- and while it's not necessarily a bug it's /// typically an indication of a possible problem. /// /// NOTE: Only the reader, fieldname, and cached value are actually tested -- if two cache entries /// have different parsers or datatypes but the cached values are the same Object (== not just equal()) /// this method does not consider that a red flag. This allows for subtle variations in the way a /// Parser is specified (null vs DEFAULT_LONG_PARSER, etc...) VALUEMISMATCH, /// Indicates an expected bit of "insanity". This may be useful for clients that wish to preserve/log /// information about insane usage but indicate that it was expected. EXPECTED }; /// Quick and dirty convenience method /// @see #check static Collection checkSanity(const FieldCachePtr& cache); /// Quick and dirty convenience method that instantiates an instance with "good defaults" and uses it to /// test the CacheEntrys. /// @see #check static Collection checkSanity(Collection cacheEntries); /// Tests a CacheEntry[] for indication of "insane" cache usage. /// NOTE: FieldCache CreationPlaceholder objects are ignored. Collection check(Collection cacheEntries); protected: /// Internal helper method used by check that iterates over valMismatchKeys and generates a Collection of /// Insanity instances accordingly. The MapOfSets are used to populate the Insanity objects. /// @see InsanityType#VALUEMISMATCH Collection checkValueMismatch(MapSetIntFieldCacheEntry valIdToItems, MapSetReaderFieldInt readerFieldToValIds, SetReaderField valMismatchKeys); /// Internal helper method used by check that iterates over the keys of readerFieldToValIds and generates a /// Collection of Insanity instances whenever two (or more) ReaderField instances are found that have an /// ancestry relationships. 
/// @see InsanityType#SUBREADER Collection checkSubreaders(MapSetIntFieldCacheEntry valIdToItems, MapSetReaderFieldInt readerFieldToValIds); /// Checks if the seed is an IndexReader, and if so will walk the hierarchy of subReaders building up a /// list of the objects returned by obj.getFieldCacheKey() Collection getAllDecendentReaderKeys(const LuceneObjectPtr& seed); }; /// Simple container for a collection of related CacheEntry objects that in conjunction with each other /// represent some "insane" usage of the FieldCache. class LPPAPI Insanity : public LuceneObject { public: Insanity(FieldCacheSanityChecker::InsanityType type, const String& msg, Collection entries); virtual ~Insanity(); LUCENE_CLASS(Insanity); protected: FieldCacheSanityChecker::InsanityType type; String msg; Collection entries; public: /// Type of insane behavior this object represents FieldCacheSanityChecker::InsanityType getType(); /// Description of the insane behaviour String getMsg(); /// CacheEntry objects which suggest a problem Collection getCacheEntries(); /// Multi-Line representation of this Insanity object, starting with the Type and Msg, followed by each /// CacheEntry.toString() on it's own line prefaced by a tab character virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldCacheSource.h000066400000000000000000000050421456444476200236560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCACHESOURCE_H #define FIELDCACHESOURCE_H #include "ValueSource.h" namespace Lucene { /// A base class for ValueSource implementations that retrieve values for a single field from the /// {@link FieldCache}. 
/// /// Fields used herein must be indexed (doesn't matter if these fields are stored or not). /// /// It is assumed that each such indexed field is untokenized, or at least has a single token in a document. /// For documents with multiple tokens of the same field, behavior is undefined (It is likely that current /// code would use the value of one of these tokens, but this is not guaranteed). /// /// Document with no tokens in this field are assigned the Zero value. /// /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's /// best to switch your application to pass only atomic (single segment) readers to this API. class LPPAPI FieldCacheSource : public ValueSource { public: /// Create a cached field source for the input field. FieldCacheSource(const String& field); virtual ~FieldCacheSource(); LUCENE_CLASS(FieldCacheSource); protected: String field; public: virtual DocValuesPtr getValues(const IndexReaderPtr& reader); virtual String description(); /// Return cached DocValues for input field and reader. /// @param cache FieldCache so that values of a field are loaded once per reader (RAM allowing) /// @param field Field for which values are required. /// @see ValueSource virtual DocValuesPtr getCachedFieldValues(const FieldCachePtr& cache, const String& field, const IndexReaderPtr& reader) = 0; virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); /// Check if equals to another {@link FieldCacheSource}, already knowing that cache and field are equal. virtual bool cachedFieldSourceEquals(const FieldCacheSourcePtr& other) = 0; /// Return a hash code of a {@link FieldCacheSource}, without the hash-codes of the field and the cache /// (those are taken care of elsewhere). 
virtual int32_t cachedFieldSourceHashCode() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldCacheTermsFilter.h000066400000000000000000000065631456444476200246670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCACHETERMSFILTER_H #define FIELDCACHETERMSFILTER_H #include "Filter.h" namespace Lucene { /// A {@link Filter} that only accepts documents whose single term value in the specified field is contained /// in the provided set of allowed terms. /// /// This is the same functionality as TermsFilter (from contrib/queries), except this filter requires that the /// field contains only a single term for all documents. Because of drastically different implementations, /// they also have different performance characteristics, as described below. /// /// The first invocation of this filter on a given field will be slower, since a {@link StringIndex} must be /// created. Subsequent invocations using the same field will re-use this cache. However, as with all /// functionality based on {@link FieldCache}, persistent RAM is consumed to hold the cache, and is not freed /// until the {@link IndexReader} is closed. In contrast, TermsFilter has no persistent RAM consumption. /// /// With each search, this filter translates the specified set of Terms into a private {@link OpenBitSet} keyed /// by term number per unique {@link IndexReader} (normally one reader per segment). Then, during matching, /// the term number for each docID is retrieved from the cache and then checked for inclusion using the {@link /// OpenBitSet}. 
Since all testing is done using RAM resident data structures, performance should be very fast, /// most likely fast enough to not require further caching of the DocIdSet for each possible combination of /// terms. However, because docIDs are simply scanned linearly, an index with a great many small documents may /// find this linear scan too costly. /// /// In contrast, TermsFilter builds up an {@link OpenBitSet}, keyed by docID, every time it's created, by /// enumerating through all matching docs using {@link TermDocs} to seek and scan through each term's docID list. /// While there is no linear scan of all docIDs, besides the allocation of the underlying array in the {@link /// OpenBitSet}, this approach requires a number of "disk seeks" in proportion to the number of terms, which can /// be exceptionally costly when there are cache misses in the OS's IO cache. /// /// Generally, this filter will be slower on the first invocation for a given field, but subsequent invocations, /// even if you change the allowed set of Terms, should be faster than TermsFilter, especially as the number of /// Terms being matched increases. If you are matching only a very small number of terms, and those terms in /// turn match a very small number of documents, TermsFilter may perform faster. /// /// Which filter is best is very application dependent. class LPPAPI FieldCacheTermsFilter : public Filter { public: FieldCacheTermsFilter(const String& field, Collection terms); virtual ~FieldCacheTermsFilter(); LUCENE_CLASS(FieldCacheTermsFilter); protected: String field; Collection terms; public: FieldCachePtr getFieldCache(); virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldComparator.h000066400000000000000000000303451456444476200236050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCOMPARATOR_H #define FIELDCOMPARATOR_H #include "LuceneObject.h" namespace Lucene { /// A FieldComparator compares hits so as to determine their sort order when collecting the top results with /// {@link TopFieldCollector}. The concrete public FieldComparator classes here correspond to the SortField types. /// /// This API is designed to achieve high performance sorting, by exposing a tight interaction with {@link /// FieldValueHitQueue} as it visits hits. Whenever a hit is competitive, it's enrolled into a virtual slot, /// which is an int ranging from 0 to numHits-1. The {@link FieldComparator} is made aware of segment transitions /// during searching in case any internal state it's tracking needs to be recomputed during these transitions. /// /// A comparator must define these functions: ///
    ///
  • {@link #compare} Compare a hit at 'slot a' with hit 'slot b'. /// ///
  • {@link #setBottom} This method is called by {@link FieldValueHitQueue} to notify the FieldComparator of /// the current weakest ("bottom") slot. Note that this slot may not hold the weakest value according to your /// comparator, in cases where your comparator is not the primary one (ie, is only used to break ties from the /// comparators before it). /// ///
  • {@link #compareBottom} Compare a new hit (docID) against the "weakest" (bottom) entry in the queue. /// ///
  • {@link #copy} Installs a new hit into the priority queue. The {@link FieldValueHitQueue} calls this /// method when a new hit is competitive. /// ///
  • {@link #setNextReader} Invoked when the search is switching to the next segment. You may need to update /// internal state of the comparator, for example retrieving new values from the {@link FieldCache}. /// ///
  • {@link #value} Return the sort value stored in the specified slot. This is only called at the end of /// the search, in order to populate {@link FieldDoc#fields} when returning the top results. ///
class LPPAPI FieldComparator : public LuceneObject { public: virtual ~FieldComparator(); LUCENE_CLASS(FieldComparator); public: /// Compare hit at slot1 with hit at slot2. /// @param slot1 first slot to compare /// @param slot2 second slot to compare /// @return any N < 0 if slot2's value is sorted after slot1, any N > 0 if the slot2's value is sorted /// before slot1 and 0 if they are equal virtual int32_t compare(int32_t slot1, int32_t slot2) = 0; /// Set the bottom slot, ie the "weakest" (sorted last) entry in the queue. When {@link #compareBottom} /// is called, you should compare against this slot. This will always be called before {@link #compareBottom}. /// @param slot the currently weakest (sorted last) slot in the queue virtual void setBottom(int32_t slot) = 0; /// Compare the bottom of the queue with doc. This will only invoked after setBottom has been called. /// This should return the same result as {@link #compare(int,int)}} as if bottom were slot1 and the new /// document were slot 2. /// /// For a search that hits many results, this method will be the hotspot (invoked by far the most frequently). /// /// @param doc that was hit /// @return any N < 0 if the doc's value is sorted after the bottom entry (not competitive), any N > 0 if /// the doc's value is sorted before the bottom entry and 0 if they are equal. virtual int32_t compareBottom(int32_t doc) = 0; /// This method is called when a new hit is competitive. You should copy any state associated with this /// document that will be required for future comparisons, into the specified slot. /// @param slot which slot to copy the hit to /// @param doc docID relative to current reader virtual void copy(int32_t slot, int32_t doc) = 0; /// Set a new Reader. All doc correspond to the current Reader. 
/// /// @param reader current reader /// @param docBase docBase of this reader virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase) = 0; /// Sets the Scorer to use in case a document's score is needed. /// @param scorer Scorer instance that you should use to obtain the current hit's score, if necessary. virtual void setScorer(const ScorerPtr& scorer); /// Return the actual value in the slot. /// @param slot the value /// @return value in this slot upgraded to ComparableValue virtual ComparableValue value(int32_t slot) = 0; }; template class NumericComparator : public FieldComparator { public: NumericComparator(int32_t numHits, const String& field = EmptyString) { this->values = Collection::newInstance(numHits); this->field = field; this->bottom = 0; } virtual ~NumericComparator() { } protected: Collection values; Collection currentReaderValues; String field; TYPE bottom; public: virtual int32_t compare(int32_t slot1, int32_t slot2) { return (int32_t)(values[slot1] - values[slot2]); } virtual int32_t compareBottom(int32_t doc) { return (int32_t)(bottom - currentReaderValues[doc]); } virtual void copy(int32_t slot, int32_t doc) { values[slot] = currentReaderValues[doc]; } virtual void setBottom(int32_t slot) { bottom = values[slot]; } virtual ComparableValue value(int32_t slot) { return ComparableValue(values[slot]); } }; /// Parses field's values as byte (using {@link FieldCache#getBytes} and sorts by ascending value. 
class LPPAPI ByteComparator : public NumericComparator { public: ByteComparator(int32_t numHits, const String& field, const ParserPtr& parser); virtual ~ByteComparator(); LUCENE_CLASS(ByteComparator); protected: ByteParserPtr parser; public: virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); }; /// Sorts by ascending docID class LPPAPI DocComparator : public NumericComparator { public: DocComparator(int32_t numHits); virtual ~DocComparator(); LUCENE_CLASS(DocComparator); protected: int32_t docBase; public: virtual int32_t compareBottom(int32_t doc); virtual void copy(int32_t slot, int32_t doc); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); }; /// Parses field's values as double (using {@link FieldCache#getDoubles} and sorts by ascending value class LPPAPI DoubleComparator : public NumericComparator { public: DoubleComparator(int32_t numHits, const String& field, const ParserPtr& parser); virtual ~DoubleComparator(); LUCENE_CLASS(DoubleComparator); protected: DoubleParserPtr parser; public: virtual int32_t compare(int32_t slot1, int32_t slot2); virtual int32_t compareBottom(int32_t doc); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); }; /// Parses field's values as int (using {@link FieldCache#getInts} and sorts by ascending value class LPPAPI IntComparator : public NumericComparator { public: IntComparator(int32_t numHits, const String& field, const ParserPtr& parser); virtual ~IntComparator(); LUCENE_CLASS(IntComparator); protected: IntParserPtr parser; public: virtual int32_t compare(int32_t slot1, int32_t slot2); virtual int32_t compareBottom(int32_t doc); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); }; /// Parses field's values as long (using {@link FieldCache#getLongs} and sorts by ascending value class LPPAPI LongComparator : public NumericComparator { public: LongComparator(int32_t numHits, const String& field, const ParserPtr& parser); virtual 
~LongComparator(); LUCENE_CLASS(LongComparator); protected: LongParserPtr parser; public: virtual int32_t compare(int32_t slot1, int32_t slot2); virtual int32_t compareBottom(int32_t doc); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); }; /// Sorts by descending relevance. NOTE: if you are sorting only by descending relevance and then secondarily /// by ascending docID, performance is faster using {@link TopScoreDocCollector} directly (which {@link /// IndexSearcher#search} uses when no {@link Sort} is specified). class LPPAPI RelevanceComparator : public NumericComparator { public: RelevanceComparator(int32_t numHits); virtual ~RelevanceComparator(); LUCENE_CLASS(RelevanceComparator); protected: ScorerPtr scorer; public: virtual int32_t compare(int32_t slot1, int32_t slot2); virtual int32_t compareBottom(int32_t doc); virtual void copy(int32_t slot, int32_t doc); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); virtual void setScorer(const ScorerPtr& scorer); }; /// Sorts by a field's value using the Collator for a given Locale. class LPPAPI StringComparatorLocale : public FieldComparator { public: StringComparatorLocale(int32_t numHits, const String& field, const std::locale& locale); virtual ~StringComparatorLocale(); LUCENE_CLASS(StringComparatorLocale); protected: Collection values; Collection currentReaderValues; String field; CollatorPtr collator; String bottom; public: virtual int32_t compare(int32_t slot1, int32_t slot2); virtual int32_t compareBottom(int32_t doc); virtual void copy(int32_t slot, int32_t doc); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); virtual void setBottom(int32_t slot); virtual ComparableValue value(int32_t slot); }; /// Sorts by field's natural String sort order, using ordinals. 
This is functionally equivalent to {@link /// StringValComparator}, but it first resolves the string to their relative ordinal positions (using the /// index returned by {@link FieldCache#getStringIndex}), and does most comparisons using the ordinals. /// For medium to large results, this comparator will be much faster than {@link StringValComparator}. For /// very small result sets it may be slower. class LPPAPI StringOrdValComparator : public FieldComparator { public: StringOrdValComparator(int32_t numHits, const String& field, int32_t sortPos, bool reversed); virtual ~StringOrdValComparator(); LUCENE_CLASS(StringOrdValComparator); protected: Collection ords; Collection values; Collection readerGen; int32_t currentReaderGen; Collection lookup; Collection order; String field; int32_t bottomSlot; int32_t bottomOrd; String bottomValue; bool reversed; int32_t sortPos; public: virtual int32_t compare(int32_t slot1, int32_t slot2); virtual int32_t compareBottom(int32_t doc); virtual void copy(int32_t slot, int32_t doc); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); virtual void setBottom(int32_t slot); virtual ComparableValue value(int32_t slot); virtual Collection getValues(); virtual int32_t getBottomSlot(); virtual String getField(); protected: void convert(int32_t slot); int32_t binarySearch(Collection lookup, const String& key, int32_t low, int32_t high); }; /// Sorts by field's natural String sort order. All comparisons are done using String.compare, which is /// slow for medium to large result sets but possibly very fast for very small results sets. 
class LPPAPI StringValComparator : public FieldComparator { public: StringValComparator(int32_t numHits, const String& field); virtual ~StringValComparator(); LUCENE_CLASS(StringOrdValComparator); protected: Collection values; Collection currentReaderValues; String field; String bottom; public: virtual int32_t compare(int32_t slot1, int32_t slot2); virtual int32_t compareBottom(int32_t doc); virtual void copy(int32_t slot, int32_t doc); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); virtual void setBottom(int32_t slot); virtual ComparableValue value(int32_t slot); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldComparatorSource.h000066400000000000000000000017161456444476200247660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDCOMPARATORSOURCE_H #define FIELDCOMPARATORSOURCE_H #include "LuceneObject.h" namespace Lucene { /// Provides a {@link FieldComparator} for custom field sorting. class LPPAPI FieldComparatorSource : public LuceneObject { public: virtual ~FieldComparatorSource(); LUCENE_CLASS(FieldComparatorSource); public: /// Creates a comparator for the field in the given index. /// @param fieldname Name of the field to create comparator for. /// @return FieldComparator. virtual FieldComparatorPtr newComparator(const String& fieldname, int32_t numHits, int32_t sortPos, bool reversed) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldDoc.h000066400000000000000000000032661456444476200222050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDDOC_H #define FIELDDOC_H #include "ScoreDoc.h" namespace Lucene { /// A ScoreDoc which also contains information about how to sort the referenced document. In addition to the /// document number and score, this object contains an array of values for the document from the field(s) used /// to sort. For example, if the sort criteria was to sort by fields "a", "b" then "c", the fields object array /// will have three elements, corresponding respectively to the term values for the document in fields "a", "b" /// and "c". The class of each element in the array will be either Integer, Double or String depending on the /// type of values in the terms of each field. class LPPAPI FieldDoc : public ScoreDoc { public: FieldDoc(int32_t doc, double score, Collection fields = Collection()); virtual ~FieldDoc(); LUCENE_CLASS(FieldDoc); public: /// The values which are used to sort the referenced document. The order of these will match the original /// sort criteria given by a Sort object. Each Object will be either an Integer, Double or String, depending /// on the type of values in the terms of the original field. /// @see Sort /// @see Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr) Collection fields; public: virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldDocSortedHitQueue.h000066400000000000000000000035711456444476200250370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FIELDDOCSORTEDHITQUEUE_H #define FIELDDOCSORTEDHITQUEUE_H #include "PriorityQueue.h" namespace Lucene { /// Collects sorted results from Searchable's and collates them. /// The elements put into this queue must be of type FieldDoc. class FieldDocSortedHitQueue : public PriorityQueue { public: FieldDocSortedHitQueue(int32_t size); virtual ~FieldDocSortedHitQueue(); LUCENE_CLASS(FieldDocSortedHitQueue); public: Collection fields; // used in the case where the fields are sorted by locale based strings Collection collators; public: /// Allows redefinition of sort fields if they are null. This is to handle the case using /// ParallelMultiSearcher where the original list contains AUTO and we don't know the actual sort /// type until the values come back. The fields can only be set once. This method should be /// synchronized external like all other PQ methods. void setFields(Collection fields); /// Returns the fields being used to sort. Collection getFields(); protected: /// Returns an array of collators, possibly null. The collators correspond to any SortFields which /// were given a specific locale. /// @param fields Array of sort fields. /// @return Array, possibly null. Collection hasCollators(Collection fields); /// Returns whether first is less relevant than second. virtual bool lessThan(const FieldDocPtr& first, const FieldDocPtr& second); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldInfo.h000066400000000000000000000027161456444476200223720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FIELDINFO_H #define FIELDINFO_H #include "LuceneObject.h" namespace Lucene { class FieldInfo : public LuceneObject { public: FieldInfo(const String& na, bool tk, int32_t nu, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); virtual ~FieldInfo(); LUCENE_CLASS(FieldInfo); public: String name; bool isIndexed; int32_t number; // true if term vector for this field should be stored bool storeTermVector; bool storeOffsetWithTermVector; bool storePositionWithTermVector; bool omitNorms; // omit norms associated with indexed fields bool omitTermFreqAndPositions; bool storePayloads; // whether this field stores payloads together with term positions public: virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); void update(bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldInfos.h000066400000000000000000000162111456444476200225500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDINFOS_H #define FIELDINFOS_H #include "LuceneObject.h" namespace Lucene { /// Access to the Fieldable Info file that describes document fields and whether or not they are indexed. /// Each segment has a separate Fieldable Info file. 
Objects of this class are thread-safe for multiple /// readers, but only one thread can be adding documents at a time, with no other reader or writer threads /// accessing this object. class LPPAPI FieldInfos : public LuceneObject { public: FieldInfos(); /// Construct a FieldInfos object using the directory and the name of the file IndexInput /// @param d The directory to open the IndexInput from /// @param name The name of the file to open the IndexInput from in the Directory FieldInfos(const DirectoryPtr& d, const String& name); virtual ~FieldInfos(); LUCENE_CLASS(FieldInfos); public: // Used internally (ie not written to *.fnm files) for pre-2.9 files static const int32_t FORMAT_PRE; // First used in 2.9; prior to 2.9 there was no format header static const int32_t FORMAT_START; static const int32_t CURRENT_FORMAT; static const uint8_t IS_INDEXED; static const uint8_t STORE_TERMVECTOR; static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR; static const uint8_t STORE_OFFSET_WITH_TERMVECTOR; static const uint8_t OMIT_NORMS; static const uint8_t STORE_PAYLOADS; static const uint8_t OMIT_TERM_FREQ_AND_POSITIONS; protected: Collection byNumber; MapStringFieldInfo byName; int32_t format; public: /// Returns a deep clone of this FieldInfos instance. virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); /// Adds field info for a Document. void add(const DocumentPtr& doc); /// Returns true if any fields do not omitTermFreqAndPositions bool hasProx(); /// Add fields that are indexed. Whether they have termvectors has to be specified. /// @param names The names of the fields /// @param storeTermVectors Whether the fields store term vectors or not /// @param storePositionWithTermVector true if positions should be stored. 
/// @param storeOffsetWithTermVector true if offsets should be stored void addIndexed(HashSet names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector); /// Assumes the fields are not storing term vectors. /// @param names The names of the fields /// @param isIndexed Whether the fields are indexed or not /// @see #add(const String&, bool) void add(HashSet names, bool isIndexed); /// Calls 5 parameter add with false for all TermVector parameters. /// @param name The name of the Fieldable /// @param isIndexed true if the field is indexed /// @see #add(const String&, bool, bool, bool, bool) void add(const String& name, bool isIndexed); /// Calls 5 parameter add with false for term vector positions and offsets. /// @param name The name of the field /// @param isIndexed true if the field is indexed /// @param storeTermVector true if the term vector should be stored void add(const String& name, bool isIndexed, bool storeTermVector); /// If the field is not yet known, adds it. If it is known, checks to make sure that the isIndexed flag /// is the same as was given previously for this field. If not - marks it as being indexed. Same goes /// for the TermVector parameters. /// @param name The name of the field /// @param isIndexed true if the field is indexed /// @param storeTermVector true if the term vector should be stored /// @param storePositionWithTermVector true if the term vector with positions should be stored /// @param storeOffsetWithTermVector true if the term vector with offsets should be stored void add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector); /// If the field is not yet known, adds it. If it is known, checks to make sure that the isIndexed flag /// is the same as was given previously for this field. If not - marks it as being indexed. Same goes /// for the TermVector parameters. 
/// @param name The name of the field /// @param isIndexed true if the field is indexed /// @param storeTermVector true if the term vector should be stored /// @param storePositionWithTermVector true if the term vector with positions should be stored /// @param storeOffsetWithTermVector true if the term vector with offsets should be stored /// @param omitNorms true if the norms for the indexed field should be omitted void add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms); /// If the field is not yet known, adds it. If it is known, checks to make sure that the isIndexed /// flag is the same as was given previously for this field. If not - marks it as being indexed. /// Same goes for the TermVector parameters. /// @param name The name of the field /// @param isIndexed true if the field is indexed /// @param storeTermVector true if the term vector should be stored /// @param storePositionWithTermVector true if the term vector with positions should be stored /// @param storeOffsetWithTermVector true if the term vector with offsets should be stored /// @param omitNorms true if the norms for the indexed field should be omitted /// @param storePayloads true if payloads should be stored for this field /// @param omitTermFreqAndPositions true if term freqs should be omitted for this field FieldInfoPtr add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); int32_t fieldNumber(const String& fieldName); FieldInfoPtr fieldInfo(const String& fieldName); /// Return the fieldName identified by its number. /// @return the fieldName or an empty string when the field with the given number doesn't exist. String fieldName(int32_t fieldNumber); /// Return the fieldinfo object referenced by the fieldNumber. 
/// @return the FieldInfo object or null when the given fieldNumber doesn't exist. FieldInfoPtr fieldInfo(int32_t fieldNumber); int32_t size(); bool hasVectors(); void write(const DirectoryPtr& d, const String& name); void write(const IndexOutputPtr& output); protected: FieldInfoPtr addInternal(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); void read(const IndexInputPtr& input, const String& fileName); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldInvertState.h000066400000000000000000000035521456444476200237460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDINVERTSTATE_H #define FIELDINVERTSTATE_H #include "LuceneObject.h" namespace Lucene { /// This class tracks the number and position / offset parameters of terms being added to the index. /// The information collected in this class is also used to calculate the normalization factor for a field. class LPPAPI FieldInvertState : public LuceneObject { public: FieldInvertState(int32_t position = 0, int32_t length = 0, int32_t numOverlap = 0, int32_t offset = 0, double boost = 0); virtual ~FieldInvertState(); LUCENE_CLASS(FieldInvertState); INTERNAL: int32_t position; int32_t length; int32_t numOverlap; int32_t offset; double boost; AttributeSourcePtr attributeSource; public: /// Re-initialize the state, using this boost value. /// @param docBoost boost value to use. void reset(double docBoost); /// Get the last processed term position. /// @return the position int32_t getPosition(); /// Get total number of terms in this field. 
/// @return the length int32_t getLength(); /// Get the number of terms with positionIncrement == 0. /// @return the numOverlap int32_t getNumOverlap(); /// Get end offset of the last processed term. /// @return the offset int32_t getOffset(); /// Get boost value. This is the cumulative product of document boost and field boost for all field /// instances sharing the same field name. /// @return the boost double getBoost(); AttributeSourcePtr getAttributeSource(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldMaskingSpanQuery.h000066400000000000000000000061151456444476200247350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDMASKINGSPANQUERY_H #define FIELDMASKINGSPANQUERY_H #include "SpanQuery.h" namespace Lucene { /// Wrapper to allow {@link SpanQuery} objects participate in composite single-field SpanQueries by /// 'lying' about their search field. That is, the masked SpanQuery will function as normal, but /// {@link SpanQuery#getField()} simply hands back the value supplied in this class's constructor. /// /// This can be used to support Queries like {@link SpanNearQuery} or {@link SpanOrQuery} across /// different fields, which is not ordinarily permitted. /// /// This can be useful for denormalized relational data: for example, when indexing a document with /// conceptually many 'children': /// ///
/// teacherid: 1
/// studentfirstname: james
/// studentsurname: jones
///
/// teacherid: 2
/// studenfirstname: james
/// studentsurname: smith
/// studentfirstname: sally
/// studentsurname: jones
/// 
/// /// A SpanNearQuery with a slop of 0 can be applied across two {@link SpanTermQuery} objects as follows: /// ///
/// SpanQueryPtr q1 = newLucene(newLucene(L"studentfirstname", L"james"));
/// SpanQueryPtr q2 = newLucene(newLucene(L"studentsurname", L"jones"));
/// SpanQueryPtr q2m = newLucene(q2, L"studentfirstname");
///
/// Collection span = newCollection(q1, q1);
///
/// QueryPtr q = newLucene(span, -1, false);
/// 
/// to search for 'studentfirstname:james studentsurname:jones' and find teacherid 1 without matching /// teacherid 2 (which has a 'james' in position 0 and 'jones' in position 1). /// /// Note: as {@link #getField()} returns the masked field, scoring will be done using the norms of the /// field name supplied. This may lead to unexpected scoring behaviour. class LPPAPI FieldMaskingSpanQuery : public SpanQuery { public: FieldMaskingSpanQuery(const SpanQueryPtr& maskedQuery, const String& maskedField); virtual ~FieldMaskingSpanQuery(); LUCENE_CLASS(FieldMaskingSpanQuery); protected: SpanQueryPtr maskedQuery; String field; public: using SpanQuery::toString; virtual String getField(); SpanQueryPtr getMaskedQuery(); virtual SpansPtr getSpans(const IndexReaderPtr& reader); virtual void extractTerms(SetTerm terms); virtual WeightPtr createWeight(const SearcherPtr& searcher); virtual SimilarityPtr getSimilarity(const SearcherPtr& searcher); virtual QueryPtr rewrite(const IndexReaderPtr& reader); virtual String toString(const String& field); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); /// Returns a clone of this query. virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldScoreQuery.h000066400000000000000000000057331456444476200236020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDSCOREQUERY_H #define FIELDSCOREQUERY_H #include "ValueSourceQuery.h" namespace Lucene { /// A query that scores each document as the value of the numeric input field. 
/// /// The query matches all documents, and scores each document according to the numeric value of that field. /// /// It is assumed, and expected, that: ///
    ///
  • The field used here is indexed, and has exactly one token in every scored document. ///
  • Best if this field is un_tokenized. ///
  • That token is parseable to the selected type. ///
/// /// Combining this query in a FunctionQuery allows much freedom in affecting document scores. Note, that /// with this freedom comes responsibility: it is more than likely that the default Lucene scoring is superior /// in quality to scoring modified as explained here. However, in some cases, and certainly for research /// experiments, this capability may turn useful. /// /// When constructing this query, select the appropriate type. That type should match the data stored in the /// field. So in fact the "right" type should be selected before indexing. Type selection has effect on the /// RAM usage: ///
    ///
  • Byte consumes 1 * maxDocs bytes. ///
  • Int consumes 4 * maxDocs bytes. ///
  • Double consumes 8 * maxDocs bytes. ///
/// /// Caching: Values for the numeric field are loaded once and cached in memory for further use with the same /// IndexReader. To take advantage of this, it is extremely important to reuse index-readers or index- /// searchers, otherwise, for instance if for each query a new index reader is opened, large penalties would /// be paid for loading the field values into memory over and over again. class LPPAPI FieldScoreQuery : public ValueSourceQuery { public: /// Type of score field, indicating how field values are interpreted/parsed. enum Type { /// Field values are interpreted as numeric byte values. BYTE, /// Field values are interpreted as numeric integer values. INT, /// Field values are interpreted as numeric double values. DOUBLE }; /// Create a FieldScoreQuery - a query that scores each document as the value of the numeric input field. /// The type param tells how to parse the field string values into a numeric score value. /// @param field the numeric field to be used. /// @param type the type of the field. FieldScoreQuery(const String& field, Type type); virtual ~FieldScoreQuery(); LUCENE_CLASS(FieldScoreQuery); public: /// Create the appropriate (cached) field value source. static ValueSourcePtr getValueSource(const String& field, Type type); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldSelector.h000066400000000000000000000061141456444476200232530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FIELDSELECTOR_H #define FIELDSELECTOR_H #include "LuceneObject.h" namespace Lucene { /// The FieldSelector allows one to make decisions about what Fields get loaded on a {@link Document} by /// {@link IndexReader#document(int32_t, FieldSelector)} class LPPAPI FieldSelector : public LuceneObject { protected: FieldSelector(); public: virtual ~FieldSelector(); LUCENE_CLASS(FieldSelector); public: /// Provides information about what should be done with this Field enum FieldSelectorResult { /// Null value SELECTOR_NULL, /// Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is /// encountered. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should /// not return null. /// {@link Document#add(Fieldable)} should be called by the Reader. SELECTOR_LOAD, /// Lazily load this {@link Field}. This means the {@link Field} is valid, but it may not actually /// contain its data until invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link /// Document#getFieldable(String)} is safe to use and should return a valid instance of a {@link /// Fieldable}. /// {@link Document#add(Fieldable)} should be called by the Reader. SELECTOR_LAZY_LOAD, /// Do not load the {@link Field}. {@link Document#getField(String)} and {@link /// Document#getFieldable(String)} should return null. {@link Document#add(Fieldable)} is not called. /// {@link Document#add(Fieldable)} should not be called by the Reader. SELECTOR_NO_LOAD, /// Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading /// for the {@link Document}. Thus, the Document may not have its complete set of Fields. {@link /// Document#getField(String)} and {@link Document#getFieldable(String)} should both be valid for /// this {@link Field} /// {@link Document#add(Fieldable)} should be called by the Reader. 
SELECTOR_LOAD_AND_BREAK, /// Load the size of this {@link Field} rather than its value. Size is measured as number of bytes /// required to store the field == bytes for a binary or any compressed value, and 2*chars for a String /// value. The size is stored as a binary value, represented as an int in a byte[], with the higher /// order byte first in [0] SELECTOR_SIZE, /// Like {@link #SIZE} but immediately break from the field loading loop, i.e., stop loading further /// fields, after the size is loaded SELECTOR_SIZE_AND_BREAK }; public: virtual FieldSelectorResult accept(const String& fieldName) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldSortedTermVectorMapper.h000066400000000000000000000036751456444476200261240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDSORTEDTERMVECTORMAPPER_H #define FIELDSORTEDTERMVECTORMAPPER_H #include #include "TermVectorMapper.h" namespace Lucene { /// For each Field, store a sorted collection of {@link TermVectorEntry}s /// This is not thread-safe. 
class LPPAPI FieldSortedTermVectorMapper : public TermVectorMapper { public: /// @param comparator A Comparator for sorting {@link TermVectorEntry}s FieldSortedTermVectorMapper(TermVectorEntryComparator comparator); FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator); virtual ~FieldSortedTermVectorMapper(); LUCENE_CLASS(FieldSortedTermVectorMapper); protected: MapStringCollectionTermVectorEntry fieldToTerms; Collection currentSet; String currentField; TermVectorEntryComparator comparator; public: /// Map the Term Vector information into your own structure virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); /// Get the mapping between fields and terms, sorted by the comparator /// @return A map between field names and {@link java.util.SortedSet}s per field. SortedSet entries are /// {@link TermVectorEntry} MapStringCollectionTermVectorEntry getFieldToTerms(); TermVectorEntryComparator getComparator(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldValueHitQueue.h000066400000000000000000000047451456444476200242310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDVALUEHITQUEUE_H #define FIELDVALUEHITQUEUE_H #include "HitQueueBase.h" #include "ScoreDoc.h" namespace Lucene { /// A hit queue for sorting by hits by terms in more than one field. Uses FieldCache::DEFAULT for maintaining /// internal term lookup tables. 
/// @see Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr) /// @see FieldCache class LPPAPI FieldValueHitQueue : public HitQueueBase { protected: FieldValueHitQueue(Collection fields, int32_t size); public: virtual ~FieldValueHitQueue(); LUCENE_CLASS(FieldValueHitQueue); protected: /// Stores the sort criteria being used. Collection fields; Collection comparators; Collection reverseMul; public: /// Creates a hit queue sorted by the given list of fields. /// @param fields SortField array we are sorting by in priority order (highest priority first); cannot /// be null or empty. /// @param size The number of hits to retain. Must be greater than zero. static FieldValueHitQueuePtr create(Collection fields, int32_t size); Collection getComparators(); Collection getReverseMul(); /// Given a queue Entry, creates a corresponding FieldDoc that contains the values used to sort the given /// document. These values are not the raw values out of the index, but the internal representation of /// them. This is so the given search hit can be collated by a MultiSearcher with other search hits. /// @param entry The Entry used to create a FieldDoc /// @return The newly created FieldDoc /// @see Searchable#search(WeightPtr, FilterPtr, int32_t, SortPtr) FieldDocPtr fillFields(const FieldValueHitQueueEntryPtr& entry); /// Returns the SortFields being used by this hit queue. Collection getFields(); }; class LPPAPI FieldValueHitQueueEntry : public ScoreDoc { public: FieldValueHitQueueEntry(int32_t slot, int32_t doc, double score); virtual ~FieldValueHitQueueEntry(); LUCENE_CLASS(FieldValueHitQueueEntry); public: int32_t slot; public: virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Fieldable.h000066400000000000000000000167641456444476200224120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDABLE_H #define FIELDABLE_H #include "LuceneObject.h" namespace Lucene { /// Synonymous with {@link Field}. /// /// WARNING: This interface may change within minor versions, despite Lucene's backward compatibility /// requirements. This means new methods may be added from version to version. This change only /// affects the Fieldable API; other backwards compatibility promises remain intact. For example, Lucene /// can still read and write indices created within the same major version. class LPPAPI Fieldable { public: LUCENE_INTERFACE(Fieldable); virtual ~Fieldable() {} public: /// Sets the boost factor hits on this field. This value will be multiplied into the score of all /// hits on this this field of this document. /// /// The boost is multiplied by {@link Document#getBoost()} of the document containing this field. /// If a document has multiple fields with the same name, all such values are multiplied together. /// This product is then used to compute the norm factor for the field. By default, in the {@link /// Similarity#computeNorm(String, FieldInvertState)} method, the boost value is multiplied by the /// {@link Similarity#lengthNorm(String,int)} and then rounded by {@link Similarity#encodeNorm(double)} /// before it is stored in the index. One should attempt to ensure that this product does not overflow /// the range of that encoding. /// /// @see Document#setBoost(double) /// @see Similarity#computeNorm(String, FieldInvertState) /// @see Similarity#encodeNorm(double) virtual void setBoost(double boost) = 0; /// Returns the boost factor for hits for this field. /// /// The default value is 1.0. /// /// Note: this value is not stored directly with the document in the index. 
Documents returned from /// {@link IndexReader#document(int)} and {@link Searcher#doc(int)} may thus not have the same value /// present as when this field was indexed. virtual double getBoost() = 0; /// Returns the name of the field as an interned string. For example "date", "title", "body", ... virtual String name() = 0; /// The value of the field as a String, or empty. /// /// For indexing, if isStored()==true, the stringValue() will be used as the stored field value /// unless isBinary()==true, in which case getBinaryValue() will be used. /// /// If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token. /// If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate /// indexed tokens if not null, else readerValue() will be used to generate indexed tokens if not null, /// else stringValue() will be used to generate tokens. virtual String stringValue() = 0; /// The value of the field as a Reader, which can be used at index time to generate indexed tokens. /// @see #stringValue() virtual ReaderPtr readerValue() = 0; /// The TokenStream for this field to be used when indexing, or null. /// @see #stringValue() virtual TokenStreamPtr tokenStreamValue() = 0; /// True if the value of the field is to be stored in the index for return with search hits. virtual bool isStored() = 0; /// True if the value of the field is to be indexed, so that it may be searched on. virtual bool isIndexed() = 0; /// True if the value of the field should be tokenized as text prior to indexing. Un-tokenized fields /// are indexed as a single word and may not be Reader-valued. virtual bool isTokenized() = 0; /// True if the term or terms used to index this field are stored as a term vector, available from /// {@link IndexReader#getTermFreqVector(int,String)}. These methods do not provide access to the /// original content of the field, only to terms used to index it. 
If the original content must be /// preserved, use the stored attribute instead. virtual bool isTermVectorStored() = 0; /// True if terms are stored as term vector together with their offsets (start and end position in /// source text). virtual bool isStoreOffsetWithTermVector() = 0; /// True if terms are stored as term vector together with their token positions. virtual bool isStorePositionWithTermVector() = 0; /// True if the value of the field is stored as binary. virtual bool isBinary() = 0; /// True if norms are omitted for this indexed field. virtual bool getOmitNorms() = 0; /// If set, omit normalization factors associated with this indexed field. /// This effectively disables indexing boosts and length normalization for this field. virtual void setOmitNorms(bool omitNorms) = 0; /// Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field /// is lazily loaded, retrieving it's values via {@link #stringValue()} or {@link #getBinaryValue()} /// is only valid as long as the {@link IndexReader} that retrieved the {@link Document} is still open. /// /// @return true if this field can be loaded lazily virtual bool isLazy() = 0; /// Returns offset into byte[] segment that is used as value, if Field is not binary returned value is /// undefined. /// @return index of the first character in byte[] segment that represents this Field value. virtual int32_t getBinaryOffset() = 0; /// Returns length of byte[] segment that is used as value, if Field is not binary returned value is /// undefined. /// @return length of byte[] segment that represents this Field value. virtual int32_t getBinaryLength() = 0; /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. /// @return reference to the Field value as byte[]. 
virtual ByteArray getBinaryValue() = 0; /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. /// /// About reuse: if you pass in the result byte[] and it is used, likely the underlying implementation will /// hold onto this byte[] and return it in future calls to {@link #getBinaryValue()}. So if you subsequently /// re-use the same byte[] elsewhere it will alter this Fieldable's value. /// @param result User defined buffer that will be used if possible. If this is null or not large enough, /// a new buffer is allocated /// @return reference to the Field value as byte[]. virtual ByteArray getBinaryValue(ByteArray result) = 0; /// @see #setOmitTermFreqAndPositions virtual bool getOmitTermFreqAndPositions() = 0; /// If set, omit term freq, positions and payloads from postings for this field. /// /// NOTE: While this option reduces storage space required in the index, it also means any query requiring /// positional information, such as {@link PhraseQuery} or {@link SpanQuery} subclasses will silently fail /// to find results. virtual void setOmitTermFreqAndPositions(bool omitTermFreqAndPositions) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldsReader.h000066400000000000000000000134231456444476200230610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FIELDSREADER_H #define FIELDSREADER_H #include "AbstractField.h" #include "CloseableThreadLocal.h" namespace Lucene { /// Class responsible for access to stored document fields. It uses .fdt and .fdx; files. 
class LPPAPI FieldsReader : public LuceneObject { public: /// Used only by clone FieldsReader(const FieldInfosPtr& fieldInfos, int32_t numTotalDocs, int32_t size, int32_t format, int32_t formatSize, int32_t docStoreOffset, const IndexInputPtr& cloneableFieldsStream, const IndexInputPtr& cloneableIndexStream); FieldsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn); FieldsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn, int32_t readBufferSize, int32_t docStoreOffset = -1, int32_t size = 0); virtual ~FieldsReader(); LUCENE_CLASS(FieldsReader); protected: FieldInfosPtr fieldInfos; // The main fieldStream, used only for cloning. IndexInputPtr cloneableFieldsStream; // This is a clone of cloneableFieldsStream used for reading documents. It should not be cloned outside of a // synchronized context. IndexInputPtr fieldsStream; IndexInputPtr cloneableIndexStream; IndexInputPtr indexStream; int32_t numTotalDocs; int32_t _size; bool closed; int32_t format; int32_t formatSize; // The docID offset where our docs begin in the index file. This will be 0 if we have our own private file. int32_t docStoreOffset; CloseableThreadLocal fieldsStreamTL; bool isOriginal; public: /// Returns a cloned FieldsReader that shares open IndexInputs with the original one. It is the caller's job not to /// close the original FieldsReader until all clones are called (eg, currently SegmentReader manages this logic). virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); /// Closes the underlying {@link IndexInput} streams, including any ones associated with a lazy implementation of a /// Field. This means that the Fields values will not be accessible. void close(); int32_t size(); bool canReadRawDocs(); DocumentPtr doc(int32_t n, const FieldSelectorPtr& fieldSelector); /// Returns the length in bytes of each raw document in a contiguous range of length numDocs starting with startDocID. 
/// Returns the IndexInput (the fieldStream), already seeked to the starting point for startDocID. IndexInputPtr rawDocs(Collection lengths, int32_t startDocID, int32_t numDocs); protected: void ConstructReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn, int32_t readBufferSize, int32_t docStoreOffset, int32_t size); void ensureOpen(); void seekIndex(int32_t docID); /// Skip the field. We still have to read some of the information about the field, but can skip past the actual content. /// This will have the most payoff on large fields. void skipField(bool binary, bool compressed); void skipField(bool binary, bool compressed, int32_t toRead); void addFieldLazy(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed, bool tokenize); void addField(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed, bool tokenize); /// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes). /// Read just the size - caller must skip the field content to continue reading fields. Return the size in bytes or chars, /// depending on field type. int32_t addFieldSize(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed); ByteArray uncompress(ByteArray b); String uncompressString(ByteArray b); friend class LazyField; }; class LazyField : public AbstractField { public: LazyField(const FieldsReaderPtr& reader, const String& name, Store store, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed); LazyField(const FieldsReaderPtr& reader, const String& name, Store store, Index index, TermVector termVector, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed); virtual ~LazyField(); LUCENE_CLASS(LazyField); protected: FieldsReaderWeakPtr _reader; int32_t toRead; int64_t pointer; /// @deprecated Only kept for backward-compatibility with <3.0 indexes. 
bool isCompressed; public: /// The value of the field as a Reader, or null. If null, the String value, binary value, or TokenStream value is used. /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. ReaderPtr readerValue(); /// The value of the field as a TokenStream, or null. If null, the Reader value, String value, or binary value is used. /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. TokenStreamPtr tokenStreamValue(); /// The value of the field as a String, or null. If null, the Reader value, binary value, or TokenStream value is used. /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. String stringValue(); int64_t getPointer(); void setPointer(int64_t pointer); int32_t getToRead(); void setToRead(int32_t toRead); /// Return the raw byte[] for the binary field. virtual ByteArray getBinaryValue(ByteArray result); protected: IndexInputPtr getFieldStream(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FieldsWriter.h000066400000000000000000000043731456444476200231370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FIELDSWRITER_H #define FIELDSWRITER_H #include "LuceneObject.h" namespace Lucene { class FieldsWriter : public LuceneObject { public: FieldsWriter(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn); FieldsWriter(const IndexOutputPtr& fdx, const IndexOutputPtr& fdt, const FieldInfosPtr& fn); virtual ~FieldsWriter(); LUCENE_CLASS(FieldsWriter); protected: FieldInfosPtr fieldInfos; IndexOutputPtr fieldsStream; IndexOutputPtr indexStream; bool doClose; public: static const uint8_t FIELD_IS_TOKENIZED; static const uint8_t FIELD_IS_BINARY; static const uint8_t FIELD_IS_COMPRESSED; static const int32_t FORMAT; // Original format static const int32_t FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; // Changed strings to UTF8 static const int32_t FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; // Lucene 3.0: Removal of compressed fields // NOTE: if you introduce a new format, make it 1 higher than the current one, and always change this // if you switch to a new format! static const int32_t FORMAT_CURRENT; public: void setFieldsStream(const IndexOutputPtr& stream); /// Writes the contents of buffer into the fields stream and adds a new entry for this document into the index /// stream. This assumes the buffer was already written in the correct fields format. void flushDocument(int32_t numStoredFields, const RAMOutputStreamPtr& buffer); void skipDocument(); void flush(); void close(); void writeField(const FieldInfoPtr& fi, const FieldablePtr& field); /// Bulk write a contiguous series of documents. The lengths array is the length (in bytes) of each raw document. /// The stream IndexInput is the fieldsStream from which we should bulk-copy all bytes. 
void addRawDocuments(const IndexInputPtr& stream, Collection lengths, int32_t numDocs); void addDocument(const DocumentPtr& doc); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FileReader.h000066400000000000000000000024741456444476200225360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILEREADER_H #define FILEREADER_H #include "Reader.h" namespace Lucene { /// Convenience class for reading character files. class LPPAPI FileReader : public Reader { public: /// Creates a new FileReader, given the file name to read from. FileReader(const String& fileName); virtual ~FileReader(); LUCENE_CLASS(FileReader); protected: ifstreamPtr file; int64_t _length; ByteArray fileBuffer; public: static const int32_t FILE_EOF; static const int32_t FILE_ERROR; public: /// Read a single character. virtual int32_t read(); /// Read characters into a portion of an array. virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); /// Close the stream. virtual void close(); /// Tell whether this stream supports the mark() operation virtual bool markSupported(); /// Reset the stream. virtual void reset(); /// The number of bytes in the file. virtual int64_t length(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FileSwitchDirectory.h000066400000000000000000000055251456444476200244620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FILESWITCHDIRECTORY_H #define FILESWITCHDIRECTORY_H #include "Directory.h" namespace Lucene { /// A Directory instance that switches files between two other /// Directory instances. /// /// Files with the specified extensions are placed in the primary /// directory; others are placed in the secondary directory. The /// provided Set must not change once passed to this class, and /// must allow multiple threads to call contains at once. class LPPAPI FileSwitchDirectory : public Directory { public: FileSwitchDirectory(HashSet primaryExtensions, const DirectoryPtr& primaryDir, const DirectoryPtr& secondaryDir, bool doClose); virtual ~FileSwitchDirectory(); LUCENE_CLASS(FileSwitchDirectory); protected: HashSet primaryExtensions; DirectoryPtr primaryDir; DirectoryPtr secondaryDir; bool doClose; public: /// Return the primary directory. DirectoryPtr getPrimaryDir(); /// Return the secondary directory. DirectoryPtr getSecondaryDir(); /// Closes the store. virtual void close(); /// Returns an array of strings, one for each file in the directory. virtual HashSet listAll(); /// Utility method to return a file's extension. static String getExtension(const String& name); /// Returns true if a file with the given name exists. virtual bool fileExists(const String& name); /// Returns the time the named file was last modified. virtual uint64_t fileModified(const String& name); /// Set the modified time of an existing file to now. virtual void touchFile(const String& name); /// Removes an existing file in the directory. virtual void deleteFile(const String& name); /// Returns the length of a file in the directory. virtual int64_t fileLength(const String& name); /// Creates a new, empty file in the directory with the given name. /// Returns a stream writing this file. virtual IndexOutputPtr createOutput(const String& name); /// Ensure that any writes to this file are moved to stable storage. 
/// Lucene uses this to properly commit changes to the index, to /// prevent a machine/OS crash from corrupting the index. virtual void sync(const String& name); /// Returns a stream reading an existing file, with the specified /// read buffer size. The particular Directory implementation may /// ignore the buffer size. virtual IndexInputPtr openInput(const String& name); protected: DirectoryPtr getDirectory(const String& name); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FileUtils.h000066400000000000000000000041311456444476200224240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILEUTILS_H #define FILEUTILS_H #include "Lucene.h" namespace Lucene { namespace FileUtils { /// Return true if given file or directory exists. LPPAPI bool fileExists(const String& path); /// Return file last modified date and time. LPPAPI uint64_t fileModified(const String& path); /// Set file last modified date and time to now. LPPAPI bool touchFile(const String& path); /// Return file length in bytes. LPPAPI int64_t fileLength(const String& path); /// Set new file length, truncating or expanding as required. LPPAPI bool setFileLength(const String& path, int64_t length); /// Delete file from file system. LPPAPI bool removeFile(const String& path); /// Copy a file to/from file system. LPPAPI bool copyFile(const String& source, const String& dest); /// Create new directory under given location. LPPAPI bool createDirectory(const String& path); /// Delete directory from file system. LPPAPI bool removeDirectory(const String& path); /// Return true if given path points to a directory. 
LPPAPI bool isDirectory(const String& path); /// Return list of files (and/or directories) under given directory. /// @param path path to list directory. /// @param filesOnly if true the exclude sub-directories. /// @param dirList list of files to return. LPPAPI bool listDirectory(const String& path, bool filesOnly, HashSet dirList); /// Copy a directory to/from file system. LPPAPI bool copyDirectory(const String& source, const String& dest); /// Return complete path after joining given directory and file name. LPPAPI String joinPath(const String& path, const String& file); /// Extract parent path from given path. LPPAPI String extractPath(const String& path); /// Extract file name from given path. LPPAPI String extractFile(const String& path); } } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Filter.h000066400000000000000000000031551456444476200217560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILTER_H #define FILTER_H #include "LuceneObject.h" namespace Lucene { /// Abstract base class for restricting which documents may be returned during searching. class LPPAPI Filter : public LuceneObject { public: virtual ~Filter(); LUCENE_CLASS(Filter); public: /// Creates a {@link DocIdSet} enumerating the documents that should be permitted in search results. /// /// Note: null can be returned if no documents are accepted by this Filter. /// /// Note: This method will be called once per segment in the index during searching. The returned /// {@link DocIdSet} must refer to document IDs for that segment, not for the top-level reader. /// /// @param reader a {@link IndexReader} instance opened on the index currently searched on. 
Note, /// it is likely that the provided reader does not represent the whole underlying index ie. if the /// index has more than one segment the given reader only represents a single segment. /// @return a DocIdSet that provides the documents which should be permitted or prohibited in search /// results. NOTE: null can be returned if no documents will be accepted by this Filter. /// /// @see DocIdBitSet virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FilterIndexReader.h000066400000000000000000000114731456444476200240730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILTERINDEXREADER_H #define FILTERINDEXREADER_H #include "IndexReader.h" #include "TermPositions.h" #include "TermEnum.h" namespace Lucene { /// A FilterIndexReader contains another IndexReader, which it uses as its basic source of data, possibly /// transforming the data along the way or providing additional functionality. The class FilterIndexReader /// itself simply implements all abstract methods of IndexReader with versions that pass all requests to /// the contained index reader. Subclasses of FilterIndexReader may further override some of these methods /// and may also provide additional methods and fields. class LPPAPI FilterIndexReader : public IndexReader { public: /// Construct a FilterIndexReader based on the specified base reader. Directory locking for delete, /// undeleteAll, and setNorm operations is left to the base reader. /// /// Note that base reader is closed if this FilterIndexReader is closed. /// @param in specified base reader. 
FilterIndexReader(const IndexReaderPtr& in); virtual ~FilterIndexReader(); LUCENE_CLASS(FilterIndexReader); protected: IndexReaderPtr in; public: virtual DirectoryPtr directory(); virtual Collection getTermFreqVectors(int32_t docNumber); virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper); virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper); virtual int32_t numDocs(); virtual int32_t maxDoc(); virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector); virtual bool isDeleted(int32_t n); virtual bool hasDeletions(); virtual bool hasNorms(const String& field); virtual ByteArray norms(const String& field); virtual void norms(const String& field, ByteArray norms, int32_t offset); virtual TermEnumPtr terms(); virtual TermEnumPtr terms(const TermPtr& t); virtual int32_t docFreq(const TermPtr& t); virtual TermDocsPtr termDocs(); virtual TermDocsPtr termDocs(const TermPtr& term); virtual TermPositionsPtr termPositions(); virtual HashSet getFieldNames(FieldOption fieldOption); virtual int64_t getVersion(); virtual bool isCurrent(); virtual bool isOptimized(); virtual Collection getSequentialSubReaders(); /// If the subclass of FilteredIndexReader modifies the contents of the FieldCache, you must /// override this method to provide a different key virtual LuceneObjectPtr getFieldCacheKey(); /// If the subclass of FilteredIndexReader modifies the deleted docs, you must override this /// method to provide a different key virtual LuceneObjectPtr getDeletesCacheKey(); protected: virtual void doUndeleteAll(); virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); virtual void doDelete(int32_t docNum); virtual void doCommit(MapStringString commitUserData); virtual void doClose(); }; /// Base class for filtering {@link TermDocs} implementations. 
class LPPAPI FilterTermDocs : public TermPositions, public LuceneObject { public: FilterTermDocs(const TermDocsPtr& in); virtual ~FilterTermDocs(); LUCENE_CLASS(FilterTermDocs); protected: TermDocsPtr in; public: virtual void seek(const TermPtr& term); virtual void seek(const TermEnumPtr& termEnum); virtual int32_t doc(); virtual int32_t freq(); virtual bool next(); virtual int32_t read(Collection& docs, Collection& freqs); virtual bool skipTo(int32_t target); virtual void close(); }; /// Base class for filtering {@link TermPositions} implementations. class LPPAPI FilterTermPositions : public FilterTermDocs { public: FilterTermPositions(const TermPositionsPtr& in); virtual ~FilterTermPositions(); LUCENE_CLASS(FilterTermPositions); public: virtual int32_t nextPosition(); virtual int32_t getPayloadLength(); virtual ByteArray getPayload(ByteArray data, int32_t offset); virtual bool isPayloadAvailable(); }; /// Base class for filtering {@link TermEnum} implementations. class LPPAPI FilterTermEnum : public TermEnum { public: FilterTermEnum(const TermEnumPtr& in); virtual ~FilterTermEnum(); LUCENE_CLASS(FilterTermEnum); protected: TermEnumPtr in; public: virtual bool next(); virtual TermPtr term(); virtual int32_t docFreq(); virtual void close(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FilterManager.h000066400000000000000000000044031456444476200232460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILTERMANAGER_H #define FILTERMANAGER_H #include "LuceneObject.h" namespace Lucene { /// Filter caching singleton. It can be used to save filters locally for reuse. 
Also could be used as a /// persistent storage for any filter as long as the filter provides a proper hashCode(), as that is used /// as the key in the cache. /// /// The cache is periodically cleaned up from a separate thread to ensure the cache doesn't exceed the /// maximum size. class LPPAPI FilterManager : public LuceneObject { public: /// Sets up the FilterManager singleton. FilterManager(); virtual ~FilterManager(); LUCENE_CLASS(FilterManager); protected: /// The default maximum number of Filters in the cache static const int32_t DEFAULT_CACHE_CLEAN_SIZE; /// The default frequency of cache cleanup static const int64_t DEFAULT_CACHE_SLEEP_TIME; /// The cache itself MapIntFilterItem cache; /// Maximum allowed cache size int32_t cacheCleanSize; /// Cache cleaning frequency int64_t cleanSleepTime; /// Cache cleaner that runs in a separate thread FilterCleanerPtr filterCleaner; public: virtual void initialize(); static FilterManagerPtr getInstance(); /// Sets the max size that cache should reach before it is cleaned up /// @param cacheCleanSize maximum allowed cache size void setCacheSize(int32_t cacheCleanSize); /// Sets the cache cleaning frequency in milliseconds. /// @param cleanSleepTime cleaning frequency in milliseconds void setCleanThreadSleepTime(int64_t cleanSleepTime); /// Returns the cached version of the filter. Allows the caller to pass up a small filter but this will /// keep a persistent version around and allow the caching filter to do its job. /// @param filter The input filter /// @return The cached version of the filter FilterPtr getFilter(const FilterPtr& filter); friend class FilterCleaner; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FilteredDocIdSet.h000066400000000000000000000036271456444476200236520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILTEREDDOCIDSET_H #define FILTEREDDOCIDSET_H #include "DocIdSet.h" namespace Lucene { /// Abstract decorator class for a DocIdSet implementation that provides on-demand filtering/validation /// mechanism on a given DocIdSet. /// /// Technically, this same functionality could be achieved with ChainedFilter (under contrib/misc), however /// the benefit of this class is it never materializes the full bitset for the filter. Instead, the {@link /// #match} method is invoked on-demand, per docID visited during searching. If you know few docIDs will /// be visited, and the logic behind {@link #match} is relatively costly, this may be a better way to filter /// than ChainedFilter. /// @see DocIdSet class LPPAPI FilteredDocIdSet : public DocIdSet { public: /// @param innerSet Underlying DocIdSet FilteredDocIdSet(const DocIdSetPtr& innerSet); virtual ~FilteredDocIdSet(); LUCENE_CLASS(FilteredDocIdSet); protected: DocIdSetPtr innerSet; public: /// This DocIdSet implementation is cacheable if the inner set is cacheable. virtual bool isCacheable(); /// Implementation of the contract to build a DocIdSetIterator. /// @see DocIdSetIterator /// @see FilteredDocIdSetIterator virtual DocIdSetIteratorPtr iterator(); protected: /// Validation method to determine whether a docid should be in the result set. /// @param docid docid to be tested /// @return true if input docid should be in the result set, false otherwise. virtual bool match(int32_t docid) = 0; friend class DefaultFilteredDocIdSetIterator; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FilteredDocIdSetIterator.h000066400000000000000000000026401456444476200253560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILTEREDDOCIDSETITERATOR_H #define FILTEREDDOCIDSETITERATOR_H #include "DocIdSetIterator.h" namespace Lucene { /// Abstract decorator class of a DocIdSetIterator implementation that provides on-demand filter/validation /// mechanism on an underlying DocIdSetIterator. See {@link FilteredDocIdSet}. class LPPAPI FilteredDocIdSetIterator : public DocIdSetIterator { public: /// @param innerIter Underlying DocIdSetIterator. FilteredDocIdSetIterator(const DocIdSetIteratorPtr& innerIter); virtual ~FilteredDocIdSetIterator(); LUCENE_CLASS(FilteredDocIdSetIterator); protected: DocIdSetIteratorPtr innerIter; int32_t doc; public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); protected: /// Validation method to determine whether a docid should be in the result set. /// @param doc docid to be tested /// @return true if input docid should be in the result set, false otherwise. /// @see #FilteredDocIdSetIterator(DocIdSetIterator). virtual bool match(int32_t docid) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FilteredQuery.h000066400000000000000000000037631456444476200233220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FILTEREDQUERY_H #define FILTEREDQUERY_H #include "Query.h" namespace Lucene { /// A query that applies a filter to the results of another query. 
/// /// Note: the bits are retrieved from the filter each time this query is used in a search - use a /// CachingWrapperFilter to avoid regenerating the bits every time. /// /// @see CachingWrapperFilter class LPPAPI FilteredQuery : public Query { public: /// Constructs a new query which applies a filter to the results of the original query. /// Filter::getDocIdSet() will be called every time this query is used in a search. /// @param query Query to be filtered, cannot be null. /// @param filter Filter to apply to query results, cannot be null. FilteredQuery(const QueryPtr& query, const FilterPtr& filter); virtual ~FilteredQuery(); LUCENE_CLASS(FilteredQuery); private: QueryPtr query; FilterPtr filter; public: using Query::toString; /// Returns a Weight that applies the filter to the enclosed query's Weight. /// This is accomplished by overriding the Scorer returned by the Weight. virtual WeightPtr createWeight(const SearcherPtr& searcher); /// Rewrites the wrapped query. virtual QueryPtr rewrite(const IndexReaderPtr& reader); QueryPtr getQuery(); FilterPtr getFilter(); virtual void extractTerms(SetTerm terms); /// Prints a user-readable version of this query. virtual String toString(const String& field); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); friend class FilteredQueryWeight; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FilteredTermEnum.h000066400000000000000000000036251456444476200237460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FILTEREDTERMENUM_H #define FILTEREDTERMENUM_H #include "TermEnum.h" namespace Lucene { /// Abstract class for enumerating a subset of all terms. /// /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than /// all that precede it. class LPPAPI FilteredTermEnum : public TermEnum { public: virtual ~FilteredTermEnum(); LUCENE_CLASS(FilteredTermEnum); protected: /// The current term TermPtr currentTerm; /// The delegate enum - to set this member use {@link #setEnum} TermEnumPtr actualEnum; public: /// Equality measure on the term virtual double difference() = 0; /// Returns the docFreq of the current Term in the enumeration. /// Returns -1 if no Term matches or all terms have been enumerated. virtual int32_t docFreq(); /// Increments the enumeration to the next element. True if one exists. virtual bool next(); /// Returns the current Term in the enumeration. /// Returns null if no Term matches or all terms have been enumerated. virtual TermPtr term(); /// Closes the enumeration to further activity, freeing resources. virtual void close(); protected: /// Equality compare on the term virtual bool termCompare(const TermPtr& term) = 0; /// Indicates the end of the enumeration has been reached virtual bool endEnum() = 0; /// Use this method to set the actual TermEnum (eg. in ctor), it will be automatically positioned /// on the first matching term. virtual void setEnum(const TermEnumPtr& actualEnum); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FlagsAttribute.h000066400000000000000000000026141456444476200234500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FLAGSATTRIBUTE_H #define FLAGSATTRIBUTE_H #include "Attribute.h" namespace Lucene { /// This attribute can be used to pass different flags down the tokenizer chain, eg from one TokenFilter /// to another one. class LPPAPI FlagsAttribute : public Attribute { public: FlagsAttribute(); virtual ~FlagsAttribute(); LUCENE_CLASS(FlagsAttribute); protected: int32_t flags; public: virtual String toString(); /// Get the bitset for any bits that have been set. This is completely distinct from {@link /// TypeAttribute#type()}, although they do share similar purposes. The flags can be used to encode /// information about the token for use by other {@link TokenFilter}s. virtual int32_t getFlags(); /// @see #getFlags() virtual void setFlags(int32_t flags); virtual void clear(); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual void copyTo(const AttributePtr& target); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FormatPostingsDocsConsumer.h000066400000000000000000000016421456444476200260340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSDOCSCONSUMER_H #define FORMATPOSTINGSDOCSCONSUMER_H #include "LuceneObject.h" namespace Lucene { class FormatPostingsDocsConsumer : public LuceneObject { public: virtual ~FormatPostingsDocsConsumer(); LUCENE_CLASS(FormatPostingsDocsConsumer); public: /// Adds a new doc in this term. If this returns null then we just skip consuming positions/payloads. 
virtual FormatPostingsPositionsConsumerPtr addDoc(int32_t docID, int32_t termDocFreq) = 0; /// Called when we are done adding docs to this term virtual void finish() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FormatPostingsDocsWriter.h000066400000000000000000000032471456444476200255200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSDOCSWRITER_H #define FORMATPOSTINGSDOCSWRITER_H #include "FormatPostingsDocsConsumer.h" namespace Lucene { /// Consumes doc & freq, writing them using the current index file format class FormatPostingsDocsWriter : public FormatPostingsDocsConsumer { public: FormatPostingsDocsWriter(const SegmentWriteStatePtr& state, const FormatPostingsTermsWriterPtr& parent); virtual ~FormatPostingsDocsWriter(); LUCENE_CLASS(FormatPostingsDocsWriter); public: IndexOutputPtr out; FormatPostingsTermsWriterWeakPtr _parent; SegmentWriteStatePtr state; FormatPostingsPositionsWriterPtr posWriter; DefaultSkipListWriterPtr skipListWriter; int32_t skipInterval; int32_t totalNumDocs; bool omitTermFreqAndPositions; bool storePayloads; int64_t freqStart; FieldInfoPtr fieldInfo; int32_t lastDocID; int32_t df; TermInfoPtr termInfo; // minimize consing UTF8ResultPtr utf8; public: virtual void initialize(); void setField(const FieldInfoPtr& fieldInfo); /// Adds a new doc in this term. If this returns null then we just skip consuming positions/payloads. 
virtual FormatPostingsPositionsConsumerPtr addDoc(int32_t docID, int32_t termDocFreq); /// Called when we are done adding docs to this term virtual void finish(); void close(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FormatPostingsFieldsConsumer.h000066400000000000000000000020261456444476200263470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSFIELDSCONSUMER_H #define FORMATPOSTINGSFIELDSCONSUMER_H #include "LuceneObject.h" namespace Lucene { /// Abstract API that consumes terms, doc, freq, prox and payloads postings. Concrete implementations of this /// actually do "something" with the postings (write it into the index in a specific format). class FormatPostingsFieldsConsumer : public LuceneObject { public: virtual ~FormatPostingsFieldsConsumer(); LUCENE_CLASS(FormatPostingsFieldsConsumer); public: /// Add a new field. virtual FormatPostingsTermsConsumerPtr addField(const FieldInfoPtr& field) = 0; /// Called when we are done adding everything. virtual void finish() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FormatPostingsFieldsWriter.h000066400000000000000000000023421456444476200260310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSFIELDSWRITER_H #define FORMATPOSTINGSFIELDSWRITER_H #include "FormatPostingsFieldsConsumer.h" namespace Lucene { class FormatPostingsFieldsWriter : public FormatPostingsFieldsConsumer { public: FormatPostingsFieldsWriter(const SegmentWriteStatePtr& state, const FieldInfosPtr& fieldInfos); virtual ~FormatPostingsFieldsWriter(); LUCENE_CLASS(FormatPostingsFieldsWriter); public: DirectoryPtr dir; String segment; TermInfosWriterPtr termsOut; SegmentWriteStatePtr state; FieldInfosPtr fieldInfos; FormatPostingsTermsWriterPtr termsWriter; DefaultSkipListWriterPtr skipListWriter; int32_t totalNumDocs; public: virtual void initialize(); /// Add a new field. virtual FormatPostingsTermsConsumerPtr addField(const FieldInfoPtr& field); /// Called when we are done adding everything. virtual void finish(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FormatPostingsPositionsConsumer.h000066400000000000000000000017251456444476200271350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSPOSITIONSCONSUMER_H #define FORMATPOSTINGSPOSITIONSCONSUMER_H #include "LuceneObject.h" namespace Lucene { class FormatPostingsPositionsConsumer : public LuceneObject { public: virtual ~FormatPostingsPositionsConsumer(); LUCENE_CLASS(FormatPostingsPositionsConsumer); public: /// Add a new position & payload. If payloadLength > 0 you must read those bytes from the IndexInput. virtual void addPosition(int32_t position, ByteArray payload, int32_t payloadOffset, int32_t payloadLength) = 0; /// Called when we are done adding positions & payloads. 
virtual void finish() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FormatPostingsPositionsWriter.h000066400000000000000000000024351456444476200266150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSPOSITIONSWRITER_H #define FORMATPOSTINGSPOSITIONSWRITER_H #include "FormatPostingsPositionsConsumer.h" namespace Lucene { class FormatPostingsPositionsWriter : public FormatPostingsPositionsConsumer { public: FormatPostingsPositionsWriter(const SegmentWriteStatePtr& state, const FormatPostingsDocsWriterPtr& parent); virtual ~FormatPostingsPositionsWriter(); LUCENE_CLASS(FormatPostingsPositionsWriter); public: FormatPostingsDocsWriterWeakPtr _parent; IndexOutputPtr out; bool omitTermFreqAndPositions; bool storePayloads; int32_t lastPayloadLength; int32_t lastPosition; public: /// Add a new position & payload virtual void addPosition(int32_t position, ByteArray payload, int32_t payloadOffset, int32_t payloadLength); void setField(const FieldInfoPtr& fieldInfo); /// Called when we are done adding positions & payloads virtual void finish(); void close(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FormatPostingsTermsConsumer.h000066400000000000000000000017051456444476200262360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSTERMSCONSUMER_H #define FORMATPOSTINGSTERMSCONSUMER_H #include "LuceneObject.h" namespace Lucene { class FormatPostingsTermsConsumer : public LuceneObject { public: virtual ~FormatPostingsTermsConsumer(); LUCENE_CLASS(FormatPostingsTermsConsumer); public: CharArray termBuffer; public: /// Adds a new term in this field virtual FormatPostingsDocsConsumerPtr addTerm(CharArray text, int32_t start) = 0; virtual FormatPostingsDocsConsumerPtr addTerm(const String& text); /// Called when we are done adding terms to this field virtual void finish() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FormatPostingsTermsWriter.h000066400000000000000000000025461456444476200257230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FORMATPOSTINGSTERMSWRITER_H #define FORMATPOSTINGSTERMSWRITER_H #include "FormatPostingsTermsConsumer.h" namespace Lucene { class FormatPostingsTermsWriter : public FormatPostingsTermsConsumer { public: FormatPostingsTermsWriter(const SegmentWriteStatePtr& state, const FormatPostingsFieldsWriterPtr& parent); virtual ~FormatPostingsTermsWriter(); LUCENE_CLASS(FormatPostingsTermsWriter); public: FormatPostingsFieldsWriterWeakPtr _parent; SegmentWriteStatePtr state; FormatPostingsDocsWriterPtr docsWriter; TermInfosWriterPtr termsOut; FieldInfoPtr fieldInfo; CharArray currentTerm; int32_t currentTermStart; int64_t freqStart; int64_t proxStart; public: virtual void initialize(); void setField(const FieldInfoPtr& fieldInfo); /// Adds a new term in this field virtual FormatPostingsDocsConsumerPtr addTerm(CharArray text, int32_t start); /// Called when we are done adding terms to this field virtual void finish(); void close(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FreqProxFieldMergeState.h000066400000000000000000000022771456444476200252300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FREQPROXFIELDMERGESTATE_H #define FREQPROXFIELDMERGESTATE_H #include "LuceneObject.h" namespace Lucene { /// Used by DocumentsWriter to merge the postings from multiple ThreadStates when creating a segment class FreqProxFieldMergeState : public LuceneObject { public: FreqProxFieldMergeState(const FreqProxTermsWriterPerFieldPtr& field); virtual ~FreqProxFieldMergeState(); LUCENE_CLASS(FreqProxFieldMergeState); public: FreqProxTermsWriterPerFieldPtr field; int32_t numPostings; CharBlockPoolPtr charPool; Collection postings; FreqProxTermsWriterPostingListPtr p; CharArray text; int32_t textOffset; ByteSliceReaderPtr freq; ByteSliceReaderPtr prox; int32_t docID; int32_t termFreq; protected: int32_t postingUpto; public: bool nextTerm(); bool nextDoc(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FreqProxTermsWriter.h000066400000000000000000000037171456444476200245130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FREQPROXTERMSWRITER_H #define FREQPROXTERMSWRITER_H #include "TermsHashConsumer.h" #include "RawPostingList.h" namespace Lucene { class FreqProxTermsWriter : public TermsHashConsumer { public: virtual ~FreqProxTermsWriter(); LUCENE_CLASS(FreqProxTermsWriter); protected: ByteArray payloadBuffer; public: virtual TermsHashConsumerPerThreadPtr addThread(const TermsHashPerThreadPtr& perThread); virtual void createPostings(Collection postings, int32_t start, int32_t count); virtual void closeDocStore(const SegmentWriteStatePtr& state); virtual void abort(); virtual void flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state); /// Walk through all unique text tokens (Posting instances) found in this field and serialize them /// into a single RAM segment. void appendPostings(Collection fields, const FormatPostingsFieldsConsumerPtr& consumer); virtual int32_t bytesPerPosting(); protected: static int32_t compareText(const wchar_t* text1, int32_t pos1, const wchar_t* text2, int32_t pos2); }; class FreqProxTermsWriterPostingList : public RawPostingList { public: FreqProxTermsWriterPostingList(); virtual ~FreqProxTermsWriterPostingList(); LUCENE_CLASS(FreqProxTermsWriterPostingList); public: int32_t docFreq; // # times this term occurs in the current doc int32_t lastDocID; // Last docID where this term occurred int32_t lastDocCode; // Code for prior doc int32_t lastPosition; // Last position where this term occurred }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FreqProxTermsWriterPerField.h000066400000000000000000000031521456444476200261170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FREQPROXTERMSWRITERPERFIELD_H #define FREQPROXTERMSWRITERPERFIELD_H #include "TermsHashConsumerPerField.h" namespace Lucene { class FreqProxTermsWriterPerField : public TermsHashConsumerPerField { public: FreqProxTermsWriterPerField(const TermsHashPerFieldPtr& termsHashPerField, const FreqProxTermsWriterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo); virtual ~FreqProxTermsWriterPerField(); LUCENE_CLASS(FreqProxTermsWriterPerField); public: FreqProxTermsWriterPerThreadWeakPtr _perThread; TermsHashPerFieldWeakPtr _termsHashPerField; FieldInfoPtr fieldInfo; DocStatePtr docState; FieldInvertStatePtr fieldState; bool omitTermFreqAndPositions; PayloadAttributePtr payloadAttribute; bool hasPayloads; public: virtual int32_t getStreamCount(); virtual void finish(); virtual void skippingLongTerm(); virtual int32_t compareTo(const LuceneObjectPtr& other); void reset(); virtual bool start(Collection fields, int32_t count); virtual void start(const FieldablePtr& field); void writeProx(const FreqProxTermsWriterPostingListPtr& p, int32_t proxCode); virtual void newTerm(const RawPostingListPtr& p); virtual void addTerm(const RawPostingListPtr& p); void abort(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FreqProxTermsWriterPerThread.h000066400000000000000000000020551456444476200263040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef FREQPROXTERMSWRITERPERTHREAD_H #define FREQPROXTERMSWRITERPERTHREAD_H #include "TermsHashConsumerPerThread.h" namespace Lucene { class FreqProxTermsWriterPerThread : public TermsHashConsumerPerThread { public: FreqProxTermsWriterPerThread(const TermsHashPerThreadPtr& perThread); virtual ~FreqProxTermsWriterPerThread(); LUCENE_CLASS(FreqProxTermsWriterPerThread); public: TermsHashPerThreadWeakPtr _termsHashPerThread; DocStatePtr docState; public: virtual TermsHashConsumerPerFieldPtr addField(const TermsHashPerFieldPtr& termsHashPerField, const FieldInfoPtr& fieldInfo); virtual void startDocument(); virtual DocWriterPtr finishDocument(); virtual void abort(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FuzzyQuery.h000066400000000000000000000056071456444476200227120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FUZZYQUERY_H #define FUZZYQUERY_H #include "MultiTermQuery.h" namespace Lucene { /// Implements the fuzzy search query. The similarity measurement is based on the Levenshtein (edit /// distance) algorithm. /// /// Warning: this query is not very scalable with its default prefix length of 0 - in this case, *every* /// term will be enumerated and cause an edit score calculation. class LPPAPI FuzzyQuery : public MultiTermQuery { public: /// Create a new FuzzyQuery that will match terms with a similarity of at least minimumSimilarity /// to term. If a prefixLength > 0 is specified, a common prefix of that length is also required. 
/// @param term The term to search for /// @param minimumSimilarity A value between 0 and 1 to set the required similarity between the query /// term and the matching terms. For example, for a minimumSimilarity of 0.5 a term of the same /// length as the query term is considered similar to the query term if the edit distance between /// both terms is less than length(term) * 0.5 /// @param prefixLength Length of common (non-fuzzy) prefix FuzzyQuery(const TermPtr& term, double minimumSimilarity, int32_t prefixLength); FuzzyQuery(const TermPtr& term, double minimumSimilarity); FuzzyQuery(const TermPtr& term); virtual ~FuzzyQuery(); LUCENE_CLASS(FuzzyQuery); protected: double minimumSimilarity; int32_t prefixLength; bool termLongEnough; TermPtr term; public: static double defaultMinSimilarity(); static const int32_t defaultPrefixLength; public: using MultiTermQuery::toString; /// Returns the minimum similarity that is required for this query to match. /// @return float value between 0.0 and 1.0 double getMinSimilarity(); /// Returns the non-fuzzy prefix length. This is the number of characters at the start of a term that /// must be identical (not fuzzy) to the query term if the query is to match that term. int32_t getPrefixLength(); /// Returns the pattern term. 
TermPtr getTerm(); virtual void setRewriteMethod(const RewriteMethodPtr& method); virtual QueryPtr rewrite(const IndexReaderPtr& reader); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual String toString(const String& field); virtual int32_t hashCode(); virtual bool equals(const LuceneObjectPtr& other); protected: void ConstructQuery(const TermPtr& term, double minimumSimilarity, int32_t prefixLength); virtual FilteredTermEnumPtr getEnum(const IndexReaderPtr& reader); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/FuzzyTermEnum.h000066400000000000000000000114471456444476200233400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FUZZYTERMENUM_H #define FUZZYTERMENUM_H #include "FilteredTermEnum.h" namespace Lucene { /// Subclass of FilteredTermEnum for enumerating all terms that are similar to the specified filter term. /// /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater /// than all that precede it. class LPPAPI FuzzyTermEnum : public FilteredTermEnum { public: /// Constructor for enumeration of all terms from specified reader which share a prefix of length /// prefixLength with term and which have a fuzzy similarity > minSimilarity. /// /// After calling the constructor the enumeration is already pointing to the first valid term if /// such a term exists. /// @param reader Delivers terms. /// @param term Pattern term. /// @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5. /// @param prefixLength Length of required common prefix. Default value is 0. 
FuzzyTermEnum(const IndexReaderPtr& reader, const TermPtr& term, double minSimilarity, int32_t prefixLength); FuzzyTermEnum(const IndexReaderPtr& reader, const TermPtr& term, double minSimilarity); FuzzyTermEnum(const IndexReaderPtr& reader, const TermPtr& term); virtual ~FuzzyTermEnum(); LUCENE_CLASS(FuzzyTermEnum); protected: /// Allows us save time required to create a new array every time similarity is called. Collection p; Collection d; double _similarity; bool _endEnum; TermPtr searchTerm; String field; String text; String prefix; double minimumSimilarity; double scale_factor; public: virtual double difference(); virtual bool endEnum(); virtual void close(); protected: void ConstructTermEnum(const IndexReaderPtr& reader, const TermPtr& term, double minSimilarity, int32_t prefixLength); /// The termCompare method in FuzzyTermEnum uses Levenshtein distance to calculate the distance between /// the given term and the comparing term. virtual bool termCompare(const TermPtr& term); /// /// Compute Levenshtein distance /// /// Similarity returns a number that is 1.0f or less (including negative numbers) based on how similar the /// Term is compared to a target term. It returns exactly 0.0 when ///
    /// editDistance > maximumEditDistance
    /// 
/// /// Otherwise it returns: ///
    /// 1 - (editDistance / length)
    /// 
/// where length is the length of the shortest term (text or target) including a prefix that are identical /// and editDistance is the Levenshtein distance for the two words. /// /// Embedded within this algorithm is a fail-fast Levenshtein distance algorithm. The fail-fast algorithm /// differs from the standard Levenshtein distance algorithm in that it is aborted if it is discovered that /// the minimum distance between the words is greater than some threshold. /// /// To calculate the maximum distance threshold we use the following formula: ///
    /// (1 - minimumSimilarity) * length
    /// 
/// where length is the shortest term including any prefix that is not part of the similarity comparison. /// This formula was derived by solving for what maximum value of distance returns false for the following /// statements: ///
    /// similarity = 1 - ((double)distance / (double)(prefixLength + std::min(textlen, targetlen)));
    /// return (similarity > minimumSimilarity);
    /// 
/// where distance is the Levenshtein distance for the two words. /// /// Levenshtein distance (also known as edit distance) is a measure of similarity between two strings where /// the distance is measured as the number of character deletions, insertions or substitutions required to /// transform one string to the other string. /// /// @param target The target word or phrase. /// @return the similarity, 0.0 or less indicates that it matches less than the required threshold and 1.0 /// indicates that the text and target are identical. double similarity(const String& target); /// The max Distance is the maximum Levenshtein distance for the text compared to some other value that /// results in score that is better than the minimum similarity. /// @param m The length of the "other value" /// @return The maximum Levenshtein distance that we care about int32_t calculateMaxDistance(int32_t m); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/HashMap.h000066400000000000000000000105621456444476200220520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef HASHMAP_H #define HASHMAP_H #include #include "LuceneSync.h" namespace Lucene { /// Utility template class to handle hash maps that can be safely copied and shared template < class KEY, class VALUE, class HASH = boost::hash, class EQUAL = std::equal_to > class HashMap : public LuceneSync { public: typedef HashMap this_type; typedef std::pair key_value; typedef boost::unordered_map map_type; typedef typename map_type::iterator iterator; typedef typename map_type::const_iterator const_iterator; typedef KEY key_type; typedef VALUE value_type; virtual ~HashMap() { } protected: boost::shared_ptr mapContainer; public: static this_type newInstance() { this_type instance; instance.mapContainer = Lucene::newInstance(); return instance; } void reset() { mapContainer.reset(); } int32_t size() const { return (int32_t)mapContainer->size(); } bool empty() const { return mapContainer->empty(); } void clear() { mapContainer->clear(); } iterator begin() { return mapContainer->begin(); } iterator end() { return mapContainer->end(); } const_iterator begin() const { return mapContainer->begin(); } const_iterator end() const { return mapContainer->end(); } operator bool() const { return mapContainer.get() != NULL; } bool operator! 
() const { return !mapContainer; } map_type& operator= (const map_type& other) { mapContainer = other.mapContainer; return *this; } void put(const KEY& key, const VALUE& value) { (*mapContainer)[key] = value; } template void putAll(ITER first, ITER last) { for (iterator current = first; current != last; ++current) { (*mapContainer)[current->first] = current->second; } } template void remove(ITER pos) { mapContainer->erase(pos); } template ITER remove(ITER first, ITER last) { return mapContainer->erase(first, last); } bool remove(const KEY& key) { return (mapContainer->erase(key) > 0); } iterator find(const KEY& key) { return mapContainer->find(key); } VALUE get(const KEY& key) const { iterator findValue = mapContainer->find(key); return findValue == mapContainer->end() ? VALUE() : findValue->second; } bool contains(const KEY& key) const { return (mapContainer->find(key) != mapContainer->end()); } VALUE& operator[] (const KEY& key) { return (*mapContainer)[key]; } }; /// Utility template class to handle weak keyed maps template < class KEY, class VALUE, class HASH = boost::hash, class EQUAL = std::equal_to > class WeakHashMap : public HashMap { public: typedef WeakHashMap this_type; typedef std::pair key_value; typedef typename boost::unordered_map map_type; typedef typename map_type::iterator iterator; static this_type newInstance() { this_type instance; instance.mapContainer = Lucene::newInstance(); return instance; } void removeWeak() { if (!this->mapContainer || this->mapContainer->empty()) { return; } map_type clearCopy; for (iterator key = this->mapContainer->begin(); key != this->mapContainer->end(); ++key) { if (!key->first.expired()) { clearCopy.insert(*key); } } this->mapContainer->swap(clearCopy); } VALUE get(const KEY& key) { iterator findValue = this->mapContainer->find(key); if (findValue != this->mapContainer->end()) { return findValue->second; } removeWeak(); return VALUE(); } }; } #endif 
LucenePlusPlus-rel_3.0.9/include/lucene++/HashSet.h000066400000000000000000000053001456444476200220620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef HASHSET_H #define HASHSET_H #include #include "LuceneSync.h" namespace Lucene { /// Utility template class to handle hash set collections that can be safely copied and shared template < class TYPE, class HASH = boost::hash, class EQUAL = std::equal_to > class HashSet : public LuceneSync { public: typedef HashSet this_type; typedef boost::unordered_set set_type; typedef typename set_type::iterator iterator; typedef typename set_type::const_iterator const_iterator; typedef TYPE value_type; virtual ~HashSet() { } protected: boost::shared_ptr setContainer; public: static this_type newInstance() { this_type instance; instance.setContainer = Lucene::newInstance(); return instance; } template static this_type newInstance(ITER first, ITER last) { this_type instance; instance.setContainer = Lucene::newInstance(first, last); return instance; } void reset() { setContainer.reset(); } int32_t size() const { return (int32_t)setContainer->size(); } bool empty() const { return setContainer->empty(); } void clear() { setContainer->clear(); } iterator begin() { return setContainer->begin(); } iterator end() { return setContainer->end(); } const_iterator begin() const { return setContainer->begin(); } const_iterator end() const { return setContainer->end(); } operator bool() const { return setContainer.get() != NULL; } bool operator! 
() const { return !setContainer; } set_type& operator= (const set_type& other) { setContainer = other.setContainer; return *this; } bool add(const TYPE& type) { return setContainer->insert(type).second; } template void addAll(ITER first, ITER last) { setContainer->insert(first, last); } bool remove(const TYPE& type) { return (setContainer->erase(type) > 0); } iterator find(const TYPE& type) { return setContainer->find(type); } bool contains(const TYPE& type) const { return (setContainer->find(type) != setContainer->end()); } }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/HitQueue.h000066400000000000000000000015361456444476200222630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef HITQUEUE_H #define HITQUEUE_H #include "HitQueueBase.h" namespace Lucene { class LPPAPI HitQueue : public HitQueueBase { public: /// Creates a new instance with size elements. HitQueue(int32_t size, bool prePopulate); virtual ~HitQueue(); LUCENE_CLASS(HitQueue); protected: bool prePopulate; protected: virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); /// Returns null if prePopulate is false. virtual ScoreDocPtr getSentinelObject(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/HitQueueBase.h000066400000000000000000000031421456444476200230510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef HITQUEUEBASE_H #define HITQUEUEBASE_H #include "PriorityQueue.h" namespace Lucene { class LPPAPI HitQueueBase : public LuceneObject { public: HitQueueBase(int32_t size); virtual ~HitQueueBase(); LUCENE_CLASS(HitQueueBase); public: virtual ScoreDocPtr add(const ScoreDocPtr& scoreDoc); virtual ScoreDocPtr addOverflow(const ScoreDocPtr& scoreDoc); virtual ScoreDocPtr top(); virtual ScoreDocPtr pop(); virtual ScoreDocPtr updateTop(); virtual int32_t size(); virtual bool empty(); virtual void clear(); protected: PriorityQueueScoreDocsPtr queue; int32_t queueSize; public: virtual void initialize(); protected: virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) = 0; virtual ScoreDocPtr getSentinelObject(); friend class PriorityQueueScoreDocs; }; class LPPAPI PriorityQueueScoreDocs : public PriorityQueue { public: PriorityQueueScoreDocs(const HitQueueBasePtr& hitQueue, int32_t size); virtual ~PriorityQueueScoreDocs(); LUCENE_CLASS(PriorityQueueScoreDocs); protected: HitQueueBaseWeakPtr _hitQueue; protected: virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); virtual ScoreDocPtr getSentinelObject(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ISOLatin1AccentFilter.h000066400000000000000000000025541456444476200245220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ISOLATIN1ACCENTFILTER_H #define ISOLATIN1ACCENTFILTER_H #include "TokenFilter.h" namespace Lucene { /// A filter that replaces accented characters in the ISO Latin 1 character set (ISO-8859-1) by their unaccented /// equivalent. 
The case will not be altered. /// /// For instance, 'à' will be replaced by 'a'. /// /// @deprecated If you build a new index, use {@link ASCIIFoldingFilter} which covers a superset of Latin 1. /// This class is included for use with existing indexes and will be removed in a future release (possibly Lucene 4.0). class LPPAPI ISOLatin1AccentFilter : public TokenFilter { public: ISOLatin1AccentFilter(const TokenStreamPtr& input); virtual ~ISOLatin1AccentFilter(); LUCENE_CLASS(ISOLatin1AccentFilter); protected: CharArray output; int32_t outputPos; TermAttributePtr termAtt; public: virtual bool incrementToken(); /// To replace accented characters in a String by unaccented equivalents. void removeAccents(const wchar_t* input, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/IndexCommit.h000066400000000000000000000060031456444476200227440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXCOMMIT_H #define INDEXCOMMIT_H #include "LuceneObject.h" namespace Lucene { /// Represents a single commit into an index as seen by the {@link IndexDeletionPolicy} or {@link IndexReader}. /// /// Changes to the content of an index are made visible only after the writer who made that change commits by /// writing a new segments file (segments_N). This point in time, when the action of writing of a new segments /// file to the directory is completed, is an index commit. /// /// Each index commit point has a unique segments file associated with it. The segments file associated with a /// later index commit point would have a larger N. 
class LPPAPI IndexCommit : public LuceneObject { public: virtual ~IndexCommit(); LUCENE_CLASS(IndexCommit); public: /// Get the segments file (segments_N) associated with this commit point. virtual String getSegmentsFileName() = 0; /// Returns all index files referenced by this commit point. virtual HashSet getFileNames() = 0; /// Returns the {@link Directory} for the index. virtual DirectoryPtr getDirectory() = 0; /// Delete this commit point. This only applies when using the commit point in the context of IndexWriter's /// IndexDeletionPolicy. /// /// Upon calling this, the writer is notified that this commit point should be deleted. /// /// Decision that a commit-point should be deleted is taken by the {@link IndexDeletionPolicy} in effect /// and therefore this should only be called by its {@link IndexDeletionPolicy#onInit onInit()} or /// {@link IndexDeletionPolicy#onCommit onCommit()} methods. virtual void deleteCommit() = 0; virtual bool isDeleted() = 0; /// Returns true if this commit is an optimized index. virtual bool isOptimized() = 0; /// Two IndexCommits are equal if both their Directory and versions are equal. virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); /// Returns the version for this IndexCommit. This is the same value that {@link IndexReader#getVersion} /// would return if it were opened on this commit. virtual int64_t getVersion() = 0; /// Returns the generation (the _N in segments_N) for this IndexCommit. virtual int64_t getGeneration() = 0; /// Convenience method that returns the last modified time of the segments_N file corresponding to this /// index commit, equivalent to getDirectory()->fileModified(getSegmentsFileName()). virtual int64_t getTimestamp(); /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. Map is /// String -> String. 
virtual MapStringString getUserData() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/IndexDeletionPolicy.h000066400000000000000000000067371456444476200244550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXDELETIONPOLICY_H #define INDEXDELETIONPOLICY_H #include "LuceneObject.h" namespace Lucene { /// Policy for deletion of stale {@link IndexCommit index commits}. /// Implement this interface, and pass /// it to one of the {@link IndexWriter} or {@link IndexReader} constructors, to customize when older /// {@link IndexCommit point-in-time commits} are deleted from the index directory. The default deletion /// policy is {@link KeepOnlyLastCommitDeletionPolicy}, which always removes old commits as soon as a new /// commit is done (this matches the behavior before 2.2). /// /// One expected use case for this (and the reason why it was first created) is to work around problems /// with an index directory accessed via filesystems like NFS because NFS does not provide the "delete on /// last close" semantics that Lucene's "point in time" search normally relies on. By implementing a /// custom deletion policy, such as "a commit is only removed once it has been stale for more than X /// minutes", you can give your readers time to refresh to the new commit before {@link IndexWriter} /// removes the old commits. Note that doing so will increase the storage requirements of the index. 
class LPPAPI IndexDeletionPolicy : public LuceneObject { protected: IndexDeletionPolicy(); public: virtual ~IndexDeletionPolicy(); LUCENE_CLASS(IndexDeletionPolicy); public: /// This is called once when a writer is first instantiated to give the policy a chance to remove old /// commit points. /// /// The writer locates all index commits present in the index directory and calls this method. The /// policy may choose to delete some of the commit points, doing so by calling method {@link /// IndexCommit#delete delete()} of {@link IndexCommit}. /// /// Note: the last CommitPoint is the most recent one, ie. the "front index state". Be careful not to /// delete it, unless you know for sure what you are doing, and unless you can afford to lose the /// index content while doing that. /// /// @param commits List of current {@link IndexCommit point-in-time commits}, sorted by age (the 0th /// one is the oldest commit). virtual void onInit(Collection commits) = 0; /// This is called each time the writer completed a commit. This gives the policy a chance to remove /// old commit points with each commit. /// /// The policy may now choose to delete old commit points by calling method {@link /// IndexCommit#delete delete()} of {@link IndexCommit}. /// /// This method is only called when {@link IndexWriter#commit} or {@link IndexWriter#close} is called, /// or possibly not at all if the {@link IndexWriter#rollback} is called. /// /// Note: the last CommitPoint is the most recent one, ie. the "front index state". Be careful not to /// delete it, unless you know for sure what you are doing, and unless you can afford to lose the /// index content while doing that. /// /// @param commits List of {@link IndexCommit}, sorted by age (the 0th one is the oldest commit). 
virtual void onCommit(Collection commits) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/IndexFileDeleter.h000066400000000000000000000174651456444476200237160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXFILEDELETER_H #define INDEXFILEDELETER_H #include "IndexCommit.h" namespace Lucene { /// This class keeps track of each SegmentInfos instance that is still "live", either because it corresponds to a /// segments_N file in the Directory (a "commit", ie. a committed SegmentInfos) or because it's an in-memory /// SegmentInfos that a writer is actively updating but has not yet committed. This class uses simple reference /// counting to map the live SegmentInfos instances to individual files in the Directory. /// /// The same directory file may be referenced by more than one IndexCommit, i.e. more than one SegmentInfos. /// Therefore we count how many commits reference each file. When all the commits referencing a certain file have /// been deleted, the refcount for that file becomes zero, and the file is deleted. /// /// A separate deletion policy interface (IndexDeletionPolicy) is consulted on creation (onInit) and once per /// commit (onCommit), to decide when a commit should be removed. /// /// It is the business of the IndexDeletionPolicy to choose when to delete commit points. The actual mechanics of /// file deletion, retrying, etc, derived from the deletion of commit points is the business of the IndexFileDeleter. /// /// The current default deletion policy is {@link KeepOnlyLastCommitDeletionPolicy}, which removes all prior commits /// when a new commit has completed. This matches the behavior before 2.2. 
/// /// Note that you must hold the write.lock before instantiating this class. It opens segments_N file(s) directly /// with no retry logic. class LPPAPI IndexFileDeleter : public LuceneObject { public: /// Initialize the deleter: find all previous commits in the Directory, incref the files they reference, call /// the policy to let it delete commits. This will remove any files not referenced by any of the commits. IndexFileDeleter(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& policy, const SegmentInfosPtr& segmentInfos, const InfoStreamPtr& infoStream, const DocumentsWriterPtr& docWriter, HashSet synced); virtual ~IndexFileDeleter(); LUCENE_CLASS(IndexFileDeleter); protected: /// Files that we tried to delete but failed (likely because they are open and we are running on Windows), /// so we will retry them again later HashSet deletable; /// Reference count for all files in the index. Counts how many existing commits reference a file. MapStringRefCount refCounts; /// Holds all commits (segments_N) currently in the index. This will have just 1 commit if you are using the /// default delete policy (KeepOnlyLastCommitDeletionPolicy). Other policies may leave commit points live for /// longer in which case this list would be longer than 1 Collection commits; /// Holds files we had incref'd from the previous non-commit checkpoint Collection< HashSet > lastFiles; /// Commits that the IndexDeletionPolicy have decided to delete Collection commitsToDelete; InfoStreamPtr infoStream; DirectoryPtr directory; IndexDeletionPolicyPtr policy; DocumentsWriterPtr docWriter; SegmentInfosPtr lastSegmentInfos; HashSet synced; /// Change to true to see details of reference counts when infoStream != null static bool VERBOSE_REF_COUNTS; public: bool startingCommitDeleted; protected: void message(const String& message); /// Remove the CommitPoints in the commitsToDelete List by DecRef'ing all files from each SegmentInfos. 
void deleteCommits(); void deletePendingFiles(); RefCountPtr getRefCount(const String& fileName); public: void setInfoStream(const InfoStreamPtr& infoStream); SegmentInfosPtr getLastSegmentInfos(); /// Writer calls this when it has hit an error and had to roll back, to tell us that there may now be /// unreferenced files in the filesystem. So we re-list the filesystem and delete such files. If /// segmentName is non-null, we will only delete files corresponding to that segment. void refresh(const String& segmentName); void refresh(); void close(); /// For definition of "check point" see IndexWriter comments: "Clarification: Check Points (and commits)". /// Writer calls this when it has made a "consistent change" to the index, meaning new files are written to /// the index and the in-memory SegmentInfos have been modified to point to those files. /// /// This may or may not be a commit (segments_N may or may not have been written). /// /// We simply incref the files referenced by the new SegmentInfos and decref the files we had previously /// seen (if any). /// /// If this is a commit, we also call the policy to give it a chance to remove other commits. If any /// commits are removed, we decref their files as well. void checkpoint(const SegmentInfosPtr& segmentInfos, bool isCommit); void incRef(const SegmentInfosPtr& segmentInfos, bool isCommit); void incRef(HashSet files); void incRef(const String& fileName); void decRef(HashSet files); void decRef(const String& fileName); void decRef(const SegmentInfosPtr& segmentInfos); bool exists(const String& fileName); void deleteFiles(HashSet files); /// Deletes the specified files, but only if they are new (have not yet been incref'd). 
void deleteNewFiles(HashSet files); void deleteFile(const String& fileName); }; /// Tracks the reference count for a single index file class RefCount : public LuceneObject { public: RefCount(const String& fileName); virtual ~RefCount(); LUCENE_CLASS(RefCount); public: String fileName; // fileName used only for better assert error messages bool initDone; int32_t count; public: int32_t IncRef(); int32_t DecRef(); }; /// Holds details for each commit point. This class is also passed to the deletion policy. Note: this class /// has a natural ordering that is inconsistent with equals. class CommitPoint : public IndexCommit { public: CommitPoint(Collection commitsToDelete, const DirectoryPtr& directory, const SegmentInfosPtr& segmentInfos); virtual ~CommitPoint(); LUCENE_CLASS(CommitPoint); public: int64_t gen; HashSet files; String segmentsFileName; bool deleted; DirectoryPtr directory; Collection commitsToDelete; int64_t version; int64_t generation; bool _isOptimized; MapStringString userData; public: virtual String toString(); /// Returns true if this commit is an optimized index. virtual bool isOptimized(); /// Get the segments file (segments_N) associated with this commit point. virtual String getSegmentsFileName(); /// Returns all index files referenced by this commit point. virtual HashSet getFileNames(); /// Returns the {@link Directory} for the index. virtual DirectoryPtr getDirectory(); /// Returns the version for this IndexCommit. virtual int64_t getVersion(); /// Returns the generation (the _N in segments_N) for this IndexCommit. virtual int64_t getGeneration(); /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. virtual MapStringString getUserData(); /// Called only be the deletion policy, to remove this commit point from the index. 
virtual void deleteCommit(); virtual bool isDeleted(); virtual int32_t compareTo(const LuceneObjectPtr& other); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/IndexFileNameFilter.h000066400000000000000000000021251456444476200243430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXFILENAMEFILTER_H #define INDEXFILENAMEFILTER_H #include "LuceneObject.h" namespace Lucene { /// Filename filter that accept filenames and extensions only created by Lucene. class LPPAPI IndexFileNameFilter : public LuceneObject { public: /// Returns true if this is a file known to be a Lucene index file. static bool accept(const String& directory, const String& name); /// Returns true if this is a file that would be contained in a CFS file. /// This function should only be called on files that pass the /// {@link #accept} (ie, are already known to be a Lucene index file). static bool isCFSFile(const String& name); /// Return singleton IndexFileNameFilter static IndexFileNameFilterPtr getFilter(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/IndexFileNames.h000066400000000000000000000076701456444476200233720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXFILENAMES_H #define INDEXFILENAMES_H #include "LuceneObject.h" namespace Lucene { /// Constants representing filenames and extensions used by Lucene. 
class LPPAPI IndexFileNames : public LuceneObject { public: virtual ~IndexFileNames(); LUCENE_CLASS(IndexFileNames); public: /// Name of the index segment file. static const String& SEGMENTS(); /// Name of the generation reference file name. static const String& SEGMENTS_GEN(); /// Name of the index deletable file (only used in pre-lockless indices). static const String& DELETABLE(); /// Extension of norms file. static const String& NORMS_EXTENSION(); /// Extension of freq postings file. static const String& FREQ_EXTENSION(); /// Extension of prox postings file. static const String& PROX_EXTENSION(); /// Extension of terms file. static const String& TERMS_EXTENSION(); /// Extension of terms index file. static const String& TERMS_INDEX_EXTENSION(); /// Extension of stored fields index file. static const String& FIELDS_INDEX_EXTENSION(); /// Extension of stored fields file. static const String& FIELDS_EXTENSION(); /// Extension of vectors fields file. static const String& VECTORS_FIELDS_EXTENSION(); /// Extension of vectors documents file. static const String& VECTORS_DOCUMENTS_EXTENSION(); /// Extension of vectors index file. static const String& VECTORS_INDEX_EXTENSION(); /// Extension of compound file. static const String& COMPOUND_FILE_EXTENSION(); /// Extension of compound file for doc store files. static const String& COMPOUND_FILE_STORE_EXTENSION(); /// Extension of deletes. static const String& DELETES_EXTENSION(); /// Extension of field infos. static const String& FIELD_INFOS_EXTENSION(); /// Extension of plain norms. static const String& PLAIN_NORMS_EXTENSION(); /// Extension of separate norms. static const String& SEPARATE_NORMS_EXTENSION(); /// Extension of gen file. static const String& GEN_EXTENSION(); /// This array contains all filename extensions used by Lucene's index /// files, with two exceptions, namely the extension made up from /// ".f" + number and from ".s" + number. 
Also note that Lucene's /// "segments_N" files do not have any filename extension. static const HashSet INDEX_EXTENSIONS(); /// File extensions that are added to a compound file (same as /// {@link #INDEX_EXTENSIONS}, minus "del", "gen", "cfs"). static const HashSet INDEX_EXTENSIONS_IN_COMPOUND_FILE(); static const HashSet STORE_INDEX_EXTENSIONS(); static const HashSet NON_STORE_INDEX_EXTENSIONS(); /// File extensions of old-style index files. static const HashSet COMPOUND_EXTENSIONS(); /// File extensions for term vector support. static const HashSet VECTOR_EXTENSIONS(); /// Computes the full file name from base, extension and generation. /// If the generation is {@link SegmentInfo#NO}, the file name is null. /// If it's {@link SegmentInfo#WITHOUT_GEN} the file name is base+extension. /// If it's > 0, the file name is base_generation+extension. static String fileNameFromGeneration(const String& base, const String& extension, int64_t gen); /// Returns true if the provided filename is one of the doc store files /// (ends with an extension in STORE_INDEX_EXTENSIONS). static bool isDocStoreFile(const String& fileName); /// Return segment file name. static String segmentFileName(const String& segmentName, const String& ext); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/IndexInput.h000066400000000000000000000121321456444476200226130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXINPUT_H #define INDEXINPUT_H #include "LuceneObject.h" namespace Lucene { /// Abstract base class for input from a file in a {@link Directory}. /// A random-access input stream. Used for all Lucene index input operations. 
/// @see Directory class LPPAPI IndexInput : public LuceneObject { public: IndexInput(); virtual ~IndexInput(); LUCENE_CLASS(IndexInput); protected: bool preUTF8Strings; // true if we are reading old (modified UTF8) string format public: /// Reads and returns a single byte. /// @see IndexOutput#writeByte(uint8_t) virtual uint8_t readByte() = 0; /// Reads a specified number of bytes into an array at the specified offset. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. /// @see IndexOutput#writeBytes(const uint8_t*, int) virtual void readBytes(uint8_t* b, int32_t offset, int32_t length) = 0; /// Reads a specified number of bytes into an array at the specified offset /// with control over whether the read should be buffered (callers who have /// their own buffer should pass in "false" for useBuffer). Currently only /// {@link BufferedIndexInput} respects this parameter. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. /// @param useBuffer set to false if the caller will handle buffering. /// @see IndexOutput#writeBytes(const uint8_t*,int) virtual void readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer); /// Reads four bytes and returns an int. /// @see IndexOutput#writeInt(int32_t) virtual int32_t readInt(); /// Reads an int stored in variable-length format. Reads between one and five /// bytes. Smaller values take fewer bytes. Negative numbers are not supported. /// @see IndexOutput#writeVInt(int32_t) virtual int32_t readVInt(); /// Reads eight bytes and returns a int64. /// @see IndexOutput#writeLong(int64_t) virtual int64_t readLong(); /// Reads a int64 stored in variable-length format. Reads between one and nine /// bytes. Smaller values take fewer bytes. Negative numbers are not supported. 
virtual int64_t readVLong(); /// Call this if readString should read characters stored in the old modified /// UTF8 format. This is used for indices written pre-2.4. virtual void setModifiedUTF8StringsMode(); /// Reads a string. /// @see IndexOutput#writeString(const String&) virtual String readString(); /// Reads a modified UTF8 format string. virtual String readModifiedUTF8String(); /// Reads Lucene's old "modified UTF-8" encoded characters into an array. /// @param buffer the array to read characters into. /// @param start the offset in the array to start storing characters. /// @param length the number of characters to read. /// @see IndexOutput#writeChars(const String& s, int32_t, int32_t) virtual int32_t readChars(wchar_t* buffer, int32_t start, int32_t length); /// Similar to {@link #readChars(wchar_t*, int32_t, int32_t)} but does not /// do any conversion operations on the bytes it is reading in. It still /// has to invoke {@link #readByte()} just as {@link #readChars(wchar_t*, int32_t, int32_t)} /// does, but it does not need a buffer to store anything and it does not have /// to do any of the bitwise operations, since we don't actually care what is /// in the byte except to determine how many more bytes to read. /// @param length The number of chars to read. /// @deprecated this method operates on old "modified utf8" encoded strings. virtual void skipChars(int32_t length); /// Closes the stream to further operations. virtual void close() = 0; /// Returns the current position in this file, where the next read will occur. /// @see #seek(int64_t) virtual int64_t getFilePointer() = 0; /// Sets current position in this file, where the next read will occur. /// @see #getFilePointer() virtual void seek(int64_t pos) = 0; /// The number of bytes in the file. virtual int64_t length() = 0; /// Returns a clone of this stream. /// /// Clones of a stream access the same data, and are positioned at the same /// point as the stream they were cloned from. 
/// /// Subclasses must ensure that clones may be positioned at different points /// in the input from each other and from the stream they were cloned from. virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); /// Read string map as a series of key/value pairs. virtual MapStringString readStringStringMap(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/IndexOutput.h000066400000000000000000000075401456444476200230230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXOUTPUT_H #define INDEXOUTPUT_H #include "LuceneObject.h" namespace Lucene { /// Abstract base class for output to a file in a Directory. A random-access output stream. Used for all /// Lucene index output operations. /// @see Directory /// @see IndexInput class LPPAPI IndexOutput : public LuceneObject { public: virtual ~IndexOutput(); LUCENE_CLASS(IndexOutput); protected: static const int32_t COPY_BUFFER_SIZE; ByteArray copyBuffer; public: /// Writes a single byte. /// @see IndexInput#readByte() virtual void writeByte(uint8_t b) = 0; /// Writes an array of bytes. /// @param b the bytes to write. /// @param length the number of bytes to write. /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length) = 0; /// Forces any buffered output to be written. virtual void flush() = 0; /// Closes this stream to further operations. virtual void close() = 0; /// Returns the current position in this file, where the next write will occur. virtual int64_t getFilePointer() = 0; /// Sets current position in this file, where the next write will occur. 
/// @see #getFilePointer() virtual void seek(int64_t pos) = 0; /// The number of bytes in the file. virtual int64_t length() = 0; public: /// Writes an array of bytes. /// @param b the bytes to write. /// @param length the number of bytes to write. /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) void writeBytes(const uint8_t* b, int32_t length); /// Writes an int as four bytes. /// @see IndexInput#readInt() void writeInt(int32_t i); /// Writes an int in a variable-length format. Writes between one and five bytes. Smaller values take fewer bytes. /// Negative numbers are not supported. /// @see IndexInput#readVInt() void writeVInt(int32_t i); /// Writes a int64 as eight bytes. /// @see IndexInput#readLong() void writeLong(int64_t i); /// Writes an int64 in a variable-length format. Writes between one and five bytes. Smaller values take fewer bytes. /// Negative numbers are not supported. /// @see IndexInput#readVLong() void writeVLong(int64_t i); /// Writes a string. /// @see IndexInput#readString() void writeString(const String& s); /// Writes a sub sequence of characters from s as the old format (modified UTF-8 encoded bytes). /// @param s the source of the characters. /// @param start the first character in the sequence. /// @param length the number of characters in the sequence. /// @deprecated -- please use {@link #writeString} void writeChars(const String& s, int32_t start, int32_t length); /// Copy numBytes bytes from input to ourself. void copyBytes(const IndexInputPtr& input, int64_t numBytes); /// Set the file length. By default, this method does nothing (it's optional for a Directory to implement it). /// But, certain Directory implementations (for example @see FSDirectory) can use this to inform the underlying IO /// system to pre-allocate the file to the specified size. If the length is longer than the current file length, /// the bytes added to the file are undefined. Otherwise the file is truncated. /// @param length file length. 
void setLength(int64_t length); /// Write string map as a series of key/value pairs. /// @param map map of string-string key-values. void writeStringStringMap(MapStringString map); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/IndexReader.h000066400000000000000000000777061456444476200227400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXREADER_H #define INDEXREADER_H #include "SegmentInfos.h" namespace Lucene { /// IndexReader is an abstract class, providing an interface for accessing an index. Search of an index is done /// entirely through this abstract interface, so that any subclass which implements it is searchable. /// /// Concrete subclasses of IndexReader are usually constructed with a call to one of the static open methods, /// eg. {@link #open(DirectoryPtr, bool)}. /// /// For efficiency, in this API documents are often referred to via document numbers, non-negative integers which /// each name a unique document in the index. These document numbers are ephemeral -they may change as documents /// are added to and deleted from an index. Clients should thus not rely on a given document having the same number /// between sessions. /// /// An IndexReader can be opened on a directory for which an IndexWriter is opened already, but it cannot be used /// to delete documents from the index then. /// /// NOTE: for backwards API compatibility, several methods are not listed as abstract, but have no useful implementations /// in this base class and instead always throw UnsupportedOperation exception. Subclasses are strongly encouraged to /// override these methods, but in many cases may not need to. 
/// /// NOTE: as of 2.4, it's possible to open a read-only IndexReader using the static open methods that accept the bool /// readOnly parameter. Such a reader has better concurrency as it's not necessary to synchronize on the isDeleted /// method. You must specify false if you want to make changes with the resulting IndexReader. /// /// NOTE: {@link IndexReader} instances are completely thread safe, meaning multiple threads can call any of its methods, /// concurrently. If your application requires external synchronization, you should not synchronize on the IndexReader /// instance; use your own (non-Lucene) objects instead. class LPPAPI IndexReader : public LuceneObject { public: IndexReader(); virtual ~IndexReader(); LUCENE_CLASS(IndexReader); public: /// Constants describing field properties, for example used for {@link IndexReader#getFieldNames(FieldOption)}. enum FieldOption { /// All fields FIELD_OPTION_ALL, /// All indexed fields FIELD_OPTION_INDEXED, /// All fields that store payloads FIELD_OPTION_STORES_PAYLOADS, /// All fields that omit tf FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS, /// All fields which are not indexed FIELD_OPTION_UNINDEXED, /// All fields which are indexed with termvectors enabled FIELD_OPTION_INDEXED_WITH_TERMVECTOR, /// All fields which are indexed but don't have termvectors enabled FIELD_OPTION_INDEXED_NO_TERMVECTOR, /// All fields with termvectors enabled. 
Please note that only standard termvector fields are returned FIELD_OPTION_TERMVECTOR, /// All fields with termvectors with position values enabled FIELD_OPTION_TERMVECTOR_WITH_POSITION, /// All fields with termvectors with offset values enabled FIELD_OPTION_TERMVECTOR_WITH_OFFSET, /// All fields with termvectors with offset values and position values enabled FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET }; static const int32_t DEFAULT_TERMS_INDEX_DIVISOR; protected: bool closed; bool _hasChanges; int32_t refCount; public: /// Returns the current refCount for this reader int32_t getRefCount(); /// Increments the refCount of this IndexReader instance. RefCounts are used to determine when a reader can be /// closed safely, i.e. as soon as there are no more references. Be sure to always call a corresponding {@link /// #decRef}, in a finally clause; otherwise the reader may never be closed. Note that {@link #close} simply /// calls decRef(), which means that the IndexReader will not really be closed until {@link #decRef} has been /// called for all outstanding references. /// @see #decRef void incRef(); /// Decreases the refCount of this IndexReader instance. If the refCount drops to 0, then pending changes /// (if any) are committed to the index and this reader is closed. /// @see #incRef void decRef(); /// Returns a IndexReader reading the index in the given Directory, with readOnly = true. /// @param directory the index directory static IndexReaderPtr open(const DirectoryPtr& directory); /// Returns an IndexReader reading the index in the given Directory. You should pass readOnly = true, since it /// gives much better concurrent performance, unless you intend to do write operations (delete documents or change /// norms) with the reader. 
/// @param directory the index directory /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader static IndexReaderPtr open(const DirectoryPtr& directory, bool readOnly); /// Returns an IndexReader reading the index in the given {@link IndexCommit}. You should pass readOnly = true, /// since it gives much better concurrent performance, unless you intend to do write operations (delete documents /// or change norms) with the reader. /// @param commit the commit point to open /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader static IndexReaderPtr open(const IndexCommitPtr& commit, bool readOnly); /// Returns an IndexReader reading the index in the given Directory, with a custom {@link IndexDeletionPolicy}. /// You should pass readOnly=true, since it gives much better concurrent performance, unless you intend to do write /// operations (delete documents or change norms) with the reader. /// @param directory the index directory /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform /// deletes or to set norms); see {@link IndexWriter} for details. /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly); /// Returns an IndexReader reading the index in the given Directory, with a custom {@link IndexDeletionPolicy}. /// You should pass readOnly=true, since it gives much better concurrent performance, unless you intend to do write /// operations (delete documents or change norms) with the reader. /// @param directory the index directory /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform /// deletes or to set norms); see {@link IndexWriter} for details. 
/// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader /// @param termInfosIndexDivisor Subsamples which indexed terms are loaded into RAM. This has the /// same effect as {@link IndexWriter#setTermIndexInterval} except that setting must be done at /// indexing time while this setting can be set per reader. When set to N, then one in every /// N*termIndexInterval terms in the index is loaded into memory. By setting this to a value > 1 /// you can reduce memory usage, at the expense of higher latency when loading a TermInfo. The /// default value is 1. Set this to -1 to skip loading the terms index entirely. static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor); /// Returns an IndexReader reading the index in the given Directory, using a specific commit and with a custom /// {@link IndexDeletionPolicy}. You should pass readOnly=true, since it gives much better concurrent performance, /// unless you intend to do write operations (delete documents or change norms) with the reader. /// @param commit the specific {@link IndexCommit} to open; see {@link IndexReader#listCommits} to list all /// commits in a directory /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform /// deletes or to set norms); see {@link IndexWriter} for details. /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader static IndexReaderPtr open(const IndexCommitPtr& commit, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly); /// Returns an IndexReader reading the index in the given Directory, using a specific commit and with a custom {@link /// IndexDeletionPolicy}. You should pass readOnly=true, since it gives much better concurrent performance, unless /// you intend to do write operations (delete documents or change norms) with the reader. 
/// @param commit the specific {@link IndexCommit} to open; see {@link IndexReader#listCommits} to /// list all commits in a directory /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform deletes /// or to set norms); see {@link IndexWriter} for details. /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader /// @param termInfosIndexDivisor Subsamples which indexed terms are loaded into RAM. This has the same effect as /// {@link IndexWriter#setTermIndexInterval} except that setting must be done at indexing time while this setting can /// be set per reader. When set to N, then one in every N * termIndexInterval terms in the index is loaded into /// memory. By setting this to a value > 1 you can reduce memory usage, at the expense of higher latency when loading /// a TermInfo. The default value is 1. Set this to -1 to skip loading the terms index entirely. static IndexReaderPtr open(const IndexCommitPtr& commit, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor); /// Refreshes an IndexReader if the index has changed since this instance was (re)opened. /// /// Opening an IndexReader is an expensive operation. This method can be used to refresh an existing IndexReader to /// reduce these costs. This method tries to only load segments that have changed or were created after the /// IndexReader was (re)opened. /// /// If the index has not changed since this instance was (re)opened, then this call is a NOOP and returns this /// instance. Otherwise, a new instance is returned. The old instance is not closed and remains usable. /// /// If the reader is reopened, even though they share resources internally, it's safe to make changes (deletions, /// norms) with the new reader. All shared mutable state obeys "copy on write" semantics to ensure the changes are /// not seen by other readers. 
/// /// You can determine whether a reader was actually reopened by comparing the old instance with the /// instance returned by this method: /// ///
    /// IndexReaderPtr reader = ...
    /// ...
    /// IndexReaderPtr newReader = r.reopen();
    /// if (newReader != reader)
    /// {
    ///     ... // reader was reopened
    ///     reader->close();
    /// }
    /// reader = newReader;
    /// ...
    /// 
/// /// Be sure to synchronize that code so that other threads, if present, can never use reader after it has been /// closed and before it's switched to newReader. If this reader is a near real-time reader (obtained from /// {@link IndexWriter#getReader()}, reopen() will simply call writer.getReader() again for you, though this /// may change in the future. virtual IndexReaderPtr reopen(); /// Just like {@link #reopen()}, except you can change the readOnly of the original reader. If the index is /// unchanged but readOnly is different then a new reader will be returned. virtual IndexReaderPtr reopen(bool openReadOnly); /// Reopen this reader on a specific commit point. This always returns a readOnly reader. If the specified commit /// point matches what this reader is already on, and this reader is already readOnly, then this same instance is /// returned; if it is not already readOnly, a readOnly clone is returned. virtual IndexReaderPtr reopen(const IndexCommitPtr& commit); /// Efficiently clones the IndexReader (sharing most internal state). /// /// On cloning a reader with pending changes (deletions, norms), the original reader transfers its write lock to the /// cloned reader. This means only the cloned reader may make further changes to the index, and commit the changes /// to the index on close, but the old reader still reflects all changes made up until it was cloned. /// /// Like {@link #reopen()}, it's safe to make changes to either the original or the cloned reader: all shared mutable /// state obeys "copy on write" semantics to ensure the changes are not seen by other readers. virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); /// Clones the IndexReader and optionally changes readOnly. A readOnly reader cannot open a writable reader. virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr& other = LuceneObjectPtr()); /// Returns the directory associated with this index. 
The default implementation returns the directory specified by /// subclasses when delegating to the IndexReader(Directory) constructor, or throws an UnsupportedOperation exception /// if one was not specified. virtual DirectoryPtr directory(); /// Returns the time the index in the named directory was last modified. Do not use this to check /// whether the reader is still up-to-date, use {@link #isCurrent()} instead. static int64_t lastModified(const DirectoryPtr& directory2); /// Reads version number from segments files. The version number is initialized with a timestamp /// and then increased by one for each change of the index. /// @param directory where the index resides. /// @return version number. static int64_t getCurrentVersion(const DirectoryPtr& directory); /// Reads commitUserData, previously passed to {@link IndexWriter#commit(MapStringString)}, from /// current index segments file. This will return null if {@link IndexWriter#commit(MapStringString)} /// has never been called for this index. static MapStringString getCommitUserData(const DirectoryPtr& directory); /// Version number when this IndexReader was opened. Not implemented in the IndexReader base class. /// /// If this reader is based on a Directory (ie, was created by calling {@link #open}, or {@link /// #reopen} on a reader based on a Directory), then this method returns the version recorded in the /// commit that the reader opened. This version is advanced every time {@link IndexWriter#commit} /// is called. /// /// If instead this reader is a near real-time reader (ie, obtained by a call to {@link /// IndexWriter#getReader}, or by calling {@link #reopen} on a near real-time reader), then this /// method returns the version of the last commit done by the writer. Note that even as further /// changes are made with the writer, the version will not changed until a commit is completed. /// Thus, you should not rely on this method to determine when a near real-time reader should be /// opened. 
Use {@link #isCurrent} instead. virtual int64_t getVersion(); /// Retrieve the String userData optionally passed to IndexWriter#commit. This will return null if /// {@link IndexWriter#commit(MapStringString)} has never been called for this index. virtual MapStringString getCommitUserData(); /// Check whether any new changes have occurred to the index since this reader was opened. /// /// If this reader is based on a Directory (ie, was created by calling {@link #open}, or {@link /// #reopen} on a reader based on a Directory), then this method checks if any further commits (see /// {@link IndexWriter#commit} have occurred in that directory). /// /// If instead this reader is a near real-time reader (ie, obtained by a call to {@link /// IndexWriter#getReader}, or by calling {@link #reopen} on a near real-time reader), then this /// method checks if either a new commit has occurred, or any new uncommitted changes have taken /// place via the writer. Note that even if the writer has only performed merging, this method /// will still return false. /// /// In any event, if this returns false, you should call {@link #reopen} to get a new reader that /// sees the changes. virtual bool isCurrent(); /// Checks is the index is optimized (if it has a single segment and no deletions). Not implemented /// in the IndexReader base class. /// @return true if the index is optimized; false otherwise virtual bool isOptimized(); /// Return an array of term frequency vectors for the specified document. The array contains a /// vector for each vectorized field in the document. Each vector contains terms and frequencies /// for all terms in a given vectorized field. If no such fields existed, the method returns null. /// The term vectors that are returned may either be of type {@link TermFreqVector} or of type /// {@link TermPositionVector} if positions or offsets have been stored. 
/// /// @param docNumber document for which term frequency vectors are returned /// @return array of term frequency vectors. May be null if no term vectors have been stored for the /// specified document. virtual Collection getTermFreqVectors(int32_t docNumber) = 0; /// Return a term frequency vector for the specified document and field. The returned vector contains /// terms and frequencies for the terms in the specified field of this document, if the field had the /// storeTermVector flag set. If termvectors had been stored with positions or offsets, a /// {@link TermPositionVector} is returned. /// /// @param docNumber document for which the term frequency vector is returned. /// @param field field for which the term frequency vector is returned. /// @return term frequency vector May be null if field does not exist in the specified document or /// term vector was not stored. virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field) = 0; /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays /// of the {@link TermFreqVector}. /// @param docNumber The number of the document to load the vector for /// @param field The name of the field to load /// @param mapper The {@link TermVectorMapper} to process the vector. Must not be null. virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) = 0; /// Map all the term vectors for all fields in a Document /// @param docNumber The number of the document to load the vector for /// @param mapper The {@link TermVectorMapper} to process the vector. Must not be null. virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) = 0; /// Returns true if an index exists at the specified directory. If the directory does not exist or /// if there is no index in it. 
/// @param directory the directory to check for an index /// @return true if an index exists; false otherwise static bool indexExists(const DirectoryPtr& directory); /// Returns the number of documents in this index. virtual int32_t numDocs() = 0; /// Returns one greater than the largest possible document number. This may be used to, eg., determine /// how big to allocate an array which will have an element for every document number in an index. virtual int32_t maxDoc() = 0; /// Returns the number of deleted documents. int32_t numDeletedDocs(); /// Returns the stored fields of the n'th Document in this index. /// /// NOTE: for performance reasons, this method does not check if the requested document is deleted, and /// therefore asking for a deleted document may yield unspecified results. Usually this is not required, /// however you can call {@link #isDeleted(int)} with the requested document ID to verify the document /// is not deleted. virtual DocumentPtr document(int32_t n); /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine /// what {@link Field}s to load and how they should be loaded. /// NOTE: If this Reader (more specifically, the underlying FieldsReader) is closed before the lazy /// {@link Field} is loaded an exception may be thrown. If you want the value of a lazy {@link Field} /// to be available after closing you must explicitly load it or fetch the Document again with a new /// loader. /// /// NOTE: for performance reasons, this method does not check if the requested document is deleted, /// and therefore asking for a deleted document may yield unspecified results. Usually this is not /// required, however you can call {@link #isDeleted(int32_t)} with the requested document ID to verify /// the document is not deleted. /// /// @param n Get the document at the n'th position /// @param fieldSelector The {@link FieldSelector} to use to determine what Fields should be loaded on /// the Document. 
May be null, in which case all Fields will be loaded. /// @return The stored fields of the {@link Document} at the n'th position /// @see Fieldable /// @see FieldSelector /// @see SetBasedFieldSelector /// @see LoadFirstFieldSelector virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector) = 0; /// Returns true if document n has been deleted virtual bool isDeleted(int32_t n) = 0; /// Returns true if any documents have been deleted virtual bool hasDeletions() = 0; /// Used for testing virtual bool hasChanges(); /// Returns true if there are norms stored for this field. virtual bool hasNorms(const String& field); /// Returns the byte-encoded normalization factor for the named field of every document. This is used /// by the search code to score documents. /// @see Field#setBoost(double) virtual ByteArray norms(const String& field) = 0; /// Reads the byte-encoded normalization factor for the named field of every document. This is used /// by the search code to score documents. /// @see Field#setBoost(double) virtual void norms(const String& field, ByteArray norms, int32_t offset) = 0; /// Resets the normalization factor for the named field of the named document. The norm represents /// the product of the field's {@link Fieldable#setBoost(double) boost} and its {@link /// Similarity#lengthNorm(String, int) length normalization}. Thus, to preserve the length normalization /// values when resetting this, one should base the new value upon the old. /// /// NOTE: If this field does not store norms, then this method call will silently do nothing. /// /// @see #norms(String) /// @see Similarity#decodeNorm(byte) virtual void setNorm(int32_t doc, const String& field, uint8_t value); /// Resets the normalization factor for the named field of the named document. /// /// @see #norms(String) /// @see Similarity#decodeNorm(byte) virtual void setNorm(int32_t doc, const String& field, double value); /// Returns an enumeration of all the terms in the index. 
The enumeration is ordered by /// Term::compareTo(). Each term is greater than all that precede it in the enumeration. /// Note that after calling terms(), {@link TermEnum#next()} must be called on the resulting /// enumeration before calling other methods such as {@link TermEnum#term()}. virtual TermEnumPtr terms() = 0; /// Returns an enumeration of all terms starting at a given term. If the given term does not /// exist, the enumeration is positioned at the first term greater than the supplied term. /// The enumeration is ordered by Term::compareTo(). Each term is greater than all that precede /// it in the enumeration. virtual TermEnumPtr terms(const TermPtr& t) = 0; /// Returns the number of documents containing the term t. virtual int32_t docFreq(const TermPtr& t) = 0; /// Returns an enumeration of all the documents which contain term. For each document, the /// document number, the frequency of the term in that document is also provided, for use in /// search scoring. If term is null, then all non-deleted docs are returned with freq=1. /// The enumeration is ordered by document number. Each document number is greater than all /// that precede it in the enumeration. virtual TermDocsPtr termDocs(const TermPtr& term); /// Returns an unpositioned {@link TermDocs} enumerator. virtual TermDocsPtr termDocs() = 0; /// Returns an enumeration of all the documents which contain term. For each document, in /// addition to the document number and frequency of the term in that document, a list of all /// of the ordinal positions of the term in the document is available. Thus, this method /// positions of the term in the document is available. /// This positional information facilitates phrase and proximity searching. /// The enumeration is ordered by document number. Each document number is greater than all /// that precede it in the enumeration. virtual TermPositionsPtr termPositions(const TermPtr& term); /// Returns an unpositioned {@link TermPositions} enumerator. 
virtual TermPositionsPtr termPositions() = 0; /// Deletes the document numbered docNum. Once a document is deleted it will not appear in /// TermDocs or TermPostitions enumerations. Attempts to read its field with the {@link /// #document} method will result in an error. The presence of this document may still be /// reflected in the {@link #docFreq} statistic, though this will be corrected eventually as /// the index is further modified. virtual void deleteDocument(int32_t docNum); /// Deletes all documents that have a given term indexed. This is useful if one uses a /// document field to hold a unique ID string for the document. Then to delete such a /// document, one merely constructs a term with the appropriate field and the unique ID string /// as its text and passes it to this method. See {@link #deleteDocument(int)} for information /// about when this deletion will become effective. /// @return the number of documents deleted virtual int32_t deleteDocuments(const TermPtr& term); /// Undeletes all documents currently marked as deleted in this index. virtual void undeleteAll(); void flush(); /// @param commitUserData Opaque Map (String -> String) that's recorded into the segments file /// in the index, and retrievable by {@link IndexReader#getCommitUserData}. void flush(MapStringString commitUserData); /// Commit changes resulting from delete, undeleteAll, or setNorm operations. /// If an exception is hit, then either no changes or all changes will have been committed to /// the index (transactional semantics). void commit(MapStringString commitUserData); /// Closes files associated with this index. Also saves any new deletions to disk. /// No other methods should be called after this has been called. void close(); /// Get a list of unique field names that exist in this index and have the specified field option information. 
/// @param fieldOption specifies which field option should be available for the returned fields /// @return Collection of Strings indicating the names of the fields. virtual HashSet getFieldNames(FieldOption fieldOption) = 0; /// Return the IndexCommit that this reader has opened. This method is only implemented by those /// readers that correspond to a Directory with its own segments_N file. virtual IndexCommitPtr getIndexCommit(); /// Prints the filename and size of each file within a given compound file. Add the -extract flag /// to extract files to the current working directory. In order to make the extracted version of /// the index work, you have to copy the segments file from the compound index into the directory /// where the extracted files are stored. /// @param args Usage: IndexReader [-extract] static void main(Collection args); /// Returns all commit points that exist in the Directory. Normally, because the default is {@link /// KeepOnlyLastCommitDeletionPolicy}, there would be only one commit point. But if you're using a /// custom {@link IndexDeletionPolicy} then there could be many commits. Once you have a given /// commit, you can open a reader on it by calling {@link IndexReader#open(IndexCommit,bool)}. /// There must be at least one commit in the Directory, else this method throws an exception. /// Note that if a commit is in progress while this method is running, that commit may or may not /// be returned array. static Collection listCommits(const DirectoryPtr& dir); /// Returns the sequential sub readers that this reader is logically composed of. For example, /// IndexSearcher uses this API to drive searching by one sub reader at a time. If this reader is /// not composed of sequential child readers, it should return null. If this method returns an empty /// array, that means this reader is a null reader (for example a MultiReader that has no sub readers). 
/// /// NOTE: You should not try using sub-readers returned by this method to make any changes (setNorm, /// deleteDocument, etc.). While this might succeed for one composite reader (like MultiReader), it /// will most likely lead to index corruption for other readers (like DirectoryReader obtained /// through {@link #open}. Use the parent reader directly. virtual Collection getSequentialSubReaders(); virtual LuceneObjectPtr getFieldCacheKey(); /// This returns null if the reader has no deletions. virtual LuceneObjectPtr getDeletesCacheKey(); /// Returns the number of unique terms (across all fields) in this reader. /// /// This method returns int64_t, even though internally Lucene cannot handle more than 2^31 unique /// terms, for a possible future when this limitation is removed. virtual int64_t getUniqueTermCount(); /// For IndexReader implementations that use TermInfosReader to read terms, this returns the current /// indexDivisor as specified when the reader was opened. virtual int32_t getTermInfosIndexDivisor(); protected: void ensureOpen(); static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& commit, bool readOnly, int32_t termInfosIndexDivisor); /// Implements setNorm in subclass. virtual void doSetNorm(int32_t doc, const String& field, uint8_t value) = 0; /// Implements deletion of the document numbered docNum. /// Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}. virtual void doDelete(int32_t docNum) = 0; /// Implements actual undeleteAll() in subclass. virtual void doUndeleteAll() = 0; /// Does nothing by default. Subclasses that require a write lock for index modifications must /// implement this method. virtual void acquireWriteLock(); /// Commit changes resulting from delete, undeleteAll, or setNorm operations. 
/// If an exception is hit, then either no changes or all changes will have been committed to /// the index (transactional semantics). void commit(); /// Implements commit. virtual void doCommit(MapStringString commitUserData) = 0; /// Implements close. virtual void doClose() = 0; friend class DirectoryReader; friend class ParallelReader; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/IndexSearcher.h000066400000000000000000000112471456444476200232560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXSEARCHER_H #define INDEXSEARCHER_H #include "Searcher.h" namespace Lucene { /// Implements search over a single IndexReader. /// /// Applications usually need only call the inherited {@link #search(QueryPtr, int32_t)} or {@link /// #search(QueryPtr, FilterPtr, int32_t)} methods. For performance reasons it is recommended to open only /// one IndexSearcher and use it for all of your searches. /// /// NOTE: {@link IndexSearcher} instances are completely thread safe, meaning multiple threads can call any /// of its methods, concurrently. If your application requires external synchronization, you should not /// synchronize on the IndexSearcher instance; use your own (non-Lucene) objects instead. class LPPAPI IndexSearcher : public Searcher { public: /// Creates a searcher searching the index in the named directory. You should pass readOnly = true, /// since it gives much better concurrent performance, unless you intend to do write operations (delete /// documents or change norms) with the underlying IndexReader. 
/// @param path Directory where IndexReader will be opened /// @param readOnly If true, the underlying IndexReader will be opened readOnly IndexSearcher(const DirectoryPtr& path, bool readOnly = true); /// Creates a searcher searching the provided index. IndexSearcher(const IndexReaderPtr& reader); /// Directly specify the reader, subReaders and their docID starts. IndexSearcher(const IndexReaderPtr& reader, Collection subReaders, Collection docStarts); virtual ~IndexSearcher(); LUCENE_CLASS(IndexSearcher); public: IndexReaderPtr reader; protected: bool closeReader; Collection subReaders; Collection docStarts; bool fieldSortDoTrackScores; bool fieldSortDoMaxScore; public: /// Return the {@link IndexReader} this searches. IndexReaderPtr getIndexReader(); /// Note that the underlying IndexReader is not closed, if IndexSearcher was constructed with /// IndexSearcher(const IndexReaderPtr& reader). If the IndexReader was supplied implicitly by specifying a /// directory, then the IndexReader gets closed. virtual void close(); virtual int32_t docFreq(const TermPtr& term); virtual DocumentPtr doc(int32_t n); virtual DocumentPtr doc(int32_t n, const FieldSelectorPtr& fieldSelector); virtual int32_t maxDoc(); using Searcher::search; using Searcher::explain; virtual TopDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n); virtual TopFieldDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort); /// Just like {@link #search(WeightPtr, FilterPtr, int32_t, SortPtr)}, but you choose whether or not the /// fields in the returned {@link FieldDoc} instances should be set by specifying fillFields. /// /// NOTE: this does not compute scores by default. If you need scores, create a {@link TopFieldCollector} /// instance by calling {@link TopFieldCollector#create} and then pass that to {@link #search(WeightPtr, /// FilterPtr, CollectorPtr)}. 
virtual TopFieldDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort, bool fillFields); virtual void search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& results); virtual QueryPtr rewrite(const QueryPtr& query); virtual ExplanationPtr explain(const WeightPtr& weight, int32_t doc); /// By default, no scores are computed when sorting by field (using {@link #search(QueryPtr, FilterPtr, /// int32_t, SortPtr)}). You can change that, per IndexSearcher instance, by calling this method. Note /// that this will incur a CPU cost. /// /// @param doTrackScores If true, then scores are returned for every matching document in {@link TopFieldDocs}. /// @param doMaxScore If true, then the max score for all matching docs is computed. virtual void setDefaultFieldSortScoring(bool doTrackScores, bool doMaxScore); protected: void ConstructSearcher(const IndexReaderPtr& reader, bool closeReader); void gatherSubReaders(Collection allSubReaders, const IndexReaderPtr& reader); void searchWithFilter(const IndexReaderPtr& reader, const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& collector); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/IndexWriter.h000066400000000000000000001646621456444476200230100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INDEXWRITER_H #define INDEXWRITER_H #include "MergePolicy.h" namespace Lucene { /// An IndexWriter creates and maintains an index. /// /// The create argument to the {@link #IndexWriter(DirectoryPtr, AnalyzerPtr, bool, int32_t) constructor} /// determines whether a new index is created, or whether an existing index is opened. 
Note that you can /// open an index with create=true even while readers are using the index. The old readers will continue /// to search the "point in time" snapshot they had opened, and won't see the newly created index until /// they re-open. There are also {@link #IndexWriter(DirectoryPtr, AnalyzerPtr, int32_t) constructors} /// with no create argument which will create a new index if there is not already an index at the provided /// path and otherwise open the existing index. /// /// In either case, documents are added with {@link #addDocument(DocumentPtr) addDocument} and removed /// with {@link #deleteDocuments(TermPtr)} or {@link #deleteDocuments(QueryPtr)}. A document can be updated /// with {@link #updateDocument(TermPtr, DocumentPtr) updateDocument} (which just deletes and then adds /// the entire document). When finished adding, deleting and updating documents, {@link #close() close} /// should be called. /// /// These changes are buffered in memory and periodically flushed to the {@link Directory} (during the /// above method calls). A flush is triggered when there are enough buffered deletes (see /// {@link #setMaxBufferedDeleteTerms}) or enough added documents since the last flush, whichever is /// sooner. For the added documents, flushing is triggered either by RAM usage of the documents (see /// {@link #setRAMBufferSizeMB}) or the number of added documents. The default is to flush when RAM usage /// hits 16 MB. For best indexing speed you should flush by RAM usage with a large RAM buffer. Note that /// flushing just moves the internal buffered state in IndexWriter into the index, but these changes are /// not visible to IndexReader until either {@link #commit()} or {@link #close} is called. A flush may /// also trigger one or more segment merges which by default run with a background thread so as not to /// block the addDocument calls (see mergePolicy below for changing the {@link MergeScheduler}). 
/// /// If an index will not have more documents added for a while and optimal search performance is desired, /// then either the full {@link #optimize() optimize} method or partial {@link #optimize(int32_t)} method /// should be called before the index is closed. /// /// Opening an IndexWriter creates a lock file for the directory in use. Trying to open another IndexWriter /// on the same directory will lead to a LockObtainFailed exception. The LockObtainFailed exception is also /// thrown if an IndexReader on the same directory is used to delete documents from the index. /// /// IndexWriter allows an optional {@link IndexDeletionPolicy} implementation to be specified. You can use /// this to control when prior commits are deleted from the index. The default policy is {@link /// KeepOnlyLastCommitDeletionPolicy} which removes all prior commits as soon as a new commit is done (this /// matches behavior before 2.2). Creating your own policy can allow you to explicitly keep previous /// "point in time" commits alive in the index for some time, to allow readers to refresh to the new commit /// without having the old commit deleted out from under them. This is necessary on file systems like NFS /// that do not support "delete on last close" semantics, which Lucene's "point in time" search normally /// relies on. /// /// IndexWriter allows you to separately change the {@link MergePolicy} and the {@link MergeScheduler}. /// The {@link MergePolicy} is invoked whenever there are changes to the segments in the index. Its role /// is to select which merges to do, if any, and return a {@link MergePolicy.MergeSpecification} describing /// the merges. It also selects merges to do for optimize(). (The default is {@link LogByteSizeMergePolicy}. /// Then, the {@link MergeScheduler} is invoked with the requested merges and it decides when and how to run /// the merges. The default is {@link ConcurrentMergeScheduler}. 
/// /// NOTE: if you hit an std::bad_alloc then IndexWriter will quietly record this fact and block all future /// segment commits. This is a defensive measure in case any internal state (buffered documents and /// deletions) were corrupted. Any subsequent calls to {@link #commit()} will throw an IllegalState /// exception. The only course of action is to call {@link #close()}, which internally will call {@link /// #rollback()}, to undo any changes to the index since the last commit. You can also just call {@link /// #rollback()} directly. /// /// NOTE: {@link IndexWriter} instances are completely thread safe, meaning multiple threads can call any of /// its methods, concurrently. If your application requires external synchronization, you should not /// synchronize on the IndexWriter instance as this may cause deadlock; use your own (non-Lucene) objects /// instead. /// /// Clarification: Check Points (and commits) /// IndexWriter writes new index files to the directory without writing a new segments_N file which /// references these new files. It also means that the state of the in memory SegmentInfos object is different /// than the most recent segments_N file written to the directory. /// /// Each time the SegmentInfos is changed, and matches the (possibly modified) directory files, we have a new /// "check point". If the modified/new SegmentInfos is written to disk - as a new (generation of) segments_N /// file - this check point is also an IndexCommit. /// /// A new checkpoint always replaces the previous checkpoint and becomes the new "front" of the index. This /// allows the IndexFileDeleter to delete files that are referenced only by stale checkpoints (files that were /// created since the last commit, but are no longer referenced by the "front" of the index). For this, /// IndexFileDeleter keeps track of the last non commit checkpoint. 
class LPPAPI IndexWriter : public LuceneObject { protected: IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, bool create, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl, const IndexingChainPtr& indexingChain, const IndexCommitPtr& commit); public: IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, bool create, int32_t mfl); IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, int32_t mfl); IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl); IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, bool create, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl); IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl, const IndexCommitPtr& commit); virtual ~IndexWriter(); LUCENE_CLASS(IndexWriter); protected: int64_t writeLockTimeout; /// The normal read buffer size defaults to 1024, but increasing this during merging seems to /// yield performance gains. However we don't want to increase it too much because there are /// quite a few BufferedIndexInputs created during merging. 
static const int32_t MERGE_READ_BUFFER_SIZE; SynchronizePtr messageIDLock; static int32_t MESSAGE_ID; int32_t messageID; bool hitOOM; DirectoryPtr directory; // where this index resides AnalyzerPtr analyzer; // how to analyze text bool create; IndexDeletionPolicyPtr deletionPolicy; IndexingChainPtr indexingChain; IndexCommitPtr indexCommit; SimilarityPtr similarity; // how to normalize int64_t changeCount; // increments every time a change is completed int64_t lastCommitChangeCount; // last changeCount that was committed SegmentInfosPtr rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails MapSegmentInfoInt rollbackSegments; SegmentInfosPtr localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails int32_t localFlushedDocCount; SegmentInfosPtr segmentInfos; // the segments DocumentsWriterPtr docWriter; IndexFileDeleterPtr deleter; SetSegmentInfo segmentsToOptimize; // used by optimize to note those needing optimization int32_t optimizeMaxNumSegments; LockPtr writeLock; int32_t termIndexInterval; bool closed; bool closing; SetSegmentInfo mergingSegments; MergePolicyPtr mergePolicy; MergeSchedulerPtr mergeScheduler; Collection pendingMerges; SetOneMerge runningMerges; Collection mergeExceptions; int64_t mergeGen; bool stopMerges; int32_t flushCount; int32_t flushDeletesCount; /// Used to only allow one addIndexes to proceed at once int32_t readCount; // count of how many threads are holding read lock int64_t writeThread; // non-null if any thread holds write lock int32_t upgradeCount; int32_t readerTermsIndexDivisor; // This is a "write once" variable (like the organic dye on a DVD-R that may or may not // be heated by a laser and then cooled to permanently record the event): it's false, // until getReader() is called for the first time, at which point it's switched to true // and never changes back to false. 
Once this is true, we hold open and reuse SegmentReader // instances internally for applying deletes, doing merges, and reopening near real-time readers. bool poolReaders; /// The maximum number of terms that will be indexed for a single field in a document. This /// limits the amount of memory required for indexing, so that collections with very large files /// will not crash the indexing process by running out of memory. /// Note that this effectively truncates large documents, excluding from the index terms that /// occur further in the document. If you know your source documents are large, be sure to set /// this value high enough to accommodate the expected size. If you set it to INT_MAX, then the /// only limit is your memory, but you should anticipate an std::bad_alloc. By default, no more /// than 10,000 terms will be indexed for a field. /// /// @see #setMaxFieldLength(int32_t) int32_t maxFieldLength; InfoStreamPtr infoStream; static InfoStreamPtr defaultInfoStream; HashSet synced; // files that have been sync'd already HashSet syncing; // files that are now being sync'd IndexReaderWarmerPtr mergedSegmentWarmer; /// Used only by commit; lock order is commitLock -> IW SynchronizePtr commitLock; INTERNAL: SegmentInfosPtr pendingCommit; // set when a commit is pending (after prepareCommit() & before commit()) int64_t pendingCommitChangeCount; ReaderPoolPtr readerPool; public: /// Default value for the write lock timeout (1,000). /// @see #setDefaultWriteLockTimeout static int64_t WRITE_LOCK_TIMEOUT; static const String WRITE_LOCK_NAME; /// Value to denote a flush trigger is disabled. static const int32_t DISABLE_AUTO_FLUSH; /// Disabled by default (because IndexWriter flushes by RAM usage by default). Change using /// {@link #setMaxBufferedDocs(int32_t)}. static const int32_t DEFAULT_MAX_BUFFERED_DOCS; /// Default value is 16 MB (which means flush when buffered docs consume 16 MB RAM). /// Change using {@link #setRAMBufferSizeMB}. 
static const double DEFAULT_RAM_BUFFER_SIZE_MB; /// Disabled by default (because IndexWriter flushes by RAM usage by default). Change using /// {@link #setMaxBufferedDeleteTerms(int32_t)}. static const int32_t DEFAULT_MAX_BUFFERED_DELETE_TERMS; /// Default value is 10,000. Change using {@link #setMaxFieldLength(int32_t)}. static const int32_t DEFAULT_MAX_FIELD_LENGTH; /// Default value is 128. Change using {@link #setTermIndexInterval(int32_t)}. static const int32_t DEFAULT_TERM_INDEX_INTERVAL; /// Absolute hard maximum length for a term. If a term arrives from the analyzer longer than /// this length, it is skipped and a message is printed to infoStream, if set (see {@link /// #setInfoStream}). static int32_t MAX_TERM_LENGTH(); /// Sets the maximum field length to INT_MAX static const int32_t MaxFieldLengthUNLIMITED; /// Sets the maximum field length to {@link #DEFAULT_MAX_FIELD_LENGTH} static const int32_t MaxFieldLengthLIMITED; public: virtual void initialize(); /// Returns a read-only reader, covering all committed as well as un-committed changes to the /// index. This provides "near real-time" searching, in that changes made during an IndexWriter /// session can be quickly made available for searching without closing the writer nor calling /// {@link #commit}. /// /// Note that this is functionally equivalent to calling {#commit} and then using {@link /// IndexReader#open} to open a new reader. But the turnaround time of this method should be /// faster since it avoids the potentially costly {@link #commit}. /// /// You must close the {@link IndexReader} returned by this method once you are done using it. /// /// It's near real-time because there is no hard guarantee on how quickly you can get a new /// reader after making changes with IndexWriter. You'll have to experiment in your situation /// to determine if it's fast enough. As this is a new and experimental feature, please report /// back on your findings so we can learn, improve and iterate. 
/// /// The resulting reader supports {@link IndexReader#reopen}, but that call will simply forward /// back to this method (though this may change in the future). /// /// The very first time this method is called, this writer instance will make every effort to /// pool the readers that it opens for doing merges, applying deletes, etc. This means additional /// resources (RAM, file descriptors, CPU time) will be consumed. /// /// For lower latency on reopening a reader, you should call {@link #setMergedSegmentWarmer} to /// pre-warm a newly merged segment before it's committed to the index. This is important for /// minimizing index-to-search delay after a large merge. /// /// If an addIndexes* call is running in another thread, then this reader will only search those /// segments from the foreign index that have been successfully copied over, so far. /// /// NOTE: Once the writer is closed, any outstanding readers may continue to be used. However, /// if you attempt to reopen any of those readers, you'll hit an AlreadyClosed exception. /// /// NOTE: This API is experimental and might change in incompatible ways in the next release. /// /// @return IndexReader that covers entire index plus all changes made so far by this IndexWriter /// instance virtual IndexReaderPtr getReader(); /// Like {@link #getReader}, except you can specify which termInfosIndexDivisor should be used for /// any newly opened readers. /// /// @param termInfosIndexDivisor Subsamples which indexed terms are loaded into RAM. This has the /// same effect as {@link IndexWriter#setTermIndexInterval} except that setting must be done at /// indexing time while this setting can be set per reader. When set to N, then one in every /// N*termIndexInterval terms in the index is loaded into memory. By setting this to a value > 1 /// you can reduce memory usage, at the expense of higher latency when loading a TermInfo. /// The default value is 1. Set this to -1 to skip loading the terms index entirely. 
virtual IndexReaderPtr getReader(int32_t termInfosIndexDivisor); /// Obtain the number of deleted docs for a pooled reader. If the reader isn't being pooled, /// the segmentInfo's delCount is returned. virtual int32_t numDeletedDocs(const SegmentInfoPtr& info); virtual void acquireWrite(); virtual void releaseWrite(); virtual void acquireRead(); /// Allows one readLock to upgrade to a writeLock even if there are other readLocks as long /// as all other readLocks are also blocked in this method virtual void upgradeReadToWrite(); virtual void releaseRead(); virtual bool isOpen(bool includePendingClose); virtual void message(const String& message); /// Get the current setting of whether newly flushed segments will use the compound file format. /// Note that this just returns the value previously set with setUseCompoundFile(bool), or the /// default value (true). You cannot use this to query the status of previously flushed segments. /// /// Note that this method is a convenience method: it just calls mergePolicy.getUseCompoundFile /// as long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument /// exception is thrown. /// @see #setUseCompoundFile(bool) virtual bool getUseCompoundFile(); /// Setting to turn on usage of a compound file. When on, multiple files for each segment are /// merged into a single file when a new segment is flushed. /// /// Note that this method is a convenience method: it just calls mergePolicy.setUseCompoundFile /// as long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument /// exception is thrown. virtual void setUseCompoundFile(bool value); /// Set the Similarity implementation used by this IndexWriter. virtual void setSimilarity(const SimilarityPtr& similarity); /// Return the Similarity implementation used by this IndexWriter. /// This defaults to the current value of {@link Similarity#getDefault()}. 
virtual SimilarityPtr getSimilarity(); /// Set the interval between indexed terms. Large values cause less memory to be used by /// IndexReader, but slow random-access to terms. Small values cause more memory to be used by /// an IndexReader, and speed random-access to terms. /// /// This parameter determines the amount of computation required per query term, regardless of /// the number of documents that contain that term. In particular, it is the maximum number of /// other terms that must be scanned before a term is located and its frequency and position /// information may be processed. In a large index with user-entered query terms, query /// processing time is likely to be dominated not by term lookup but rather by the processing of /// frequency and positional data. In a small index or when many uncommon query terms are /// generated (eg., by wildcard queries) term lookup may become a dominant cost. /// /// In particular, numUniqueTerms/interval terms are read into memory by an IndexReader, and on /// average, interval/2 terms must be scanned for each random term access. /// /// @see #DEFAULT_TERM_INDEX_INTERVAL virtual void setTermIndexInterval(int32_t interval); /// Return the interval between indexed terms. /// @see #setTermIndexInterval(int32_t) virtual int32_t getTermIndexInterval(); /// Set the merge policy used by this writer. virtual void setMergePolicy(const MergePolicyPtr& mp); /// Returns the current MergePolicy in use by this writer. /// @see #setMergePolicy virtual MergePolicyPtr getMergePolicy(); /// Set the merge scheduler used by this writer. virtual void setMergeScheduler(const MergeSchedulerPtr& mergeScheduler); /// Returns the current MergePolicy in use by this writer. /// @see #setMergePolicy virtual MergeSchedulerPtr getMergeScheduler(); /// Determines the largest segment (measured by document count) that may be merged with other /// segments. 
Small values (eg., less than 10,000) are best for interactive indexing, as this /// limits the length of pauses while indexing to a few seconds. Larger values are best for /// batched indexing and speedier searches. /// /// The default value is INT_MAX. /// /// Note that this method is a convenience method: it just calls mergePolicy.setMaxMergeDocs as /// long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument /// exception is thrown. /// /// The default merge policy ({@link LogByteSizeMergePolicy}) also allows you to set this limit /// by net size (in MB) of the segment, using {@link LogByteSizeMergePolicy#setMaxMergeMB}. virtual void setMaxMergeDocs(int32_t maxMergeDocs); /// Returns the largest segment (measured by document count) that may be merged with other /// segments. /// /// Note that this method is a convenience method: it just calls mergePolicy.getMaxMergeDocs as /// long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument /// exception is thrown. /// /// @see #setMaxMergeDocs virtual int32_t getMaxMergeDocs(); /// The maximum number of terms that will be indexed for a single field in a document. This /// limits the amount of memory required for indexing, so that collections with very large files /// will not crash the indexing process by running out of memory. This setting refers to the /// number of running terms, not to the number of different terms. /// Note: this silently truncates large documents, excluding from the index all terms that occur /// further in the document. If you know your source documents are large, be sure to set this /// value high enough to accommodate the expected size. If you set it to INT_MAX, then the only /// limit is your memory, but you should anticipate an std::bad_alloc. /// By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms will be indexed for a field. 
virtual void setMaxFieldLength(int32_t maxFieldLength); /// Returns the maximum number of terms that will be indexed for a single field in a document. /// @see #setMaxFieldLength virtual int32_t getMaxFieldLength(); /// Sets the termsIndexDivisor passed to any readers that IndexWriter opens, for example when /// applying deletes or creating a near-real-time reader in {@link IndexWriter#getReader}. /// Default value is {@link IndexReader#DEFAULT_TERMS_INDEX_DIVISOR}. virtual void setReaderTermsIndexDivisor(int32_t divisor); /// @see #setReaderTermsIndexDivisor() virtual int32_t getReaderTermsIndexDivisor(); /// Determines the minimal number of documents required before the buffered in-memory documents /// are flushed as a new Segment. Large values generally gives faster indexing. /// /// When this is set, the writer will flush every maxBufferedDocs added documents. Pass in /// {@link #DISABLE_AUTO_FLUSH} to prevent triggering a flush due to number of buffered /// documents. Note that if flushing by RAM usage is also enabled, then the flush will be /// triggered by whichever comes first. /// /// Disabled by default (writer flushes by RAM usage). /// /// @see #setRAMBufferSizeMB virtual void setMaxBufferedDocs(int32_t maxBufferedDocs); /// Returns the number of buffered added documents that will trigger a flush if enabled. /// @see #setMaxBufferedDocs virtual int32_t getMaxBufferedDocs(); /// Determines the amount of RAM that may be used for buffering added documents and deletions /// before they are flushed to the Directory. Generally for faster indexing performance it's /// best to flush by RAM usage instead of document count and use as large a RAM buffer as you can. /// /// When this is set, the writer will flush whenever buffered documents and deletions use this /// much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a flush due to RAM usage. 
/// Note that if flushing by document count is also enabled, then the flush will be triggered by /// whichever comes first. /// /// Note: the account of RAM usage for pending deletions is only approximate. Specifically, if /// you delete by Query, Lucene currently has no way to measure the RAM usage if individual /// Queries so the accounting will under-estimate and you should compensate by either calling /// commit() periodically yourself, or by using {@link #setMaxBufferedDeleteTerms} to flush by /// count instead of RAM usage (each buffered delete Query counts as one). /// /// Note: because IndexWriter uses int32_t when managing its internal storage, the absolute /// maximum value for this setting is somewhat less than 2048 MB. The precise limit depends on /// various factors, such as how large your documents are, how many fields have norms, etc., so /// it's best to set this value comfortably under 2048. /// /// The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}. virtual void setRAMBufferSizeMB(double mb); /// Returns the value set by {@link #setRAMBufferSizeMB} if enabled. virtual double getRAMBufferSizeMB(); /// Determines the minimal number of delete terms required before the buffered in-memory delete /// terms are applied and flushed. If there are documents buffered in memory at the time, they /// are merged and a new segment is created. /// /// Disabled by default (writer flushes by RAM usage). /// @see #setRAMBufferSizeMB virtual void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms); /// Returns the number of buffered deleted terms that will trigger a flush if enabled. /// @see #setMaxBufferedDeleteTerms virtual int32_t getMaxBufferedDeleteTerms(); /// Determines how often segment indices are merged by addDocument(). With smaller values, less /// RAM is used while indexing, and searches on unoptimized indices are faster, but indexing /// speed is slower. 
With larger values, more RAM is used during indexing, and while searches /// on unoptimized indices are slower, indexing is faster. Thus larger values (> 10) are best /// for batch index creation, and smaller values (< 10) for indices that are interactively maintained. /// /// Note that this method is a convenience method: it just calls mergePolicy.setMergeFactor as long /// as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument exception /// is thrown. This must never be less than 2. The default value is 10. virtual void setMergeFactor(int32_t mergeFactor); /// Returns the number of segments that are merged at once and also controls the total number of /// segments allowed to accumulate in the index. /// /// Note that this method is a convenience method: it just calls mergePolicy.getMergeFactor as long /// as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument exception /// is thrown. /// @see #setMergeFactor virtual int32_t getMergeFactor(); /// If non-null, this will be the default infoStream used by a newly instantiated IndexWriter. /// @see #setInfoStream static void setDefaultInfoStream(const InfoStreamPtr& infoStream); /// Returns the current default infoStream for newly instantiated IndexWriters. /// @see #setDefaultInfoStream static InfoStreamPtr getDefaultInfoStream(); /// If non-null, information about merges, deletes and a message when maxFieldLength is reached /// will be printed to this. virtual void setInfoStream(const InfoStreamPtr& infoStream); /// Returns the current infoStream in use by this writer. /// @see #setInfoStream virtual InfoStreamPtr getInfoStream(); /// Returns true if verbosing is enabled (i.e., infoStream != null). virtual bool verbose(); /// Sets the maximum time to wait for a write lock (in milliseconds) for this instance of /// IndexWriter. @see #setDefaultWriteLockTimeout to change the default value for all instances /// of IndexWriter. 
virtual void setWriteLockTimeout(int64_t writeLockTimeout); /// Returns allowed timeout when acquiring the write lock. /// @see #setWriteLockTimeout virtual int64_t getWriteLockTimeout(); /// Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock /// (in milliseconds). static void setDefaultWriteLockTimeout(int64_t writeLockTimeout); /// Returns default write lock timeout for newly instantiated IndexWriters. /// @see #setDefaultWriteLockTimeout static int64_t getDefaultWriteLockTimeout(); /// Commits all changes to an index and closes all associated files. Note that this may be /// a costly operation, so try to re-use a single writer instead of closing and opening a /// new one. See {@link #commit()} for caveats about write caching done by some IO devices. /// /// If an Exception is hit during close, eg due to disk full or some other reason, then both /// the on-disk index and the internal state of the IndexWriter instance will be consistent. /// However, the close will not be complete even though part of it (flushing buffered documents) /// may have succeeded, so the write lock will still be held. /// /// If you can correct the underlying cause (eg free up some disk space) then you can call /// close() again. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer, again. virtual void close(); /// Closes the index with or without waiting for currently running merges to finish. This is /// only meaningful when using a MergeScheduler that runs merges in background threads. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer, again. /// /// NOTE: it is dangerous to always call close(false), especially when IndexWriter is not open /// for very long, because this can result in "merge starvation" whereby long merges will never /// have a chance to finish. This will cause too many segments in your index over time. 
/// /// @param waitForMerges if true, this call will block until all merges complete; else, it will /// ask all running merges to abort, wait until those merges have finished (which should be at /// most a few seconds), and then return. virtual void close(bool waitForMerges); /// Returns the Directory used by this index. virtual DirectoryPtr getDirectory(); /// Returns the analyzer used by this index. virtual AnalyzerPtr getAnalyzer(); /// Returns total number of docs in this index, including docs not yet flushed (still in the /// RAM buffer), not counting deletions. /// @see #numDocs virtual int32_t maxDoc(); /// Returns total number of docs in this index, including docs not yet flushed (still in the /// RAM buffer), and including deletions. /// NOTE: buffered deletions are not counted. If you really need these to be counted you should /// call {@link #commit()} first. virtual int32_t numDocs(); virtual bool hasDeletions(); /// Adds a document to this index. If the document contains more than {@link /// #setMaxFieldLength(int32_t)} terms for a given field, the remainder are discarded. /// /// Note that if an Exception is hit (for example disk full) then the index will be consistent, /// but this document may not have been added. Furthermore, it's possible the index will have /// one segment in non-compound format even when using compound files (when a merge has partially /// succeeded). /// /// This method periodically flushes pending documents to the Directory, and also periodically /// triggers segment merges in the index according to the {@link MergePolicy} in use. /// /// Merges temporarily consume space in the directory. The amount of space required is up to 1X /// the size of all segments being merged, when no size of all segments being merged, when no /// 2X the size of all segments being merged when readers/searchers are open against the index /// (see {@link #optimize()} for details). 
The sequence of primitive merge operations performed /// is governed by the merge policy. /// /// Note that each term in the document can be no longer than 16383 characters, otherwise an /// IllegalArgument exception will be thrown. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void addDocument(const DocumentPtr& doc); /// Adds a document to this index, using the provided analyzer instead of the value of {@link /// #getAnalyzer()}. If the document contains more than {@link #setMaxFieldLength(int32_t)} terms /// for a given field, the remainder are discarded. /// /// See {@link #addDocument(DocumentPtr)} for details on index and IndexWriter state after an /// exception, and flushing/merging temporary free space requirements. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void addDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer); /// Deletes the document(s) containing term. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param term the term to identify the documents to be deleted virtual void deleteDocuments(const TermPtr& term); /// Deletes the document(s) containing any of the terms. All deletes are flushed at the same time. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param terms array of terms to identify the documents to be deleted virtual void deleteDocuments(Collection terms); /// Deletes the document(s) matching the provided query. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param query the query to identify the documents to be deleted virtual void deleteDocuments(const QueryPtr& query); /// Deletes the document(s) matching any of the provided queries. All deletes are flushed at /// the same time. 
/// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param queries array of queries to identify the documents to be deleted virtual void deleteDocuments(Collection queries); /// Updates a document by first deleting the document(s) containing term and then adding the new /// document. The delete and then add are atomic as seen by a reader on the same index (flush /// may happen only after the add). /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param term the term to identify the document(s) to be deleted /// @param doc the document to be added virtual void updateDocument(const TermPtr& term, const DocumentPtr& doc); /// Updates a document by first deleting the document(s) containing term and then adding the new /// document. The delete and then add are atomic as seen by a reader on the same index (flush /// may happen only after the add). /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param term the term to identify the document(s) to be deleted /// @param doc the document to be added /// @param analyzer the analyzer to use when analyzing the document virtual void updateDocument(const TermPtr& term, const DocumentPtr& doc, const AnalyzerPtr& analyzer); virtual int32_t getSegmentCount(); virtual int32_t getNumBufferedDocuments(); virtual int32_t getDocCount(int32_t i); virtual int32_t getFlushCount(); virtual int32_t getFlushDeletesCount(); virtual String newSegmentName(); /// Requests an "optimize" operation on an index, priming the index for the fastest available /// search. Traditionally this has meant merging all segments into a single segment as is done in /// the default merge policy, but individual merge policies may implement optimize in different ways. /// /// It is recommended that this method be called upon completion of indexing. 
In environments with /// frequent updates, optimize is best done during low volume times, if at all. /// /// Note that optimize requires 2X the index size free space in your Directory (3X if you're using /// compound file format). For example, if your index size is 10 MB then you need 20 MB free for /// optimize to complete (30 MB if you're using compound file format). /// /// If some but not all readers re-open while an optimize is underway, this will cause > 2X temporary /// space to be consumed as those new readers will then hold open the partially optimized segments at /// that time. It is best not to re-open readers while optimize is running. /// /// The actual temporary usage could be much less than these figures (it depends on many factors). /// /// In general, once the optimize completes, the total size of the index will be less than the size /// of the starting index. It could be quite a bit smaller (if there were many pending deletes) or /// just slightly smaller. /// /// If an Exception is hit during optimize(), for example due to disk full, the index will not be /// corrupt and no documents will have been lost. However, it may have been partially optimized /// (some segments were merged but not all), and it's possible that one of the segments in the index /// will be in non-compound format even when using compound file format. This will occur when the /// exception is hit during conversion of the segment into compound format. /// /// This call will optimize those segments present in the index when the call started. If other /// threads are still adding documents and flushing segments, those newly created segments will not /// be optimized unless you call optimize again. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @see LogMergePolicy#findMergesForOptimize virtual void optimize(); /// Optimize the index down to <= maxNumSegments. If maxNumSegments==1 then this is the same as /// {@link #optimize()}. 
/// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @param maxNumSegments maximum number of segments left in the index after optimization finishes virtual void optimize(int32_t maxNumSegments); /// Just like {@link #optimize()}, except you can specify whether the call should block until the /// optimize completes. This is only meaningful with a {@link MergeScheduler} that is able to run /// merges in background threads. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void optimize(bool doWait); /// Just like {@link #optimize(int32_t)}, except you can specify whether the call should block /// until the optimize completes. This is only meaningful with a {@link MergeScheduler} that is /// able to run merges in background threads. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void optimize(int32_t maxNumSegments, bool doWait); /// Just like {@link #expungeDeletes()}, except you can specify whether the call should block /// until the operation completes. This is only meaningful with a {@link MergeScheduler} that /// is able to run merges in background threads. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void expungeDeletes(bool doWait); /// Expunges all deletes from the index. When an index has many document deletions (or updates /// to existing documents), it's best to either call optimize or expungeDeletes to remove all /// unused data in the index associated with the deleted documents. To see how many deletions /// you have pending in your index, call {@link IndexReader#numDeletedDocs}. This saves disk /// space and memory usage while searching. 
expungeDeletes should be somewhat faster than /// optimize since it does not insist on reducing the index to a single segment (though, this /// depends on the {@link MergePolicy}; see {@link MergePolicy#findMergesToExpungeDeletes}.). /// Note that this call does not first commit any buffered documents, so you must do so yourself /// if necessary. See also {@link #expungeDeletes(bool)} /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void expungeDeletes(); /// Asks the mergePolicy whether any merges are necessary now and if so, runs the requested /// merges and then iterate (test again if merges are needed) until no more merges are returned /// by the mergePolicy. /// /// Explicit calls to maybeMerge() are usually not necessary. The most common case is when merge /// policy parameters have changed. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void maybeMerge(); /// The {@link MergeScheduler} calls this method to retrieve the next merge requested by the /// MergePolicy. virtual OneMergePtr getNextMerge(); /// Close the IndexWriter without committing any changes that have occurred since the last commit /// (or since it was opened, if commit hasn't been called). This removes any temporary files that /// had been created, after which the state of the index will be the same as it was when commit() /// was last called or when this writer was first opened. This also clears a previous call to /// {@link #prepareCommit}. virtual void rollback(); /// Delete all documents in the index. /// /// This method will drop all buffered documents and will remove all segments from the index. This /// change will not be visible until a {@link #commit()} has been called. This method can be rolled /// back using {@link #rollback()}. /// /// NOTE: this method is much faster than using {@link #deleteDocuments()}. /// /// NOTE: this method will forcefully abort all merges in progress. 
If other threads are running /// {@link #optimize()} or any of the addIndexes methods, they will receive {@link /// MergePolicy.MergeAbortedException} virtual void deleteAll(); /// Wait for any currently outstanding merges to finish. /// /// It is guaranteed that any merges started prior to calling this method will have completed once /// this method completes. virtual void waitForMerges(); /// Merges all segments from an array of indexes into this index. /// /// This may be used to parallelize batch indexing. A large document collection can be broken into /// sub-collections. Each sub-collection can be indexed in parallel, on a different thread, process /// or machine. The complete index can then be created by merging sub-collection indexes with this /// method. /// /// NOTE: the index in each Directory must not be changed (opened by a writer) while this method is /// running. This method does not acquire a write lock in each input Directory, so it is up to the /// caller to enforce this. /// /// NOTE: while this is running, any attempts to add or delete documents (with another thread) will /// be paused until this method completes. /// /// This method is transactional in how exceptions are handled: it does not commit a new segments_N /// file until all indexes are added. This means if an exception occurs (for example disk full), /// then either no indexes will have been added or they all will have been. /// /// Note that this requires temporary free space in the Directory up to 2X the sum of all input /// indexes (including the starting index). If readers/searchers are open against the starting index, /// then temporary free space required will be higher by the size of the starting index (see /// {@link #optimize()} for details). /// /// Once this completes, the final size of the index will be less than the sum of all input index /// sizes (including the starting index). 
It could be quite a bit smaller (if there were many pending /// deletes) or just slightly smaller. /// /// This requires this index not be among those to be added. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void addIndexesNoOptimize(Collection dirs); /// Merges the provided indexes into this index. /// After this completes, the index is optimized. The provided IndexReaders are not closed. /// /// NOTE: while this is running, any attempts to add or delete documents (with another thread) will /// be paused until this method completes. /// /// See {@link #addIndexesNoOptimize} for details on transactional semantics, temporary free space /// required in the Directory, and non-CFS segments on an exception. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void addIndexes(Collection readers); /// Prepare for commit. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// @see #prepareCommit(MapStringString) virtual void prepareCommit(); /// Prepare for commit, specifying commitUserData Map (String -> String). This does the first phase /// of 2-phase commit. This method does all steps necessary to commit changes since this writer was /// opened: flushes pending added and deleted docs, syncs the index files, writes most of next /// segments_N file. After calling this you must call either {@link #commit()} to finish the commit, /// or {@link #rollback()} to revert the commit and undo all changes done since the writer was opened. /// /// You can also just call {@link #commit(Map)} directly without prepareCommit first in which case /// that method will internally call prepareCommit. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. 
/// /// @param commitUserData Opaque Map (String->String) that's recorded into the segments file in the /// index, and retrievable by {@link IndexReader#getCommitUserData}. Note that when IndexWriter /// commits itself during {@link #close}, the commitUserData is unchanged (just carried over from the /// prior commit). If this is null then the previous commitUserData is kept. Also, the commitUserData // will only "stick" if there are actually changes in the index to commit. virtual void prepareCommit(MapStringString commitUserData); /// Commits all pending changes (added & deleted documents, optimizations, segment merges, added /// indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the /// changes and the index updates will survive an OS or machine crash or power loss. Note that this /// does not wait for any running background merges to finish. This may be a costly operation, so you /// should test the cost in your application and do it only when really necessary. /// /// Note that this operation calls Directory.sync on the index files. That call should not return until /// the file contents & metadata are on stable storage. For FSDirectory, this calls the OS's fsync. /// But, beware: some hardware devices may in fact cache writes even during fsync, and return before the /// bits are actually on stable storage, to give the appearance of faster performance. If you have such /// a device, and it does not have a battery backup (for example) then on power loss it may still lose /// data. Lucene cannot guarantee consistency on such devices. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. /// /// @see #prepareCommit /// @see #commit(MapStringString) virtual void commit(); /// Commits all changes to the index, specifying a commitUserData Map (String -> String). 
This just /// calls {@link #prepareCommit(MapStringString)} (if you didn't already call it) and then /// {@link #finishCommit}. /// /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. virtual void commit(MapStringString commitUserData); /// Return the total size of all index files currently cached in memory. Useful for size management /// with flushRamDocs() virtual int64_t ramSizeInBytes(); /// Return the number of documents currently buffered in RAM. virtual int32_t numRamDocs(); /// Merges the indicated segments, replacing them in the stack with a single segment. virtual void merge(const OneMergePtr& merge); /// Hook that's called when the specified merge is complete. virtual void mergeSuccess(const OneMergePtr& merge); /// Checks whether this merge involves any segments already participating in a merge. If not, this /// merge is "registered", meaning we record that its segments are now participating in a merge, /// and true is returned. Else (the merge conflicts) false is returned. virtual bool registerMerge(const OneMergePtr& merge); /// Does initial setup for a merge, which is fast but holds the synchronized lock on IndexWriter /// instance. virtual void mergeInit(const OneMergePtr& merge); /// Does finishing for a merge, which is fast but holds the synchronized lock on IndexWriter instance. virtual void mergeFinish(const OneMergePtr& merge); virtual void addMergeException(const OneMergePtr& merge); /// For test purposes. virtual int32_t getBufferedDeleteTermsSize(); /// For test purposes. virtual int32_t getNumBufferedDeleteTerms(); /// Utility routines for tests virtual SegmentInfoPtr newestSegment(); virtual String segString(); /// Returns true if the index in the named directory is currently locked. /// @param directory the directory to check for a lock static bool isLocked(const DirectoryPtr& directory); /// Forcibly unlocks the index in the named directory. 
/// Caution: this should only be used by failure recovery code, when it is known that no other process /// nor thread is in fact currently accessing this index. static void unlock(const DirectoryPtr& directory); /// Set the merged segment warmer. See {@link IndexReaderWarmer}. virtual void setMergedSegmentWarmer(const IndexReaderWarmerPtr& warmer); /// Returns the current merged segment warmer. See {@link IndexReaderWarmer}. virtual IndexReaderWarmerPtr getMergedSegmentWarmer(); /// Used only by assert for testing. Current points: /// startDoFlush /// startCommitMerge /// startStartCommit /// midStartCommit /// midStartCommit2 /// midStartCommitSuccess /// finishStartCommit /// startCommitMergeDeletes /// startMergeInit /// startApplyDeletes /// startMergeInit /// startMergeInit virtual bool testPoint(const String& name); virtual bool nrtIsCurrent(const SegmentInfosPtr& infos); virtual bool isClosed(); protected: virtual void ensureOpen(bool includePendingClose); virtual void ensureOpen(); virtual void setMessageID(const InfoStreamPtr& infoStream); /// Casts current mergePolicy to LogMergePolicy, and throws an exception if the /// mergePolicy is not a LogMergePolicy. virtual LogMergePolicyPtr getLogMergePolicy(); virtual void setRollbackSegmentInfos(const SegmentInfosPtr& infos); /// If we are flushing by doc count (not by RAM usage), and using LogDocMergePolicy then push /// maxBufferedDocs down as its minMergeDocs, to keep backwards compatibility. virtual void pushMaxBufferedDocs(); virtual void messageState(); /// Returns true if this thread should attempt to close, or false if IndexWriter is now closed; /// else, waits until another thread finishes closing virtual bool shouldClose(); virtual void closeInternal(bool waitForMerges); /// Tells the docWriter to close its currently open shared doc stores (stored fields & vectors /// files). Return value specifies whether new doc store files are compound or not. 
virtual bool flushDocStores(); /// Returns true if any merges in pendingMerges or runningMerges are optimization merges. virtual bool optimizeMergesPending(); virtual void maybeMerge(bool optimize); virtual void maybeMerge(int32_t maxNumSegmentsOptimize, bool optimize); virtual void updatePendingMerges(int32_t maxNumSegmentsOptimize, bool optimize); /// Like {@link #getNextMerge()} except only returns a merge if it's external. virtual OneMergePtr getNextExternalMerge(); /// Begin a transaction. During a transaction, any segment merges that happen (or ram segments /// flushed) will not write a new segments file and will not remove any files that were present /// at the start of the transaction. You must make a matched call to commitTransaction() or /// rollbackTransaction() to finish the transaction. /// /// Note that buffered documents and delete terms are not handled within the transactions, so /// they must be flushed before the transaction is started. virtual void startTransaction(bool haveReadLock); /// Rolls back the transaction and restores state to where we were at the start. virtual void rollbackTransaction(); /// Commits the transaction. This will write the new segments file and remove and pending /// deletions we have accumulated during the transaction. virtual void commitTransaction(); virtual void rollbackInternal(); virtual void finishMerges(bool waitForMerges); /// Called whenever the SegmentInfos has been updated and the index files referenced exist /// (correctly) in the index directory. 
virtual void checkpoint(); virtual void finishAddIndexes(); virtual void blockAddIndexes(bool includePendingClose); virtual void resumeAddIndexes(); virtual void resetMergeExceptions(); virtual void noDupDirs(Collection dirs); virtual bool hasExternalSegments(); /// If any of our segments are using a directory != ours then we have to either copy them over one /// by one, merge them (if merge policy has chosen to) or wait until currently running merges (in /// the background) complete. We don't return until the SegmentInfos has no more external segments. /// Currently this is only used by addIndexesNoOptimize(). virtual void resolveExternalSegments(); /// A hook for extending classes to execute operations after pending added and deleted documents have /// been flushed to the Directory but before the change is committed (new segments_N file written). virtual void doAfterFlush(); /// A hook for extending classes to execute operations before pending added and deleted documents are /// flushed to the Directory. virtual void doBeforeFlush(); virtual void commit(int64_t sizeInBytes); virtual void finishCommit(); /// Flush all in-memory buffered updates (adds and deletes) to the Directory. /// @param triggerMerge if true, we may merge segments (if deletes or docs were flushed) if necessary /// @param flushDocStores if false we are allowed to keep doc stores open to share with the next segment /// @param flushDeletes whether pending deletes should also be flushed virtual void flush(bool triggerMerge, bool flushDocStores, bool flushDeletes); virtual bool doFlush(bool flushDocStores, bool flushDeletes); virtual bool doFlushInternal(bool flushDocStores, bool flushDeletes); virtual int32_t ensureContiguousMerge(const OneMergePtr& merge); /// Carefully merges deletes for the segments we just merged. This is tricky because, although merging /// will clear all deletes (compacts the documents), new deletes may have been flushed to the segments /// since the merge was started. 
This method "carries over" such new deletes onto the newly merged /// segment, and saves the resulting deletes file (incrementing the delete generation for merge.info). /// If no deletes were flushed, no new deletes file is saved. virtual void commitMergedDeletes(const OneMergePtr& merge, const SegmentReaderPtr& mergeReader); virtual bool commitMerge(const OneMergePtr& merge, const SegmentMergerPtr& merger, int32_t mergedDocCount, const SegmentReaderPtr& mergedReader); virtual LuceneException handleMergeException(const LuceneException& exc, const OneMergePtr& merge); virtual void _mergeInit(const OneMergePtr& merge); virtual void setDiagnostics(const SegmentInfoPtr& info, const String& source); virtual void setDiagnostics(const SegmentInfoPtr& info, const String& source, MapStringString details); virtual void setMergeDocStoreIsCompoundFile(const OneMergePtr& merge); virtual void closeMergeReaders(const OneMergePtr& merge, bool suppressExceptions); /// Does the actual (time-consuming) work of the merge, but without holding synchronized lock on /// IndexWriter instance. virtual int32_t mergeMiddle(const OneMergePtr& merge); /// Apply buffered deletes to all segments. virtual bool applyDeletes(); virtual String segString(const SegmentInfosPtr& infos); virtual bool startSync(const String& fileName, HashSet pending); virtual void finishSync(const String& fileName, bool success); /// Blocks until all files in syncing are sync'd bool waitForAllSynced(HashSet syncing); void doWait(); /// Walk through all files referenced by the current segmentInfos and ask the Directory to sync each /// file, if it wasn't already. If that succeeds, then we prepare a new segments_N file but do not /// fully commit it. 
virtual void startCommit(int64_t sizeInBytes, MapStringString commitUserData); virtual LuceneException handleOOM(const std::bad_alloc& oom, const String& location); friend class ReaderPool; }; /// If {@link #getReader} has been called (ie, this writer is in near real-time mode), then after /// a merge completes, this class can be invoked to warm the reader on the newly merged segment, /// before the merge commits. This is not required for near real-time search, but will reduce /// search latency on opening a new near real-time reader after a merge completes. /// /// NOTE: warm is called before any deletes have been carried over to the merged segment. class LPPAPI IndexReaderWarmer : public LuceneObject { public: virtual ~IndexReaderWarmer(); LUCENE_CLASS(IndexReaderWarmer); public: virtual void warm(const IndexReaderPtr& reader) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/InfoStream.h000066400000000000000000000030321456444476200225720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INFOSTREAM_H #define INFOSTREAM_H #include "LuceneObject.h" #include namespace Lucene { /// Utility class to support streaming info messages. class LPPAPI InfoStream : public LuceneObject { protected: InfoStream(); public: virtual ~InfoStream(); LUCENE_CLASS(InfoStream); public: virtual InfoStream& operator<< (const String& t) = 0; }; /// Stream override to write messages to a file. 
class LPPAPI InfoStreamFile : public InfoStream { public: InfoStreamFile(const String& path); virtual ~InfoStreamFile(); LUCENE_CLASS(InfoStreamFile); protected: boost::filesystem::wofstream file; public: virtual InfoStreamFile& operator<< (const String& t); }; /// Stream override to write messages to a std::cout. class LPPAPI InfoStreamOut : public InfoStream { public: virtual ~InfoStreamOut(); LUCENE_CLASS(InfoStreamOut); public: virtual InfoStreamOut& operator<< (const String& t); }; /// Null stream override to eat messages. class LPPAPI InfoStreamNull : public InfoStream { public: virtual ~InfoStreamNull(); LUCENE_CLASS(InfoStreamNull); public: virtual InfoStreamNull& operator<< (const String& t); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/InputStreamReader.h000066400000000000000000000023221456444476200241220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INPUTSTREAMREADER_H #define INPUTSTREAMREADER_H #include "Reader.h" namespace Lucene { /// An InputStreamReader is a bridge from byte streams to character streams. class LPPAPI InputStreamReader : public Reader { public: /// Create an InputStreamReader that uses the utf8 charset. InputStreamReader(const ReaderPtr& reader); virtual ~InputStreamReader(); LUCENE_CLASS(InputStreamReader); protected: ReaderPtr reader; UTF8DecoderStreamPtr decoder; public: /// Read a single character. virtual int32_t read(); /// Read characters into a portion of an array. virtual int32_t read(wchar_t* b, int32_t offset, int32_t length); /// Close the stream. virtual void close(); /// Tell whether this stream supports the mark() operation virtual bool markSupported(); /// Reset the stream. 
virtual void reset(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/IntBlockPool.h000066400000000000000000000017551456444476200230740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INTBLOCKPOOL_H #define INTBLOCKPOOL_H #include "LuceneObject.h" namespace Lucene { class IntBlockPool : public LuceneObject { public: IntBlockPool(const DocumentsWriterPtr& docWriter, bool trackAllocations); virtual ~IntBlockPool(); LUCENE_CLASS(IntBlockPool); public: Collection buffers; int32_t bufferUpto; // Which buffer we are upto int32_t intUpto; // Where we are in head buffer IntArray buffer; // Current head buffer int32_t intOffset; // Current head offset bool trackAllocations; protected: DocumentsWriterWeakPtr _docWriter; public: void reset(); void nextBuffer(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/IntFieldSource.h000066400000000000000000000034111456444476200234030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INTFIELDSOURCE_H #define INTFIELDSOURCE_H #include "FieldCacheSource.h" namespace Lucene { /// Obtains int field values from the {@link FieldCache} using getInts() and makes those values available /// as other numeric types, casting as needed. /// /// @see FieldCacheSource for requirements on the field. 
/// /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's /// best to switch your application to pass only atomic (single segment) readers to this API. Alternatively, /// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU /// per lookup but will not consume double the FieldCache RAM. class LPPAPI IntFieldSource : public FieldCacheSource { public: /// Create a cached int field source with a specific string-to-int parser. IntFieldSource(const String& field, const IntParserPtr& parser = IntParserPtr()); virtual ~IntFieldSource(); LUCENE_CLASS(IntFieldSource); protected: IntParserPtr parser; public: virtual String description(); virtual DocValuesPtr getCachedFieldValues(const FieldCachePtr& cache, const String& field, const IndexReaderPtr& reader); virtual bool cachedFieldSourceEquals(const FieldCacheSourcePtr& other); virtual int32_t cachedFieldSourceHashCode(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/InvertedDocConsumer.h000066400000000000000000000024271456444476200244540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef INVERTEDDOCCONSUMER_H #define INVERTEDDOCCONSUMER_H #include "LuceneObject.h" namespace Lucene { class InvertedDocConsumer : public LuceneObject { public: virtual ~InvertedDocConsumer(); LUCENE_CLASS(InvertedDocConsumer); public: FieldInfosPtr fieldInfos; public: /// Add a new thread virtual InvertedDocConsumerPerThreadPtr addThread(const DocInverterPerThreadPtr& docInverterPerThread) = 0; /// Abort (called after hitting AbortException) virtual void abort() = 0; /// Flush a new segment virtual void flush(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) = 0; /// Close doc stores virtual void closeDocStore(const SegmentWriteStatePtr& state) = 0; /// Attempt to free RAM, returning true if any RAM was freed virtual bool freeRAM() = 0; virtual void setFieldInfos(const FieldInfosPtr& fieldInfos); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/InvertedDocConsumerPerField.h000066400000000000000000000023741456444476200260700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INVERTEDDOCCONSUMERPERFIELD_H #define INVERTEDDOCCONSUMERPERFIELD_H #include "LuceneObject.h" namespace Lucene { class InvertedDocConsumerPerField : public LuceneObject { public: virtual ~InvertedDocConsumerPerField(); LUCENE_CLASS(InvertedDocConsumerPerField); public: /// Called once per field, and is given all Fieldable occurrences for this field in the document. 
/// Return true if you wish to see inverted tokens for these fields virtual bool start(Collection fields, int32_t count) = 0; /// Called before a field instance is being processed virtual void start(const FieldablePtr& field) = 0; /// Called once per inverted token virtual void add() = 0; /// Called once per field per document, after all Fieldable occurrences are inverted virtual void finish() = 0; /// Called on hitting an aborting exception virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/InvertedDocConsumerPerThread.h000066400000000000000000000016071456444476200262520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INVERTEDDOCCONSUMERPERTHREAD_H #define INVERTEDDOCCONSUMERPERTHREAD_H #include "LuceneObject.h" namespace Lucene { class InvertedDocConsumerPerThread : public LuceneObject { public: virtual ~InvertedDocConsumerPerThread(); LUCENE_CLASS(InvertedDocConsumerPerThread); public: virtual void startDocument() = 0; virtual InvertedDocConsumerPerFieldPtr addField(const DocInverterPerFieldPtr& docInverterPerField, const FieldInfoPtr& fieldInfo) = 0; virtual DocWriterPtr finishDocument() = 0; virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/InvertedDocEndConsumer.h000066400000000000000000000020441456444476200250760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef INVERTEDDOCENDCONSUMER_H #define INVERTEDDOCENDCONSUMER_H #include "LuceneObject.h" namespace Lucene { class InvertedDocEndConsumer : public LuceneObject { public: virtual ~InvertedDocEndConsumer(); LUCENE_CLASS(InvertedDocEndConsumer); public: virtual InvertedDocEndConsumerPerThreadPtr addThread(const DocInverterPerThreadPtr& docInverterPerThread) = 0; virtual void flush(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) = 0; virtual void closeDocStore(const SegmentWriteStatePtr& state) = 0; virtual void abort() = 0; virtual void setFieldInfos(const FieldInfosPtr& fieldInfos) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/InvertedDocEndConsumerPerField.h000066400000000000000000000013201456444476200265050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INVERTEDDOCENDCONSUMERPERFIELD_H #define INVERTEDDOCENDCONSUMERPERFIELD_H #include "LuceneObject.h" namespace Lucene { class InvertedDocEndConsumerPerField : public LuceneObject { public: virtual ~InvertedDocEndConsumerPerField(); LUCENE_CLASS(InvertedDocEndConsumerPerField); public: virtual void finish() = 0; virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/InvertedDocEndConsumerPerThread.h000066400000000000000000000016211456444476200266750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef INVERTEDDOCENDCONSUMERPERTHREAD_H #define INVERTEDDOCENDCONSUMERPERTHREAD_H #include "LuceneObject.h" namespace Lucene { class InvertedDocEndConsumerPerThread : public LuceneObject { public: virtual ~InvertedDocEndConsumerPerThread(); LUCENE_CLASS(InvertedDocEndConsumerPerThread); public: virtual void startDocument() = 0; virtual InvertedDocEndConsumerPerFieldPtr addField(const DocInverterPerFieldPtr& docInverterPerField, const FieldInfoPtr& fieldInfo) = 0; virtual void finishDocument() = 0; virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/KeepOnlyLastCommitDeletionPolicy.h000066400000000000000000000021521456444476200271140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef KEEPONLYLASTCOMMITDELETIONPOLICY_H #define KEEPONLYLASTCOMMITDELETIONPOLICY_H #include "IndexDeletionPolicy.h" namespace Lucene { /// This {@link IndexDeletionPolicy} implementation that keeps only the most recent commit and immediately /// removes all prior commits after a new commit is done. This is the default deletion policy. class LPPAPI KeepOnlyLastCommitDeletionPolicy : public IndexDeletionPolicy { public: virtual ~KeepOnlyLastCommitDeletionPolicy(); LUCENE_CLASS(KeepOnlyLastCommitDeletionPolicy); public: /// Deletes all commits except the most recent one. virtual void onInit(Collection commits); /// Deletes all commits except the most recent one. 
virtual void onCommit(Collection commits); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/KeywordAnalyzer.h000066400000000000000000000015771456444476200236710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef KEYWORDANALYZER_H #define KEYWORDANALYZER_H #include "Analyzer.h" namespace Lucene { /// Tokenizes the entire stream as a single token. This is useful for data like zip codes, ids, and some /// product names. class LPPAPI KeywordAnalyzer : public Analyzer { public: virtual ~KeywordAnalyzer(); LUCENE_CLASS(KeywordAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/KeywordTokenizer.h000066400000000000000000000023361456444476200240500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef KEYWORDTOKENIZER_H #define KEYWORDTOKENIZER_H #include "Tokenizer.h" namespace Lucene { /// Emits the entire input as a single token. 
class LPPAPI KeywordTokenizer : public Tokenizer { public: KeywordTokenizer(const ReaderPtr& input); KeywordTokenizer(const ReaderPtr& input, int32_t bufferSize); KeywordTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input, int32_t bufferSize); KeywordTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input, int32_t bufferSize); virtual ~KeywordTokenizer(); LUCENE_CLASS(KeywordTokenizer); protected: static const int32_t DEFAULT_BUFFER_SIZE; bool done; int32_t finalOffset; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; protected: void init(int32_t bufferSize); public: virtual bool incrementToken(); virtual void end(); virtual void reset(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LengthFilter.h000066400000000000000000000017341456444476200231210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LENGTHFILTER_H #define LENGTHFILTER_H #include "TokenFilter.h" namespace Lucene { /// Removes words that are too long or too short from the stream. class LPPAPI LengthFilter : public TokenFilter { public: /// Build a filter that removes words that are too long or too short from the text. 
LengthFilter(const TokenStreamPtr& input, int32_t min, int32_t max); virtual ~LengthFilter(); LUCENE_CLASS(LengthFilter); public: int32_t min; int32_t max; protected: TermAttributePtr termAtt; public: /// Returns the next input Token whose term() is the right len virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LetterTokenizer.h000066400000000000000000000026761456444476200236720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LETTERTOKENIZER_H #define LETTERTOKENIZER_H #include "CharTokenizer.h" namespace Lucene { /// A LetterTokenizer is a tokenizer that divides text at non-letters. That's to say, it defines tokens as maximal /// strings of adjacent letters, as defined UnicodeUtil::isAlpha(c) predicate. /// /// Note: this does a decent job for most European languages, but does a terrible job for some Asian languages, where /// words are not separated by spaces. class LPPAPI LetterTokenizer : public CharTokenizer { public: /// Construct a new LetterTokenizer. LetterTokenizer(const ReaderPtr& input); /// Construct a new LetterTokenizer using a given {@link AttributeSource}. LetterTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); /// Construct a new LetterTokenizer using a given {@link AttributeFactory}. LetterTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); virtual ~LetterTokenizer(); LUCENE_CLASS(LetterTokenizer); public: /// Collects only characters which satisfy UnicodeUtil::isAlpha(c). 
virtual bool isTokenChar(wchar_t c); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LoadFirstFieldSelector.h000066400000000000000000000014141456444476200250610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LOADFIRSTFIELDSELECTOR_H #define LOADFIRSTFIELDSELECTOR_H #include "FieldSelector.h" namespace Lucene { /// Load the First field and break. /// See {@link FieldSelectorResult#LOAD_AND_BREAK} class LPPAPI LoadFirstFieldSelector : public FieldSelector { public: virtual ~LoadFirstFieldSelector(); LUCENE_CLASS(LoadFirstFieldSelector); public: virtual FieldSelectorResult accept(const String& fieldName); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Lock.h000066400000000000000000000033111456444476200214130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LOCK_H #define LOCK_H #include "LuceneObject.h" namespace Lucene { /// An interprocess mutex lock. /// @see Directory#makeLock(const String&) class LPPAPI Lock : public LuceneObject { public: virtual ~Lock(); LUCENE_CLASS(Lock); public: /// How long {@link #obtain(int64_t)} waits, in milliseconds, in between attempts to acquire the lock. static const int32_t LOCK_OBTAIN_WAIT_FOREVER; /// Pass this value to {@link #obtain(int64_t)} to try forever to obtain the lock. 
static const int32_t LOCK_POLL_INTERVAL; public: /// Attempts to obtain exclusive access and immediately return upon success or failure. /// @return true if exclusive access is obtained. virtual bool obtain() = 0; /// Releases exclusive access. virtual void release() = 0; /// Returns true if the resource is currently locked. Note that one must still call {@link #obtain()} /// before using the resource. virtual bool isLocked() = 0; /// Attempts to obtain an exclusive lock within amount of time given. Polls once per {@link #LOCK_POLL_INTERVAL} /// (currently 1000) milliseconds until lockWaitTimeout is passed. /// @param lockWaitTimeout length of time to wait in milliseconds or {@link #LOCK_OBTAIN_WAIT_FOREVER} /// to retry forever. /// @return true if lock was obtained. bool obtain(int32_t lockWaitTimeout); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LockFactory.h000066400000000000000000000037401456444476200227510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LOCKFACTORY_H #define LOCKFACTORY_H #include "LuceneObject.h" namespace Lucene { /// Base class for Locking implementation. {@link Directory} uses /// instances of this class to implement locking. /// Note that there are some useful tools to verify that /// your LockFactory is working correctly: {@link /// VerifyingLockFactory}, {@link LockStressTest}, {@link /// LockVerifyServer}. /// @see LockVerifyServer /// @see LockStressTest /// @see VerifyingLockFactory class LPPAPI LockFactory : public LuceneObject { public: virtual ~LockFactory(); LUCENE_CLASS(LockFactory); protected: String lockPrefix; public: /// Set the prefix in use for all locks created in this LockFactory. 
This is normally called once, when a /// Directory gets this LockFactory instance. However, you can also call this (after this instance is /// assigned to a Directory) to override the prefix in use. This is helpful if you're running Lucene on /// machines that have different mount points for the same shared directory. virtual void setLockPrefix(const String& lockPrefix); /// Get the prefix in use for all locks created in this LockFactory. virtual String getLockPrefix(); /// Return a new Lock instance identified by lockName. /// @param lockName name of the lock to be created. virtual LockPtr makeLock(const String& lockName) = 0; /// Attempt to clear (forcefully unlock and remove) the specified lock. Only call this at a time when you /// are certain this lock is no longer in use. /// @param lockName name of the lock to be cleared. virtual void clearLock(const String& lockName) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LogByteSizeMergePolicy.h000066400000000000000000000047621456444476200250760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LOGBYTESIZEMERGEPOLICY_H #define LOGBYTESIZEMERGEPOLICY_H #include "LogMergePolicy.h" namespace Lucene { /// This is a {@link LogMergePolicy} that measures size of a segment as the total byte size of the /// segment's files. class LPPAPI LogByteSizeMergePolicy : public LogMergePolicy { public: LogByteSizeMergePolicy(const IndexWriterPtr& writer); virtual ~LogByteSizeMergePolicy(); LUCENE_CLASS(LogByteSizeMergePolicy); public: /// Default minimum segment size. @see setMinMergeMB. static const double DEFAULT_MIN_MERGE_MB; /// Default maximum segment size. 
A segment of this size or larger will never be merged. /// @see setMaxMergeMB static const double DEFAULT_MAX_MERGE_MB; protected: virtual int64_t size(const SegmentInfoPtr& info); public: /// Determines the largest segment (measured by total byte size of the segment's files, in MB) /// that may be merged with other segments. Small values (eg., less than 50 MB) are best for /// interactive indexing, as this limits the length of pauses while indexing to a few seconds. /// Larger values are best for batched indexing and speedier searches. /// /// Note that {@link #setMaxMergeDocs} is also used to check whether a segment is too large for /// merging (it's either or). void setMaxMergeMB(double mb); /// Returns the largest segment (measured by total byte size of the segment's files, in MB) that /// may be merged with other segments. @see #setMaxMergeMB double getMaxMergeMB(); /// Sets the minimum size for the lowest level segments. Any segments below this size are /// considered to be on the same level (even if they vary drastically in size) and will be merged /// whenever there are mergeFactor of them. This effectively truncates the "long tail" of small /// segments that would otherwise be created into a single level. If you set this too large, it /// could greatly increase the merging cost during indexing (if you flush many small segments). void setMinMergeMB(double mb); /// Get the minimum size for a segment to remain un-merged. @see #setMinMergeMB double getMinMergeMB(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LogDocMergePolicy.h000066400000000000000000000031531456444476200240360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef LOGDOCMERGEPOLICY_H #define LOGDOCMERGEPOLICY_H #include "LogMergePolicy.h" namespace Lucene { /// This is a {@link LogMergePolicy} that measures size of a segment as the number of documents /// (not taking deletions into account). class LPPAPI LogDocMergePolicy : public LogMergePolicy { public: LogDocMergePolicy(const IndexWriterPtr& writer); virtual ~LogDocMergePolicy(); LUCENE_CLASS(LogDocMergePolicy); public: /// Default minimum segment size. @see setMinMergeDocs static const int32_t DEFAULT_MIN_MERGE_DOCS; protected: virtual int64_t size(const SegmentInfoPtr& info); public: /// Sets the minimum size for the lowest level segments. Any segments below this size are considered /// to be on the same level (even if they vary drastically in size) and will be merged whenever there /// are mergeFactor of them. This effectively truncates the "long tail" of small segments that would /// otherwise be created into a single level. If you set this too large, it could greatly increase the /// merging cost during indexing (if you flush many small segments). void setMinMergeDocs(int32_t minMergeDocs); /// Get the minimum size for a segment to remain un-merged. @see #setMinMergeDocs int32_t getMinMergeDocs(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LogMergePolicy.h000066400000000000000000000172061456444476200234140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef LOGMERGEPOLICY_H #define LOGMERGEPOLICY_H #include "MergePolicy.h" namespace Lucene { /// This class implements a {@link MergePolicy} that tries to merge segments into levels of exponentially /// increasing size, where each level has fewer segments than the value of the merge factor. Whenever extra /// segments (beyond the merge factor upper bound) are encountered, all segments within the level are merged. /// You can get or set the merge factor using {@link #getMergeFactor()} and {@link #setMergeFactor(int)} /// respectively. /// /// This class is abstract and requires a subclass to define the {@link #size} method which specifies how a /// segment's size is determined. {@link LogDocMergePolicy} is one subclass that measures size by document /// count in the segment. {@link LogByteSizeMergePolicy} is another subclass that measures size as the total /// byte size of the file(s) for the segment. class LPPAPI LogMergePolicy : public MergePolicy { public: LogMergePolicy(const IndexWriterPtr& writer); virtual ~LogMergePolicy(); LUCENE_CLASS(LogMergePolicy); protected: int32_t mergeFactor; double noCFSRatio; bool calibrateSizeByDeletes; bool _useCompoundFile; bool _useCompoundDocStore; public: /// Defines the allowed range of log(size) for each level. A level is computed by taking the max segment /// log size, minus LEVEL_LOG_SPAN, and finding all segments falling within that range. static const double LEVEL_LOG_SPAN; /// Default merge factor, which is how many segments are merged at a time. static const int32_t DEFAULT_MERGE_FACTOR; /// Default maximum segment size. A segment of this size or larger will never be merged. /// @see setMaxMergeDocs static const int32_t DEFAULT_MAX_MERGE_DOCS; /// Default noCFSRatio. If a merge's size is >= 10% of the index, then we disable compound file for it. 
/// @see #setNoCFSRatio static const double DEFAULT_NO_CFS_RATIO; int64_t minMergeSize; int64_t maxMergeSize; int32_t maxMergeDocs; public: /// @see #setNoCFSRatio double getNoCFSRatio(); /// If a merged segment will be more than this percentage of the total size of the index, leave the segment as /// non-compound file even if compound file is enabled. Set to 1.0 to always use CFS regardless of merge size. void setNoCFSRatio(double noCFSRatio); /// Returns the number of segments that are merged at once and also controls the total number of segments /// allowed to accumulate in the index. int32_t getMergeFactor(); /// Determines how often segment indices are merged by addDocument(). With smaller values, less RAM is /// used while indexing, and searches on unoptimized indices are faster, but indexing speed is slower. /// With larger values, more RAM is used during indexing, and while searches on unoptimized indices are /// slower, indexing is faster. Thus larger values (> 10) are best for batch index creation, and smaller /// values (< 10) for indices that are interactively maintained. void setMergeFactor(int32_t mergeFactor); /// Returns true if a newly flushed (not from merge) segment should use the compound file format. virtual bool useCompoundFile(const SegmentInfosPtr& segments, const SegmentInfoPtr& newSegment); /// Sets whether compound file format should be used for newly flushed and newly merged segments. void setUseCompoundFile(bool useCompoundFile); /// Returns true if newly flushed and newly merge segments are written in compound file format. /// @see #setUseCompoundFile bool getUseCompoundFile(); /// Returns true if the doc store files should use the compound file format. virtual bool useCompoundDocStore(const SegmentInfosPtr& segments); /// Sets whether compound file format should be used for newly flushed and newly merged doc store /// segment files (term vectors and stored fields). 
void setUseCompoundDocStore(bool useCompoundDocStore); /// Returns true if newly flushed and newly merge doc store segment files (term vectors and stored fields) /// are written in compound file format. @see #setUseCompoundDocStore bool getUseCompoundDocStore(); /// Sets whether the segment size should be calibrated by the number of deletes when choosing segments /// for merge. void setCalibrateSizeByDeletes(bool calibrateSizeByDeletes); /// Returns true if the segment size should be calibrated by the number of deletes when choosing segments /// for merge. bool getCalibrateSizeByDeletes(); /// Release all resources for the policy. virtual void close(); /// Returns the merges necessary to optimize the index. This merge policy defines "optimized" to mean only /// one segment in the index, where that segment has no deletions pending nor separate norms, and it is in /// compound file format if the current useCompoundFile setting is true. This method returns multiple merges /// (mergeFactor at a time) so the {@link MergeScheduler} in use may make use of concurrency. virtual MergeSpecificationPtr findMergesForOptimize(const SegmentInfosPtr& segmentInfos, int32_t maxSegmentCount, SetSegmentInfo segmentsToOptimize); /// Finds merges necessary to expunge all deletes from the index. We simply merge adjacent segments that have /// deletes, up to mergeFactor at a time. virtual MergeSpecificationPtr findMergesToExpungeDeletes(const SegmentInfosPtr& segmentInfos); /// Checks if any merges are now necessary and returns a {@link MergePolicy.MergeSpecification} if so. A merge /// is necessary when there are more than {@link #setMergeFactor} segments at a given level. When multiple /// levels have too many segments, this method will return multiple merges, allowing the {@link MergeScheduler} /// to use concurrency. 
virtual MergeSpecificationPtr findMerges(const SegmentInfosPtr& segmentInfos); /// Determines the largest segment (measured by document count) that may be merged with other segments. /// Small values (eg., less than 10,000) are best for interactive indexing, as this limits the length of /// pauses while indexing to a few seconds. Larger values are best for batched indexing and speedier searches. /// /// The default value is INT_MAX. /// /// The default merge policy ({@link LogByteSizeMergePolicy}) also allows you to set this limit by net size /// (in MB) of the segment, using {@link LogByteSizeMergePolicy#setMaxMergeMB}. void setMaxMergeDocs(int32_t maxMergeDocs); /// Returns the largest segment (measured by document count) that may be merged with other segments. /// @see #setMaxMergeDocs int32_t getMaxMergeDocs(); protected: bool verbose(); void message(const String& message); virtual int64_t size(const SegmentInfoPtr& info) = 0; int64_t sizeDocs(const SegmentInfoPtr& info); int64_t sizeBytes(const SegmentInfoPtr& info); bool isOptimized(const SegmentInfosPtr& infos, int32_t maxNumSegments, SetSegmentInfo segmentsToOptimize); /// Returns true if this single info is optimized (has no pending norms or deletes, is in the same dir as the /// writer, and matches the current compound file setting bool isOptimized(const SegmentInfoPtr& info); OneMergePtr makeOneMerge(const SegmentInfosPtr& infos, const SegmentInfosPtr& infosToMerge); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LowerCaseFilter.h000066400000000000000000000013651456444476200235640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef LOWERCASEFILTER_H #define LOWERCASEFILTER_H #include "TokenFilter.h" namespace Lucene { /// Normalizes token text to lower case. class LPPAPI LowerCaseFilter : public TokenFilter { public: LowerCaseFilter(const TokenStreamPtr& input); virtual ~LowerCaseFilter(); LUCENE_CLASS(LowerCaseFilter); protected: TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LowerCaseTokenizer.h000066400000000000000000000032161456444476200243060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LOWERCASETOKENIZER_H #define LOWERCASETOKENIZER_H #include "LetterTokenizer.h" namespace Lucene { /// LowerCaseTokenizer performs the function of LetterTokenizer and LowerCaseFilter together. It divides text at /// non-letters and converts them to lower case. While it is functionally equivalent to the combination of /// LetterTokenizer and LowerCaseFilter, there is a performance advantage to doing the two tasks at once, hence /// this (redundant) implementation. /// /// Note: this does a decent job for most European languages, but does a terrible job for some Asian languages, /// where words are not separated by spaces. class LPPAPI LowerCaseTokenizer : public LetterTokenizer { public: /// Construct a new LowerCaseTokenizer. LowerCaseTokenizer(const ReaderPtr& input); /// Construct a new LowerCaseTokenizer using a given {@link AttributeSource}. LowerCaseTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); /// Construct a new LowerCaseTokenizer using a given {@link AttributeFactory}. 
LowerCaseTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); virtual ~LowerCaseTokenizer(); LUCENE_CLASS(LowerCaseTokenizer); public: /// Converts char to lower case CharFolder::toLower. virtual wchar_t normalize(wchar_t c); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Lucene.h000066400000000000000000000224631456444476200217470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LUCENE_H #define LUCENE_H #include "Config.h" #include #include #include #include #include #include #include #include #ifdef LPP_USE_BOOST_INTEGER #include #endif #include #include #include #include #ifdef LPP_USE_BOOST_INTEGER using boost::int8_t; using boost::uint8_t; using boost::int16_t; using boost::uint16_t; using boost::int32_t; using boost::uint32_t; using boost::int64_t; using boost::uint64_t; #endif #define SIZEOF_ARRAY(arr) (sizeof(arr) / sizeof((arr)[0])) #include "LuceneTypes.h" #include "LuceneAllocator.h" namespace boost { struct blank; class thread; class any; template < typename Signature > class function; namespace interprocess { class file_lock; } namespace posix_time { class ptime; } } namespace Lucene { typedef std::basic_string< char, std::char_traits > SingleString; typedef std::basic_ostringstream< char, std::char_traits > SingleStringStream; typedef std::basic_string< wchar_t, std::char_traits > String; typedef std::basic_ostringstream< wchar_t, std::char_traits > StringStream; const std::basic_string< wchar_t, std::char_traits > EmptyString; typedef boost::shared_ptr filelockPtr; typedef boost::shared_ptr threadPtr; typedef boost::shared_ptr ofstreamPtr; typedef boost::shared_ptr ifstreamPtr; typedef boost::shared_ptr 
localePtr; } #include "LuceneFactory.h" #include "LuceneException.h" #include "Array.h" #include "Collection.h" #include "Map.h" #include "Set.h" #include "HashMap.h" #include "HashSet.h" #include "Constants.h" namespace Lucene { typedef Array ByteArray; typedef Array IntArray; typedef Array LongArray; typedef Array CharArray; typedef Array DoubleArray; template struct luceneEquals { inline bool operator()(const TYPE& first, const TYPE& second) const { return first ? first->equals(second) : (!first && !second); } }; template struct luceneEqualTo { luceneEqualTo(const TYPE& type) : equalType(type) {} inline bool operator()(const TYPE& other) const { return equalType->equals(other); } const TYPE& equalType; }; template struct luceneWeakEquals { inline bool operator()(const TYPE& first, const TYPE& second) const { if (first.expired() || second.expired()) { return (first.expired() && second.expired()); } return first.lock()->equals(second.lock()); } }; template struct luceneHash { std::size_t operator()(const TYPE& type) const { return type ? type->hashCode() : 0; } }; template struct luceneWeakHash { std::size_t operator()(const TYPE& type) const { return type.expired() ? 
0 : type.lock()->hashCode(); } }; template struct luceneCompare { inline bool operator()(const TYPE& first, const TYPE& second) const { if (!second) { return false; } if (!first) { return true; } return (first->compareTo(second) < 0); } }; typedef boost::blank VariantNull; typedef boost::variant FieldsData; typedef boost::variant ComparableValue; typedef boost::variant NumericValue; typedef boost::variant StringValue; typedef boost::variant, Collection, Collection, VariantNull> CollectionValue; typedef HashSet< SegmentInfoPtr, luceneHash, luceneEquals > SetSegmentInfo; typedef HashSet< MergeThreadPtr, luceneHash, luceneEquals > SetMergeThread; typedef HashSet< OneMergePtr, luceneHash, luceneEquals > SetOneMerge; typedef HashSet< QueryPtr, luceneHash, luceneEquals > SetQuery; typedef HashSet< TermPtr, luceneHash, luceneEquals > SetTerm; typedef HashSet< BooleanClausePtr, luceneHash, luceneEquals > SetBooleanClause; typedef HashSet< ReaderFieldPtr, luceneHash, luceneEquals > SetReaderField; typedef HashSet SetByteArray; typedef HashMap< String, String > MapStringString; typedef HashMap< wchar_t, NormalizeCharMapPtr > MapCharNormalizeCharMap; typedef HashMap< String, AnalyzerPtr > MapStringAnalyzer; typedef HashMap< String, ByteArray > MapStringByteArray; typedef HashMap< String, int32_t > MapStringInt; typedef HashMap< String, FieldInfoPtr > MapStringFieldInfo; typedef HashMap< String, Collection > MapStringCollectionTermVectorEntry; typedef HashMap< String, RefCountPtr > MapStringRefCount; typedef HashMap< int32_t, TermVectorsPositionInfoPtr > MapIntTermVectorsPositionInfo; typedef HashMap< String, MapIntTermVectorsPositionInfo > MapStringMapIntTermVectorsPositionInfo; typedef HashMap< String, NormPtr > MapStringNorm; typedef HashMap< String, TermVectorEntryPtr > MapStringTermVectorEntry; typedef HashMap< String, RAMFilePtr > MapStringRAMFile; typedef HashMap< int32_t, ByteArray > MapIntByteArray; typedef HashMap< int32_t, FilterItemPtr > MapIntFilterItem; typedef 
HashMap< int32_t, double > MapIntDouble; typedef HashMap< int64_t, int32_t > MapLongInt; typedef HashMap< String, double > MapStringDouble; typedef HashMap< int32_t, CachePtr > MapStringCache; typedef HashMap< String, LockPtr > MapStringLock; typedef HashMap< SegmentInfoPtr, SegmentReaderPtr, luceneHash, luceneEquals > MapSegmentInfoSegmentReader; typedef HashMap< SegmentInfoPtr, int32_t, luceneHash, luceneEquals > MapSegmentInfoInt; typedef HashMap< DocFieldConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField; typedef HashMap< InvertedDocConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField; typedef HashMap< InvertedDocEndConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField; typedef HashMap< TermsHashConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField; typedef HashMap< FieldInfoPtr, Collection, luceneHash, luceneEquals > MapFieldInfoCollectionNormsWriterPerField; typedef HashMap< IndexReaderPtr, HashSet, luceneHash, luceneEquals > MapIndexReaderSetString; typedef HashMap< TermPtr, int32_t, luceneHash, luceneEquals > MapTermInt; typedef HashMap< QueryPtr, int32_t, luceneHash, luceneEquals > MapQueryInt; typedef HashMap< EntryPtr, boost::any, luceneHash, luceneEquals > MapEntryAny; typedef HashMap< PhrasePositionsPtr, LuceneObjectPtr, luceneHash, luceneEquals > MapPhrasePositionsLuceneObject; typedef HashMap< ReaderFieldPtr, SetReaderField, luceneHash, luceneEquals > MapReaderFieldSetReaderField; typedef WeakHashMap< LuceneObjectWeakPtr, LuceneObjectPtr, luceneWeakHash, luceneWeakEquals > WeakMapObjectObject; typedef WeakHashMap< LuceneObjectWeakPtr, MapEntryAny, luceneWeakHash, luceneWeakEquals > WeakMapLuceneObjectMapEntryAny; typedef Map< String, AttributePtr > 
MapStringAttribute; typedef Map< int64_t, DocumentsWriterThreadStatePtr > MapThreadDocumentsWriterThreadState; typedef Map< String, IndexReaderPtr > MapStringIndexReader; typedef Map< TermPtr, NumPtr, luceneCompare > MapTermNum; typedef boost::function TermVectorEntryComparator; template < class KEY, class VALUE, class HASH = boost::hash, class EQUAL = std::equal_to > class SimpleLRUCache; typedef SimpleLRUCache< TermPtr, TermInfoPtr, luceneHash, luceneEquals > TermInfoCache; typedef boost::shared_ptr TermInfoCachePtr; } #include "Synchronize.h" #include "CycleCheck.h" #if defined(LPP_BUILDING_LIB) || defined(LPP_EXPOSE_INTERNAL) #define INTERNAL public #else #define INTERNAL protected #endif #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LuceneAllocator.h000066400000000000000000000012401456444476200235760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ALLOCATOR_H #define ALLOCATOR_H #include "Config.h" namespace Lucene { /// Allocate block of memory. LPPAPI void* AllocMemory(size_t size); /// Reallocate a given block of memory. LPPAPI void* ReallocMemory(void* memory, size_t size); /// Release a given block of memory. LPPAPI void FreeMemory(void* memory); } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LuceneException.h000066400000000000000000000105261456444476200236230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef LUCENEEXCEPTION_H #define LUCENEEXCEPTION_H #include "Lucene.h" namespace Lucene { /// Lucene exception container. class LPPAPI LuceneException : public std::exception { public: enum ExceptionType { Null, AlreadyClosed, Compression, CorruptIndex, FieldReader, FileNotFound, IllegalArgument, IllegalState, IndexOutOfBounds, IO, LockObtainFailed, LockReleaseFailed, Lookahead, MergeAborted, Merge, NoSuchDirectory, NullPointer, NumberFormat, OutOfMemory, Parse, QueryParser, Runtime, StaleReader, StopFillCache, Temporary, TimeExceeded, TooManyClauses, UnsupportedOperation }; LuceneException(const String& error = EmptyString, LuceneException::ExceptionType type = Null) throw(); ~LuceneException() throw(); protected: ExceptionType type; String error; std::string _what; public: ExceptionType getType() const; String getError() const; bool isNull() const; void throwException(); virtual const char* what() const throw(); }; template class LPPAPI ExceptionTemplate : public ParentException { public: ExceptionTemplate(const String& error = EmptyString, LuceneException::ExceptionType type = Type) : ParentException(error, type) { } }; typedef ExceptionTemplate RuntimeException; typedef ExceptionTemplate OutOfMemoryError; typedef ExceptionTemplate TemporaryException; typedef ExceptionTemplate IllegalStateException; typedef ExceptionTemplate IllegalArgumentException; typedef ExceptionTemplate IndexOutOfBoundsException; typedef ExceptionTemplate NullPointerException; typedef ExceptionTemplate FieldReaderException; typedef ExceptionTemplate MergeException; typedef ExceptionTemplate StopFillCacheException; typedef ExceptionTemplate TimeExceededException; typedef ExceptionTemplate TooManyClausesException; typedef ExceptionTemplate UnsupportedOperationException; typedef ExceptionTemplate NumberFormatException; typedef ExceptionTemplate AlreadyClosedException; typedef ExceptionTemplate IOException; typedef 
ExceptionTemplate CorruptIndexException; typedef ExceptionTemplate FileNotFoundException; typedef ExceptionTemplate LockObtainFailedException; typedef ExceptionTemplate LockReleaseFailedException; typedef ExceptionTemplate MergeAbortedException; typedef ExceptionTemplate StaleReaderException; typedef ExceptionTemplate NoSuchDirectoryException; typedef ExceptionTemplate LookaheadSuccess; typedef ExceptionTemplate ParseException; typedef ExceptionTemplate QueryParserError; typedef ExceptionTemplate CompressionException; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LuceneFactory.h000066400000000000000000000146551456444476200233030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LUCENEFACTORY_H #define LUCENEFACTORY_H #include #include namespace Lucene { template boost::shared_ptr newInstance() { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T); #else return boost::make_shared(); #endif } template boost::shared_ptr newInstance(A1 const& a1) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1)); #else return boost::make_shared(a1); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2)); #else return boost::make_shared(a1, a2); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2, a3)); #else return boost::make_shared(a1, a2, a3); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2, a3, a4)); #else return 
boost::make_shared(a1, a2, a3, a4); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2, a3, a4, a5)); #else return boost::make_shared(a1, a2, a3, a4, a5); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6)); #else return boost::make_shared(a1, a2, a3, a4, a5, a6); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6, a7)); #else return boost::make_shared(a1, a2, a3, a4, a5, a6, a7); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6, a7, a8)); #else return boost::make_shared(a1, a2, a3, a4, a5, a6, a7, a8); #endif } template boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8, A9 const& a9) { #if BOOST_VERSION <= 103800 return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9)); #else return boost::make_shared(a1, a2, a3, a4, a5, a6, a7, a8, a9); #endif } template boost::shared_ptr newLucene() { #if BOOST_VERSION <= 103800 boost::shared_ptr instance = boost::shared_ptr(new T); #else boost::shared_ptr instance = boost::make_shared(); #endif instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1) { boost::shared_ptr instance(newInstance(a1)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2) { boost::shared_ptr 
instance(newInstance(a1, a2)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3) { boost::shared_ptr instance(newInstance(a1, a2, a3)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4) { boost::shared_ptr instance(newInstance(a1, a2, a3, a4)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5) { boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6) { boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7) { boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6, a7)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8) { boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6, a7, a8)); instance->initialize(); return instance; } template boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8, A9 const& a9) { boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6, a7, a8, a9)); instance->initialize(); return instance; } } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LuceneHeaders.h000066400000000000000000000061361456444476200232420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LUCENEHEADERS_H #define LUCENEHEADERS_H #include "Lucene.h" // Include most common files: analysis #include "StandardAnalyzer.h" #include "StandardFilter.h" #include "StandardTokenizer.h" #include "KeywordAnalyzer.h" #include "KeywordTokenizer.h" #include "LowerCaseFilter.h" #include "LowerCaseTokenizer.h" #include "PerFieldAnalyzerWrapper.h" #include "PorterStemFilter.h" #include "SimpleAnalyzer.h" #include "StopAnalyzer.h" #include "StopFilter.h" #include "Token.h" #include "TokenFilter.h" #include "WhitespaceAnalyzer.h" #include "WhitespaceTokenizer.h" // Include most common files: document #include "DateField.h" #include "DateTools.h" #include "Document.h" #include "Field.h" #include "NumberTools.h" #include "NumericField.h" // Include most common files: index #include "IndexCommit.h" #include "IndexDeletionPolicy.h" #include "IndexReader.h" #include "IndexWriter.h" #include "KeepOnlyLastCommitDeletionPolicy.h" #include "LogByteSizeMergePolicy.h" #include "LogDocMergePolicy.h" #include "LogMergePolicy.h" #include "MergeScheduler.h" #include "MultiReader.h" #include "ParallelReader.h" #include "Term.h" #include "TermDocs.h" #include "TermEnum.h" // Include most common files: queryparser #include "MultiFieldQueryParser.h" #include "QueryParseError.h" #include "QueryParser.h" // Include most common files: search #include "BooleanClause.h" #include "BooleanQuery.h" #include "DocIdSet.h" #include "DocIdSetIterator.h" #include "Explanation.h" #include "IndexSearcher.h" #include "MatchAllDocsQuery.h" #include "MultiPhraseQuery.h" #include "MultiSearcher.h" #include "MultiTermQuery.h" #include "NumericRangeFilter.h" #include "NumericRangeQuery.h" #include "ParallelMultiSearcher.h" #include "PhraseQuery.h" #include "PrefixFilter.h" #include "PrefixQuery.h" #include 
"ScoreDoc.h" #include "Scorer.h" #include "Searcher.h" #include "Sort.h" #include "TermQuery.h" #include "TermRangeFilter.h" #include "TermRangeQuery.h" #include "TopDocs.h" #include "TopDocsCollector.h" #include "TopFieldCollector.h" #include "TopScoreDocCollector.h" #include "Weight.h" #include "WildcardQuery.h" #include "SpanFirstQuery.h" #include "SpanNearQuery.h" #include "SpanNotQuery.h" #include "SpanOrQuery.h" #include "SpanQuery.h" // Include most common files: store #include "FSDirectory.h" #include "MMapDirectory.h" #include "RAMDirectory.h" #include "RAMFile.h" #include "RAMInputStream.h" #include "RAMOutputStream.h" #include "SimpleFSDirectory.h" // Include most common files: util #include "MiscUtils.h" #include "StringUtils.h" #include "BufferedReader.h" #include "DocIdBitSet.h" #include "FileReader.h" #include "InfoStream.h" #include "LuceneThread.h" #include "OpenBitSet.h" #include "OpenBitSetDISI.h" #include "OpenBitSetIterator.h" #include "StringReader.h" #include "ThreadPool.h" #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LuceneObject.h000066400000000000000000000037071456444476200230760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef LUCENEOBJECT_H #define LUCENEOBJECT_H #include #include "LuceneSync.h" #ifdef LPP_USE_CYCLIC_CHECK #define LUCENE_INTERFACE(Name) \ static String _getClassName() { return L###Name; } \ virtual String getClassName() { return L###Name; } \ CycleCheckT cycleCheck; #else #define LUCENE_INTERFACE(Name) \ static String _getClassName() { return L###Name; } \ virtual String getClassName() { return L###Name; } #endif #define LUCENE_CLASS(Name) \ LUCENE_INTERFACE(Name); \ boost::shared_ptr shared_from_this() { return boost::static_pointer_cast(LuceneObject::shared_from_this()); } \ namespace Lucene { /// Base class for all Lucene classes class LPPAPI LuceneObject : public LuceneSync, public boost::enable_shared_from_this { public: virtual ~LuceneObject(); protected: LuceneObject(); public: /// Called directly after instantiation to create objects that depend on this object being /// fully constructed. virtual void initialize(); /// Return clone of this object /// @param other clone reference - null when called initially, then set in top virtual override. virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); /// Return hash code for this object. virtual int32_t hashCode(); /// Return whether two objects are equal virtual bool equals(const LuceneObjectPtr& other); /// Compare two objects virtual int32_t compareTo(const LuceneObjectPtr& other); /// Returns a string representation of the object virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LuceneSignal.h000066400000000000000000000021011456444476200230700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef LUCENESIGNAL_H #define LUCENESIGNAL_H #include #include "Lucene.h" namespace Lucene { /// Utility class to support signaling notifications. class LPPAPI LuceneSignal { public: LuceneSignal(const SynchronizePtr& objectLock = SynchronizePtr()); virtual ~LuceneSignal(); protected: boost::mutex waitMutex; boost::condition signalCondition; SynchronizePtr objectLock; public: /// create a new LuceneSignal instance atomically. static void createSignal(LuceneSignalPtr& signal, const SynchronizePtr& objectLock); /// Wait for signal using an optional timeout. void wait(int32_t timeout = 0); /// Notify all threads waiting for signal. void notifyAll(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LuceneSync.h000066400000000000000000000023141456444476200225750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LUCENESYNC_H #define LUCENESYNC_H #include "Lucene.h" namespace Lucene { /// Base class for all Lucene synchronised classes class LPPAPI LuceneSync { public: virtual ~LuceneSync(); protected: SynchronizePtr objectLock; LuceneSignalPtr objectSignal; public: /// Return this object synchronize lock. virtual SynchronizePtr getSync(); /// Return this object signal. virtual LuceneSignalPtr getSignal(); /// Lock this object using an optional timeout. virtual void lock(int32_t timeout = 0); /// Unlock this object. virtual void unlock(); /// Returns true if this object is currently locked by current thread. virtual bool holdsLock(); /// Wait for signal using an optional timeout. virtual void wait(int32_t timeout = 0); /// Notify all threads waiting for signal. 
virtual void notifyAll(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LuceneThread.h000066400000000000000000000046321456444476200230750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LUCENETHREAD_H #define LUCENETHREAD_H #include "LuceneObject.h" namespace Lucene { /// Lucene thread container. /// /// It seems there are major issues with using boost::thread::id under Windows. /// After many hours of debugging and trying various strategies, I was unable to fix an /// occasional crash whereby boost::thread::thread_data was being deleted prematurely. /// /// This problem is most visible when running the AtomicUpdateTest test suite. /// /// Therefore, I now uniquely identify threads by their native id. class LPPAPI LuceneThread : public LuceneObject { public: LuceneThread(); virtual ~LuceneThread(); LUCENE_CLASS(LuceneThread); public: static const int32_t MAX_THREAD_PRIORITY; static const int32_t NORM_THREAD_PRIORITY; static const int32_t MIN_THREAD_PRIORITY; protected: threadPtr thread; /// Flag to indicate running thread. /// @see #isAlive bool running; public: /// start thread see {@link #run}. virtual void start(); /// return whether thread is current running. virtual bool isAlive(); /// set running thread priority. virtual void setPriority(int32_t priority); /// return running thread priority. virtual int32_t getPriority(); /// wait for thread to finish using an optional timeout. virtual bool join(int32_t timeout = 0); /// causes the currently executing thread object to temporarily pause and allow other threads to execute. virtual void yield(); /// override to provide the body of the thread. 
virtual void run() = 0; /// Return representation of current execution thread. static int64_t currentId(); /// Suspends current execution thread for a given time. static void threadSleep(int32_t time); /// Yield current execution thread. static void threadYield(); protected: /// set thread running state. void setRunning(bool running); /// return thread running state. bool isRunning(); /// function that controls the lifetime of the running thread. static void runThread(LuceneThread* thread); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/LuceneTypes.h000066400000000000000000000474621456444476200230020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LUCENETYPES_H #define LUCENETYPES_H #include "Lucene.h" #define DECLARE_SHARED_PTR(Type) \ class Type; \ typedef boost::shared_ptr Type##Ptr; \ typedef boost::weak_ptr Type##WeakPtr; namespace Lucene { // analysis DECLARE_SHARED_PTR(Analyzer) DECLARE_SHARED_PTR(ASCIIFoldingFilter) DECLARE_SHARED_PTR(BaseCharFilter) DECLARE_SHARED_PTR(CachingTokenFilter) DECLARE_SHARED_PTR(CharArraySet) DECLARE_SHARED_PTR(CharFilter) DECLARE_SHARED_PTR(CharReader) DECLARE_SHARED_PTR(CharStream) DECLARE_SHARED_PTR(CharTokenizer) DECLARE_SHARED_PTR(FlagsAttribute) DECLARE_SHARED_PTR(ISOLatin1AccentFilter) DECLARE_SHARED_PTR(KeywordAnalyzer) DECLARE_SHARED_PTR(KeywordTokenizer) DECLARE_SHARED_PTR(LengthFilter) DECLARE_SHARED_PTR(LetterTokenizer) DECLARE_SHARED_PTR(LowerCaseFilter) DECLARE_SHARED_PTR(LowerCaseTokenizer) DECLARE_SHARED_PTR(MappingCharFilter) DECLARE_SHARED_PTR(NormalizeCharMap) DECLARE_SHARED_PTR(NumericTokenStream) DECLARE_SHARED_PTR(OffsetAttribute) DECLARE_SHARED_PTR(PayloadAttribute) 
DECLARE_SHARED_PTR(PerFieldAnalyzerWrapper) DECLARE_SHARED_PTR(PorterStemFilter) DECLARE_SHARED_PTR(PorterStemmer) DECLARE_SHARED_PTR(PositionIncrementAttribute) DECLARE_SHARED_PTR(SimpleAnalyzer) DECLARE_SHARED_PTR(SinkFilter) DECLARE_SHARED_PTR(SinkTokenStream) DECLARE_SHARED_PTR(StandardAnalyzer) DECLARE_SHARED_PTR(StandardAnalyzerSavedStreams) DECLARE_SHARED_PTR(StandardFilter) DECLARE_SHARED_PTR(StandardTokenizer) DECLARE_SHARED_PTR(StandardTokenizerImpl) DECLARE_SHARED_PTR(StopAnalyzer) DECLARE_SHARED_PTR(StopAnalyzerSavedStreams) DECLARE_SHARED_PTR(StopFilter) DECLARE_SHARED_PTR(TeeSinkTokenFilter) DECLARE_SHARED_PTR(TermAttribute) DECLARE_SHARED_PTR(Token) DECLARE_SHARED_PTR(TokenAttributeFactory) DECLARE_SHARED_PTR(TokenFilter) DECLARE_SHARED_PTR(Tokenizer) DECLARE_SHARED_PTR(TokenStream) DECLARE_SHARED_PTR(TypeAttribute) DECLARE_SHARED_PTR(WhitespaceAnalyzer) DECLARE_SHARED_PTR(WhitespaceTokenizer) DECLARE_SHARED_PTR(WordlistLoader) // document DECLARE_SHARED_PTR(AbstractField) DECLARE_SHARED_PTR(CompressionTools) DECLARE_SHARED_PTR(DateField) DECLARE_SHARED_PTR(DateTools) DECLARE_SHARED_PTR(Document) DECLARE_SHARED_PTR(Field) DECLARE_SHARED_PTR(Fieldable) DECLARE_SHARED_PTR(FieldSelector) DECLARE_SHARED_PTR(LoadFirstFieldSelector) DECLARE_SHARED_PTR(MapFieldSelector) DECLARE_SHARED_PTR(NumberTools) DECLARE_SHARED_PTR(NumericField) DECLARE_SHARED_PTR(SetBasedFieldSelector) // index DECLARE_SHARED_PTR(AbstractAllTermDocs) DECLARE_SHARED_PTR(AllTermDocs) DECLARE_SHARED_PTR(BufferedDeletes) DECLARE_SHARED_PTR(ByteBlockAllocator) DECLARE_SHARED_PTR(ByteBlockPool) DECLARE_SHARED_PTR(ByteBlockPoolAllocatorBase) DECLARE_SHARED_PTR(ByteSliceReader) DECLARE_SHARED_PTR(ByteSliceWriter) DECLARE_SHARED_PTR(CharBlockPool) DECLARE_SHARED_PTR(CheckAbort) DECLARE_SHARED_PTR(CheckIndex) DECLARE_SHARED_PTR(CommitPoint) DECLARE_SHARED_PTR(CompoundFileReader) DECLARE_SHARED_PTR(CompoundFileWriter) DECLARE_SHARED_PTR(ConcurrentMergeScheduler) DECLARE_SHARED_PTR(CoreReaders) 
DECLARE_SHARED_PTR(CSIndexInput) DECLARE_SHARED_PTR(DefaultIndexingChain) DECLARE_SHARED_PTR(DefaultSkipListReader) DECLARE_SHARED_PTR(DefaultSkipListWriter) DECLARE_SHARED_PTR(DirectoryReader) DECLARE_SHARED_PTR(DocConsumer) DECLARE_SHARED_PTR(DocConsumerPerThread) DECLARE_SHARED_PTR(DocFieldConsumer) DECLARE_SHARED_PTR(DocFieldConsumerPerField) DECLARE_SHARED_PTR(DocFieldConsumerPerThread) DECLARE_SHARED_PTR(DocFieldConsumers) DECLARE_SHARED_PTR(DocFieldConsumersPerDoc) DECLARE_SHARED_PTR(DocFieldConsumersPerField) DECLARE_SHARED_PTR(DocFieldConsumersPerThread) DECLARE_SHARED_PTR(DocFieldProcessor) DECLARE_SHARED_PTR(DocFieldProcessorPerField) DECLARE_SHARED_PTR(DocFieldProcessorPerThread) DECLARE_SHARED_PTR(DocFieldProcessorPerThreadPerDoc) DECLARE_SHARED_PTR(DocInverter) DECLARE_SHARED_PTR(DocInverterPerField) DECLARE_SHARED_PTR(DocInverterPerThread) DECLARE_SHARED_PTR(DocState) DECLARE_SHARED_PTR(DocumentsWriter) DECLARE_SHARED_PTR(DocumentsWriterThreadState) DECLARE_SHARED_PTR(DocWriter) DECLARE_SHARED_PTR(FieldInfo) DECLARE_SHARED_PTR(FieldInfos) DECLARE_SHARED_PTR(FieldInvertState) DECLARE_SHARED_PTR(FieldNormStatus) DECLARE_SHARED_PTR(FieldSortedTermVectorMapper) DECLARE_SHARED_PTR(FieldsReader) DECLARE_SHARED_PTR(FieldsReaderLocal) DECLARE_SHARED_PTR(FieldsWriter) DECLARE_SHARED_PTR(FilterIndexReader) DECLARE_SHARED_PTR(FindSegmentsModified) DECLARE_SHARED_PTR(FindSegmentsOpen) DECLARE_SHARED_PTR(FindSegmentsRead) DECLARE_SHARED_PTR(FindSegmentsReopen) DECLARE_SHARED_PTR(FormatPostingsDocsConsumer) DECLARE_SHARED_PTR(FormatPostingsDocsWriter) DECLARE_SHARED_PTR(FormatPostingsFieldsConsumer) DECLARE_SHARED_PTR(FormatPostingsFieldsWriter) DECLARE_SHARED_PTR(FormatPostingsPositionsConsumer) DECLARE_SHARED_PTR(FormatPostingsPositionsWriter) DECLARE_SHARED_PTR(FormatPostingsTermsConsumer) DECLARE_SHARED_PTR(FormatPostingsTermsWriter) DECLARE_SHARED_PTR(FreqProxFieldMergeState) DECLARE_SHARED_PTR(FreqProxTermsWriter) 
DECLARE_SHARED_PTR(FreqProxTermsWriterPerField) DECLARE_SHARED_PTR(FreqProxTermsWriterPerThread) DECLARE_SHARED_PTR(FreqProxTermsWriterPostingList) DECLARE_SHARED_PTR(IndexCommit) DECLARE_SHARED_PTR(IndexDeletionPolicy) DECLARE_SHARED_PTR(IndexFileDeleter) DECLARE_SHARED_PTR(IndexFileNameFilter) DECLARE_SHARED_PTR(IndexingChain) DECLARE_SHARED_PTR(IndexReader) DECLARE_SHARED_PTR(IndexReaderWarmer) DECLARE_SHARED_PTR(IndexStatus) DECLARE_SHARED_PTR(IndexWriter) DECLARE_SHARED_PTR(IntBlockPool) DECLARE_SHARED_PTR(IntQueue) DECLARE_SHARED_PTR(InvertedDocConsumer) DECLARE_SHARED_PTR(InvertedDocConsumerPerField) DECLARE_SHARED_PTR(InvertedDocConsumerPerThread) DECLARE_SHARED_PTR(InvertedDocEndConsumer) DECLARE_SHARED_PTR(InvertedDocEndConsumerPerField) DECLARE_SHARED_PTR(InvertedDocEndConsumerPerThread) DECLARE_SHARED_PTR(KeepOnlyLastCommitDeletionPolicy) DECLARE_SHARED_PTR(LogByteSizeMergePolicy) DECLARE_SHARED_PTR(LogDocMergePolicy) DECLARE_SHARED_PTR(LogMergePolicy) DECLARE_SHARED_PTR(MergeDocIDRemapper) DECLARE_SHARED_PTR(MergePolicy) DECLARE_SHARED_PTR(MergeScheduler) DECLARE_SHARED_PTR(MergeSpecification) DECLARE_SHARED_PTR(MergeThread) DECLARE_SHARED_PTR(MultiLevelSkipListReader) DECLARE_SHARED_PTR(MultiLevelSkipListWriter) DECLARE_SHARED_PTR(MultipleTermPositions) DECLARE_SHARED_PTR(MultiReader) DECLARE_SHARED_PTR(MultiTermDocs) DECLARE_SHARED_PTR(MultiTermEnum) DECLARE_SHARED_PTR(MultiTermPositions) DECLARE_SHARED_PTR(MyCommitPoint) DECLARE_SHARED_PTR(MySegmentTermDocs) DECLARE_SHARED_PTR(Norm) DECLARE_SHARED_PTR(NormsWriter) DECLARE_SHARED_PTR(NormsWriterPerField) DECLARE_SHARED_PTR(NormsWriterPerThread) DECLARE_SHARED_PTR(Num) DECLARE_SHARED_PTR(OneMerge) DECLARE_SHARED_PTR(ParallelArrayTermVectorMapper) DECLARE_SHARED_PTR(ParallelReader) DECLARE_SHARED_PTR(ParallelTermEnum) DECLARE_SHARED_PTR(ParallelTermDocs) DECLARE_SHARED_PTR(ParallelTermPositions) DECLARE_SHARED_PTR(Payload) DECLARE_SHARED_PTR(PerDocBuffer) 
DECLARE_SHARED_PTR(PositionBasedTermVectorMapper) DECLARE_SHARED_PTR(RawPostingList) DECLARE_SHARED_PTR(ReaderCommit) DECLARE_SHARED_PTR(ReaderPool) DECLARE_SHARED_PTR(ReadOnlyDirectoryReader) DECLARE_SHARED_PTR(ReadOnlySegmentReader) DECLARE_SHARED_PTR(RefCount) DECLARE_SHARED_PTR(ReusableStringReader) DECLARE_SHARED_PTR(SegmentInfo) DECLARE_SHARED_PTR(SegmentInfoCollection) DECLARE_SHARED_PTR(SegmentInfos) DECLARE_SHARED_PTR(SegmentInfoStatus) DECLARE_SHARED_PTR(SegmentMergeInfo) DECLARE_SHARED_PTR(SegmentMergeQueue) DECLARE_SHARED_PTR(SegmentMerger) DECLARE_SHARED_PTR(SegmentReader) DECLARE_SHARED_PTR(SegmentReaderRef) DECLARE_SHARED_PTR(SegmentTermDocs) DECLARE_SHARED_PTR(SegmentTermEnum) DECLARE_SHARED_PTR(SegmentTermPositions) DECLARE_SHARED_PTR(SegmentTermPositionVector) DECLARE_SHARED_PTR(SegmentTermVector) DECLARE_SHARED_PTR(SegmentWriteState) DECLARE_SHARED_PTR(SerialMergeScheduler) DECLARE_SHARED_PTR(SingleTokenAttributeSource) DECLARE_SHARED_PTR(SkipBuffer) DECLARE_SHARED_PTR(SkipDocWriter) DECLARE_SHARED_PTR(SnapshotDeletionPolicy) DECLARE_SHARED_PTR(SortedTermVectorMapper) DECLARE_SHARED_PTR(StoredFieldStatus) DECLARE_SHARED_PTR(StoredFieldsWriter) DECLARE_SHARED_PTR(StoredFieldsWriterPerDoc) DECLARE_SHARED_PTR(StoredFieldsWriterPerThread) DECLARE_SHARED_PTR(Term) DECLARE_SHARED_PTR(TermBuffer) DECLARE_SHARED_PTR(TermEnum) DECLARE_SHARED_PTR(TermDocs) DECLARE_SHARED_PTR(TermFreqVector) DECLARE_SHARED_PTR(TermIndexStatus) DECLARE_SHARED_PTR(TermInfo) DECLARE_SHARED_PTR(TermInfosReader) DECLARE_SHARED_PTR(TermInfosReaderThreadResources) DECLARE_SHARED_PTR(TermInfosWriter) DECLARE_SHARED_PTR(TermPositions) DECLARE_SHARED_PTR(TermPositionsQueue) DECLARE_SHARED_PTR(TermPositionVector) DECLARE_SHARED_PTR(TermsHash) DECLARE_SHARED_PTR(TermsHashConsumer) DECLARE_SHARED_PTR(TermsHashConsumerPerField) DECLARE_SHARED_PTR(TermsHashConsumerPerThread) DECLARE_SHARED_PTR(TermsHashPerField) DECLARE_SHARED_PTR(TermsHashPerThread) DECLARE_SHARED_PTR(TermVectorEntry) 
DECLARE_SHARED_PTR(TermVectorEntryFreqSortedComparator) DECLARE_SHARED_PTR(TermVectorMapper) DECLARE_SHARED_PTR(TermVectorOffsetInfo) DECLARE_SHARED_PTR(TermVectorsReader) DECLARE_SHARED_PTR(TermVectorStatus) DECLARE_SHARED_PTR(TermVectorsTermsWriter) DECLARE_SHARED_PTR(TermVectorsTermsWriterPerDoc) DECLARE_SHARED_PTR(TermVectorsTermsWriterPerField) DECLARE_SHARED_PTR(TermVectorsTermsWriterPerThread) DECLARE_SHARED_PTR(TermVectorsTermsWriterPostingList) DECLARE_SHARED_PTR(TermVectorsWriter) DECLARE_SHARED_PTR(TermVectorsPositionInfo) DECLARE_SHARED_PTR(WaitQueue) // query parser DECLARE_SHARED_PTR(FastCharStream) DECLARE_SHARED_PTR(MultiFieldQueryParser) DECLARE_SHARED_PTR(QueryParser) DECLARE_SHARED_PTR(QueryParserCharStream) DECLARE_SHARED_PTR(QueryParserConstants) DECLARE_SHARED_PTR(QueryParserToken) DECLARE_SHARED_PTR(QueryParserTokenManager) // search DECLARE_SHARED_PTR(AveragePayloadFunction) DECLARE_SHARED_PTR(BooleanClause) DECLARE_SHARED_PTR(BooleanQuery) DECLARE_SHARED_PTR(BooleanScorer) DECLARE_SHARED_PTR(BooleanScorerCollector) DECLARE_SHARED_PTR(BooleanScorer2) DECLARE_SHARED_PTR(BooleanWeight) DECLARE_SHARED_PTR(Bucket) DECLARE_SHARED_PTR(BucketScorer) DECLARE_SHARED_PTR(BucketTable) DECLARE_SHARED_PTR(ByteCache) DECLARE_SHARED_PTR(ByteFieldSource) DECLARE_SHARED_PTR(ByteParser) DECLARE_SHARED_PTR(Cache) DECLARE_SHARED_PTR(CachedDfSource) DECLARE_SHARED_PTR(CachingSpanFilter) DECLARE_SHARED_PTR(CachingWrapperFilter) DECLARE_SHARED_PTR(CellQueue) DECLARE_SHARED_PTR(Collector) DECLARE_SHARED_PTR(ComplexExplanation) DECLARE_SHARED_PTR(ConjunctionScorer) DECLARE_SHARED_PTR(ConstantScoreAutoRewrite) DECLARE_SHARED_PTR(ConstantScoreAutoRewriteDefault) DECLARE_SHARED_PTR(ConstantScoreBooleanQueryRewrite) DECLARE_SHARED_PTR(ConstantScoreFilterRewrite) DECLARE_SHARED_PTR(ConstantScoreQuery) DECLARE_SHARED_PTR(ConstantScorer) DECLARE_SHARED_PTR(ConstantWeight) DECLARE_SHARED_PTR(Coordinator) DECLARE_SHARED_PTR(CountingConjunctionSumScorer) 
DECLARE_SHARED_PTR(CountingDisjunctionSumScorer) DECLARE_SHARED_PTR(CreationPlaceholder) DECLARE_SHARED_PTR(CustomScoreProvider) DECLARE_SHARED_PTR(CustomScoreQuery) DECLARE_SHARED_PTR(CustomWeight) DECLARE_SHARED_PTR(CustomScorer) DECLARE_SHARED_PTR(DefaultByteParser) DECLARE_SHARED_PTR(DefaultCustomScoreProvider) DECLARE_SHARED_PTR(DefaultDoubleParser) DECLARE_SHARED_PTR(DefaultIntParser) DECLARE_SHARED_PTR(DefaultLongParser) DECLARE_SHARED_PTR(DefaultSimilarity) DECLARE_SHARED_PTR(DisjunctionMaxQuery) DECLARE_SHARED_PTR(DisjunctionMaxScorer) DECLARE_SHARED_PTR(DisjunctionMaxWeight) DECLARE_SHARED_PTR(DisjunctionSumScorer) DECLARE_SHARED_PTR(DocIdSet) DECLARE_SHARED_PTR(DocIdSetIterator) DECLARE_SHARED_PTR(DocValues) DECLARE_SHARED_PTR(DoubleCache) DECLARE_SHARED_PTR(DoubleFieldSource) DECLARE_SHARED_PTR(DoubleParser) DECLARE_SHARED_PTR(EmptyDocIdSet) DECLARE_SHARED_PTR(EmptyDocIdSetIterator) DECLARE_SHARED_PTR(Entry) DECLARE_SHARED_PTR(ExactPhraseScorer) DECLARE_SHARED_PTR(Explanation) DECLARE_SHARED_PTR(FieldCache) DECLARE_SHARED_PTR(FieldCacheDocIdSet) DECLARE_SHARED_PTR(FieldCacheEntry) DECLARE_SHARED_PTR(FieldCacheEntryImpl) DECLARE_SHARED_PTR(FieldCacheImpl) DECLARE_SHARED_PTR(FieldCacheRangeFilter) DECLARE_SHARED_PTR(FieldCacheRangeFilterByte) DECLARE_SHARED_PTR(FieldCacheRangeFilterDouble) DECLARE_SHARED_PTR(FieldCacheRangeFilterInt) DECLARE_SHARED_PTR(FieldCacheRangeFilterLong) DECLARE_SHARED_PTR(FieldCacheRangeFilterString) DECLARE_SHARED_PTR(FieldCacheSource) DECLARE_SHARED_PTR(FieldCacheTermsFilter) DECLARE_SHARED_PTR(FieldCacheTermsFilterDocIdSet) DECLARE_SHARED_PTR(FieldComparator) DECLARE_SHARED_PTR(FieldComparatorSource) DECLARE_SHARED_PTR(FieldDoc) DECLARE_SHARED_PTR(FieldDocIdSetIteratorIncrement) DECLARE_SHARED_PTR(FieldDocIdSetIteratorTermDocs) DECLARE_SHARED_PTR(FieldDocSortedHitQueue) DECLARE_SHARED_PTR(FieldMaskingSpanQuery) DECLARE_SHARED_PTR(FieldScoreQuery) DECLARE_SHARED_PTR(FieldValueHitQueue) 
DECLARE_SHARED_PTR(FieldValueHitQueueEntry) DECLARE_SHARED_PTR(Filter) DECLARE_SHARED_PTR(FilterCache) DECLARE_SHARED_PTR(FilterCleaner) DECLARE_SHARED_PTR(FilteredDocIdSet) DECLARE_SHARED_PTR(FilteredDocIdSetIterator) DECLARE_SHARED_PTR(FilteredQuery) DECLARE_SHARED_PTR(FilteredQueryWeight) DECLARE_SHARED_PTR(FilteredTermEnum) DECLARE_SHARED_PTR(FilterItem) DECLARE_SHARED_PTR(FilterManager) DECLARE_SHARED_PTR(FuzzyQuery) DECLARE_SHARED_PTR(FuzzyTermEnum) DECLARE_SHARED_PTR(HitQueue) DECLARE_SHARED_PTR(HitQueueBase) DECLARE_SHARED_PTR(IDFExplanation) DECLARE_SHARED_PTR(IndexSearcher) DECLARE_SHARED_PTR(IntCache) DECLARE_SHARED_PTR(IntFieldSource) DECLARE_SHARED_PTR(IntParser) DECLARE_SHARED_PTR(LongCache) DECLARE_SHARED_PTR(LongParser) DECLARE_SHARED_PTR(MatchAllDocsQuery) DECLARE_SHARED_PTR(MatchAllDocsWeight) DECLARE_SHARED_PTR(MatchAllScorer) DECLARE_SHARED_PTR(MaxPayloadFunction) DECLARE_SHARED_PTR(MinPayloadFunction) DECLARE_SHARED_PTR(MultiComparatorsFieldValueHitQueue) DECLARE_SHARED_PTR(MultiPhraseQuery) DECLARE_SHARED_PTR(MultiSearcher) DECLARE_SHARED_PTR(MultiSearcherCallableNoSort) DECLARE_SHARED_PTR(MultiSearcherCallableWithSort) DECLARE_SHARED_PTR(MultiTermQuery) DECLARE_SHARED_PTR(MultiTermQueryWrapperFilter) DECLARE_SHARED_PTR(NearSpansOrdered) DECLARE_SHARED_PTR(NearSpansUnordered) DECLARE_SHARED_PTR(NumericRangeFilter) DECLARE_SHARED_PTR(NumericRangeQuery) DECLARE_SHARED_PTR(NumericUtilsDoubleParser) DECLARE_SHARED_PTR(NumericUtilsIntParser) DECLARE_SHARED_PTR(NumericUtilsLongParser) DECLARE_SHARED_PTR(OneComparatorFieldValueHitQueue) DECLARE_SHARED_PTR(OrdFieldSource) DECLARE_SHARED_PTR(ParallelMultiSearcher) DECLARE_SHARED_PTR(Parser) DECLARE_SHARED_PTR(PayloadFunction) DECLARE_SHARED_PTR(PayloadNearQuery) DECLARE_SHARED_PTR(PayloadNearSpanScorer) DECLARE_SHARED_PTR(PayloadNearSpanWeight) DECLARE_SHARED_PTR(PayloadSpanUtil) DECLARE_SHARED_PTR(PayloadTermQuery) DECLARE_SHARED_PTR(PayloadTermSpanScorer) DECLARE_SHARED_PTR(PayloadTermWeight) 
DECLARE_SHARED_PTR(PhrasePositions) DECLARE_SHARED_PTR(PhraseQuery) DECLARE_SHARED_PTR(PhraseQueue) DECLARE_SHARED_PTR(PhraseScorer) DECLARE_SHARED_PTR(PositionInfo) DECLARE_SHARED_PTR(PositiveScoresOnlyCollector) DECLARE_SHARED_PTR(PrefixFilter) DECLARE_SHARED_PTR(PrefixQuery) DECLARE_SHARED_PTR(PrefixTermEnum) DECLARE_SHARED_PTR(PriorityQueueScoreDocs) DECLARE_SHARED_PTR(Query) DECLARE_SHARED_PTR(QueryTermVector) DECLARE_SHARED_PTR(QueryWrapperFilter) DECLARE_SHARED_PTR(ReqExclScorer) DECLARE_SHARED_PTR(ReqOptSumScorer) DECLARE_SHARED_PTR(RewriteMethod) DECLARE_SHARED_PTR(ReverseOrdFieldSource) DECLARE_SHARED_PTR(ScoreCachingWrappingScorer) DECLARE_SHARED_PTR(ScoreDoc) DECLARE_SHARED_PTR(Scorer) DECLARE_SHARED_PTR(ScoreTerm) DECLARE_SHARED_PTR(ScoreTermQueue) DECLARE_SHARED_PTR(ScoringBooleanQueryRewrite) DECLARE_SHARED_PTR(Searchable) DECLARE_SHARED_PTR(Searcher) DECLARE_SHARED_PTR(Similarity) DECLARE_SHARED_PTR(SimilarityDisableCoord) DECLARE_SHARED_PTR(SimilarityDelegator) DECLARE_SHARED_PTR(SimilarityIDFExplanation) DECLARE_SHARED_PTR(SingleMatchScorer) DECLARE_SHARED_PTR(SingleTermEnum) DECLARE_SHARED_PTR(SloppyPhraseScorer) DECLARE_SHARED_PTR(Sort) DECLARE_SHARED_PTR(SortField) DECLARE_SHARED_PTR(SpanFilter) DECLARE_SHARED_PTR(SpanFilterResult) DECLARE_SHARED_PTR(SpanFirstQuery) DECLARE_SHARED_PTR(SpanNearQuery) DECLARE_SHARED_PTR(SpanNotQuery) DECLARE_SHARED_PTR(SpanOrQuery) DECLARE_SHARED_PTR(SpanQuery) DECLARE_SHARED_PTR(SpanQueryFilter) DECLARE_SHARED_PTR(SpanQueue) DECLARE_SHARED_PTR(Spans) DECLARE_SHARED_PTR(SpansCell) DECLARE_SHARED_PTR(SpanScorer) DECLARE_SHARED_PTR(SpanTermQuery) DECLARE_SHARED_PTR(SpanWeight) DECLARE_SHARED_PTR(StartEnd) DECLARE_SHARED_PTR(StringCache) DECLARE_SHARED_PTR(StringIndex) DECLARE_SHARED_PTR(StringIndexCache) DECLARE_SHARED_PTR(SubScorer) DECLARE_SHARED_PTR(TermQuery) DECLARE_SHARED_PTR(TermRangeFilter) DECLARE_SHARED_PTR(TermRangeQuery) DECLARE_SHARED_PTR(TermRangeTermEnum) DECLARE_SHARED_PTR(TermScorer) 
DECLARE_SHARED_PTR(TermSpans) DECLARE_SHARED_PTR(TimeLimitingCollector) DECLARE_SHARED_PTR(TimerThread) DECLARE_SHARED_PTR(TopDocs) DECLARE_SHARED_PTR(TopDocsCollector) DECLARE_SHARED_PTR(TopFieldCollector) DECLARE_SHARED_PTR(TopFieldDocs) DECLARE_SHARED_PTR(TopScoreDocCollector) DECLARE_SHARED_PTR(ValueSource) DECLARE_SHARED_PTR(ValueSourceQuery) DECLARE_SHARED_PTR(ValueSourceScorer) DECLARE_SHARED_PTR(ValueSourceWeight) DECLARE_SHARED_PTR(Weight) DECLARE_SHARED_PTR(WildcardQuery) DECLARE_SHARED_PTR(WildcardTermEnum) // store DECLARE_SHARED_PTR(BufferedIndexInput) DECLARE_SHARED_PTR(BufferedIndexOutput) DECLARE_SHARED_PTR(ChecksumIndexInput) DECLARE_SHARED_PTR(ChecksumIndexOutput) DECLARE_SHARED_PTR(Directory) DECLARE_SHARED_PTR(FileSwitchDirectory) DECLARE_SHARED_PTR(FSDirectory) DECLARE_SHARED_PTR(FSLockFactory) DECLARE_SHARED_PTR(IndexInput) DECLARE_SHARED_PTR(IndexOutput) DECLARE_SHARED_PTR(InputFile) DECLARE_SHARED_PTR(Lock) DECLARE_SHARED_PTR(LockFactory) DECLARE_SHARED_PTR(MMapDirectory) DECLARE_SHARED_PTR(MMapIndexInput) DECLARE_SHARED_PTR(NativeFSLock) DECLARE_SHARED_PTR(NativeFSLockFactory) DECLARE_SHARED_PTR(NoLock) DECLARE_SHARED_PTR(NoLockFactory) DECLARE_SHARED_PTR(OutputFile) DECLARE_SHARED_PTR(RAMDirectory) DECLARE_SHARED_PTR(RAMFile) DECLARE_SHARED_PTR(RAMInputStream) DECLARE_SHARED_PTR(RAMOutputStream) DECLARE_SHARED_PTR(SimpleFSDirectory) DECLARE_SHARED_PTR(SimpleFSIndexInput) DECLARE_SHARED_PTR(SimpleFSIndexOutput) DECLARE_SHARED_PTR(SimpleFSLock) DECLARE_SHARED_PTR(SimpleFSLockFactory) DECLARE_SHARED_PTR(SingleInstanceLock) DECLARE_SHARED_PTR(SingleInstanceLockFactory) // util DECLARE_SHARED_PTR(Attribute) DECLARE_SHARED_PTR(AttributeFactory) DECLARE_SHARED_PTR(AttributeSource) DECLARE_SHARED_PTR(AttributeSourceState) DECLARE_SHARED_PTR(BitSet) DECLARE_SHARED_PTR(BitVector) DECLARE_SHARED_PTR(BufferedReader) DECLARE_SHARED_PTR(Collator) DECLARE_SHARED_PTR(DefaultAttributeFactory) DECLARE_SHARED_PTR(DocIdBitSet) 
DECLARE_SHARED_PTR(FieldCacheSanityChecker) DECLARE_SHARED_PTR(FileReader) DECLARE_SHARED_PTR(Future) DECLARE_SHARED_PTR(HeapedScorerDoc) DECLARE_SHARED_PTR(InfoStream) DECLARE_SHARED_PTR(InfoStreamFile) DECLARE_SHARED_PTR(InfoStreamOut) DECLARE_SHARED_PTR(InputStreamReader) DECLARE_SHARED_PTR(Insanity) DECLARE_SHARED_PTR(IntRangeBuilder) DECLARE_SHARED_PTR(LongRangeBuilder) DECLARE_SHARED_PTR(LuceneObject) DECLARE_SHARED_PTR(LuceneSignal) DECLARE_SHARED_PTR(LuceneThread) DECLARE_SHARED_PTR(NumericUtils) DECLARE_SHARED_PTR(OpenBitSet) DECLARE_SHARED_PTR(OpenBitSetDISI) DECLARE_SHARED_PTR(OpenBitSetIterator) DECLARE_SHARED_PTR(Random) DECLARE_SHARED_PTR(Reader) DECLARE_SHARED_PTR(ReaderField) DECLARE_SHARED_PTR(ScorerDocQueue) DECLARE_SHARED_PTR(SortedVIntList) DECLARE_SHARED_PTR(StringReader) DECLARE_SHARED_PTR(Synchronize) DECLARE_SHARED_PTR(ThreadPool) DECLARE_SHARED_PTR(UnicodeResult) DECLARE_SHARED_PTR(UTF8Decoder) DECLARE_SHARED_PTR(UTF8DecoderStream) DECLARE_SHARED_PTR(UTF8Encoder) DECLARE_SHARED_PTR(UTF8EncoderStream) DECLARE_SHARED_PTR(UTF8Result) DECLARE_SHARED_PTR(UTF16Decoder) } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MMapDirectory.h000066400000000000000000000034611456444476200232500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MMAPDIRECTORY_H #define MMAPDIRECTORY_H #include "FSDirectory.h" namespace Lucene { /// File-based {@link Directory} implementation that uses mmap for reading, and {@link SimpleFSIndexOutput} for writing. /// /// NOTE: memory mapping uses up a portion of the virtual memory address space in your process equal to the size of the /// file being mapped. 
Before using this class, be sure your have plenty of virtual address space. /// /// NOTE: Accessing this class either directly or indirectly from a thread while it's interrupted can close the /// underlying channel immediately if at the same time the thread is blocked on IO. The channel will remain closed and /// subsequent access to {@link MMapDirectory} will throw an exception. class LPPAPI MMapDirectory : public FSDirectory { public: /// Create a new MMapDirectory for the named location. /// @param path the path of the directory. /// @param lockFactory the lock factory to use, or null for the default ({@link NativeFSLockFactory}) MMapDirectory(const String& path, const LockFactoryPtr& lockFactory = LockFactoryPtr()); virtual ~MMapDirectory(); LUCENE_CLASS(MMapDirectory); public: using FSDirectory::openInput; /// Creates an IndexInput for the file with the given name. virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); /// Creates an IndexOutput for the file with the given name. virtual IndexOutputPtr createOutput(const String& name); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Map.h000066400000000000000000000060151456444476200212440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MAP_H #define MAP_H #include #include "LuceneSync.h" namespace Lucene { /// Utility template class to handle maps that can be safely copied and shared template < class KEY, class VALUE, class LESS = std::less > class Map : public LuceneSync { public: typedef Map this_type; typedef std::pair key_value; typedef std::map map_type; typedef typename map_type::iterator iterator; typedef typename map_type::const_iterator const_iterator; typedef KEY key_type; typedef VALUE value_type; virtual ~Map() { } protected: boost::shared_ptr mapContainer; public: static this_type newInstance() { this_type instance; instance.mapContainer = Lucene::newInstance(); return instance; } void reset() { mapContainer.reset(); } int32_t size() const { return (int32_t)mapContainer->size(); } bool empty() const { return mapContainer->empty(); } void clear() { mapContainer->clear(); } iterator begin() { return mapContainer->begin(); } iterator end() { return mapContainer->end(); } const_iterator begin() const { return mapContainer->begin(); } const_iterator end() const { return mapContainer->end(); } operator bool() const { return mapContainer.get() != NULL; } bool operator! 
() const { return !mapContainer; } map_type& operator= (const map_type& other) { mapContainer = other.mapContainer; return *this; } void put(const KEY& key, const VALUE& value) { (*mapContainer)[key] = value; } template void putAll(ITER first, ITER last) { for (iterator current = first; current != last; ++current) { (*mapContainer)[current->first] = current->second; } } template void remove(ITER pos) { mapContainer->erase(pos); } template ITER remove(ITER first, ITER last) { return mapContainer->erase(first, last); } bool remove(const KEY& key) { return (mapContainer->erase(key) > 0); } iterator find(const KEY& key) { return mapContainer->find(key); } VALUE get(const KEY& key) const { iterator findValue = mapContainer->find(key); return findValue == mapContainer->end() ? VALUE() : findValue->second; } bool contains(const KEY& key) const { return (mapContainer->find(key) != mapContainer->end()); } VALUE& operator[] (const KEY& key) { return (*mapContainer)[key]; } }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MapFieldSelector.h000066400000000000000000000026621456444476200237150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MAPFIELDSELECTOR_H #define MAPFIELDSELECTOR_H #include "FieldSelector.h" namespace Lucene { typedef HashMap MapStringFieldSelectorResult; /// A {@link FieldSelector} based on a Map of field names to {@link FieldSelectorResult}s class LPPAPI MapFieldSelector : public FieldSelector { public: /// Create a MapFieldSelector /// @param fieldSelections maps from field names (String) to {@link FieldSelectorResult}s MapFieldSelector(MapStringFieldSelectorResult fieldSelections); /// Create a MapFieldSelector /// @param fields fields to LOAD. List of Strings. All other fields are NO_LOAD. MapFieldSelector(Collection fields); virtual ~MapFieldSelector(); LUCENE_CLASS(MapFieldSelector); public: MapStringFieldSelectorResult fieldSelections; public: /// Load field according to its associated value in fieldSelections /// @param field a field name /// @return the fieldSelections value that field maps to or NO_LOAD if none. virtual FieldSelectorResult accept(const String& fieldName); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MapOfSets.h000066400000000000000000000043401456444476200223670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MAPOFSETS_H #define MAPOFSETS_H #include "Lucene.h" namespace Lucene { /// Helper class for keeping Lists of Objects associated with keys. template class MapOfSets { public: typedef HashSet set_type; typedef HashMap map_type; MapOfSets(map_type m) { theMap = m; } protected: map_type theMap; public: /// @return direct access to the map backing this object. 
map_type getMap() { return theMap; } /// Adds val to the HashSet associated with key in the HashMap. If key is not already in the map, /// a new HashSet will first be created. /// @return the size of the HashSet associated with key once val is added to it. int32_t put(MAPKEY key, SETVALUE val) { typename map_type::iterator entry = theMap.find(key); if (entry != theMap.end()) { entry->second.add(val); return entry->second.size(); } else { set_type theSet(set_type::newInstance()); theSet.add(val); theMap.put(key, theSet); return 1; } } /// Adds multiple vals to the HashSet associated with key in the HashMap. If key is not already in /// the map, a new HashSet will first be created. /// @return the size of the HashSet associated with key once val is added to it. int32_t putAll(MAPKEY key, set_type vals) { typename map_type::iterator entry = theMap.find(key); if (entry != theMap.end()) { entry->second.addAll(vals.begin(), vals.end()); return entry->second.size(); } else { set_type theSet(set_type::newInstance(vals.begin(), vals.end())); theMap.put(key, theSet); return theSet.size(); } } }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MappingCharFilter.h000066400000000000000000000027251456444476200240720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MAPPINGCHARFILTER_H #define MAPPINGCHARFILTER_H #include "BaseCharFilter.h" namespace Lucene { /// Simplistic {@link CharFilter} that applies the mappings contained in a {@link NormalizeCharMap} to the character /// stream, and correcting the resulting changes to the offsets. class LPPAPI MappingCharFilter : public BaseCharFilter { public: /// Default constructor that takes a {@link CharStream}. 
MappingCharFilter(const NormalizeCharMapPtr& normMap, const CharStreamPtr& in); /// Easy-use constructor that takes a {@link Reader}. MappingCharFilter(const NormalizeCharMapPtr& normMap, const ReaderPtr& in); virtual ~MappingCharFilter(); LUCENE_CLASS(MappingCharFilter); protected: NormalizeCharMapPtr normMap; Collection buffer; String replacement; int32_t charPointer; int32_t nextCharCounter; public: virtual int32_t read(); virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); protected: int32_t nextChar(); void pushChar(int32_t c); void pushLastChar(int32_t c); NormalizeCharMapPtr match(const NormalizeCharMapPtr& map); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MatchAllDocsQuery.h000066400000000000000000000023061456444476200240520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MATCHALLDOCSQUERY_H #define MATCHALLDOCSQUERY_H #include "Query.h" namespace Lucene { /// A query that matches all documents. class LPPAPI MatchAllDocsQuery : public Query { public: /// @param normsField Field used for normalization factor (document boost). Null if nothing. 
MatchAllDocsQuery(const String& normsField = EmptyString); virtual ~MatchAllDocsQuery(); LUCENE_CLASS(MatchAllDocsQuery); protected: String normsField; public: using Query::toString; virtual WeightPtr createWeight(const SearcherPtr& searcher); virtual void extractTerms(SetTerm terms); virtual String toString(const String& field); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); friend class MatchAllDocsWeight; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MaxPayloadFunction.h000066400000000000000000000021741456444476200242760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MAXPAYLOADFUNCTION_H #define MAXPAYLOADFUNCTION_H #include "PayloadFunction.h" namespace Lucene { /// Returns the maximum payload score seen, else 1 if there are no payloads on the doc. /// /// Is thread safe and completely reusable. 
class LPPAPI MaxPayloadFunction : public PayloadFunction { public: virtual ~MaxPayloadFunction(); LUCENE_CLASS(MaxPayloadFunction); public: virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, double currentScore, double currentPayloadScore); virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore); virtual int32_t hashCode(); virtual bool equals(const LuceneObjectPtr& other); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MergeDocIDRemapper.h000066400000000000000000000026721456444476200241320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MERGEDOCIDREMAPPER_H #define MERGEDOCIDREMAPPER_H #include "LuceneObject.h" namespace Lucene { /// Remaps docIDs after a merge has completed, where the merged segments had at least one deletion. /// This is used to renumber the buffered deletes in IndexWriter when a merge of segments with deletions /// commits. 
class MergeDocIDRemapper : public LuceneObject { public: MergeDocIDRemapper(const SegmentInfosPtr& infos, Collection< Collection > docMaps, Collection delCounts, const OneMergePtr& merge, int32_t mergedDocCount); virtual ~MergeDocIDRemapper(); LUCENE_CLASS(MergeDocIDRemapper); public: Collection starts; // used for binary search of mapped docID Collection newStarts; // starts, minus the deletes Collection< Collection > docMaps; // maps docIDs in the merged set int32_t minDocID; // minimum docID that needs renumbering int32_t maxDocID; // 1+ the max docID that needs renumbering int32_t docShift; // total # deleted docs that were compacted by this merge public: int32_t remap(int32_t oldDocID); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MergePolicy.h000066400000000000000000000134321456444476200227470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MERGEPOLICY_H #define MERGEPOLICY_H #include "SegmentInfos.h" namespace Lucene { /// A MergePolicy determines the sequence of primitive merge operations to be used for overall merge /// and optimize operations. /// /// Whenever the segments in an index have been altered by {@link IndexWriter}, either the addition of /// a newly flushed segment, addition of many segments from addIndexes* calls, or a previous merge that /// may now need to cascade, {@link IndexWriter} invokes {@link #findMerges} to give the MergePolicy a /// chance to pick merges that are now required. This method returns a {@link MergeSpecification} /// instance describing the set of merges that should be done, or null if no merges are necessary. 
/// When IndexWriter.optimize is called, it calls {@link #findMergesForOptimize} and the MergePolicy /// should then return the necessary merges. /// /// Note that the policy can return more than one merge at a time. In this case, if the writer is using /// {@link SerialMergeScheduler}, the merges will be run sequentially but if it is using {@link /// ConcurrentMergeScheduler} they will be run concurrently. /// /// The default MergePolicy is {@link LogByteSizeMergePolicy}. /// /// NOTE: This API is new and still experimental (subject to change suddenly in the next release) class LPPAPI MergePolicy : public LuceneObject { public: MergePolicy(const IndexWriterPtr& writer); virtual ~MergePolicy(); LUCENE_CLASS(MergePolicy); protected: IndexWriterWeakPtr _writer; public: /// Determine what set of merge operations are now necessary on the index. {@link IndexWriter} calls /// this whenever there is a change to the segments. This call is always synchronized on the {@link /// IndexWriter} instance so only one thread at a time will call this method. /// @param segmentInfos the total set of segments in the index virtual MergeSpecificationPtr findMerges(const SegmentInfosPtr& segmentInfos) = 0; /// Determine what set of merge operations is necessary in order to optimize the index. {@link /// IndexWriter} calls this when its {@link IndexWriter#optimize()} method is called. This call is /// always synchronized on the {@link IndexWriter} instance so only one thread at a time will call /// this method. /// @param segmentInfos the total set of segments in the index /// @param maxSegmentCount requested maximum number of segments in the index (currently this is always 1) /// @param segmentsToOptimize contains the specific SegmentInfo instances that must be merged away. /// This may be a subset of all SegmentInfos. 
virtual MergeSpecificationPtr findMergesForOptimize(const SegmentInfosPtr& segmentInfos, int32_t maxSegmentCount, SetSegmentInfo segmentsToOptimize) = 0; /// Determine what set of merge operations is necessary in order to expunge all deletes from the index. /// @param segmentInfos the total set of segments in the index virtual MergeSpecificationPtr findMergesToExpungeDeletes(const SegmentInfosPtr& segmentInfos) = 0; /// Release all resources for the policy. virtual void close() = 0; /// Returns true if a newly flushed (not from merge) segment should use the compound file format. virtual bool useCompoundFile(const SegmentInfosPtr& segments, const SegmentInfoPtr& newSegment) = 0; /// Returns true if the doc store files should use the compound file format. virtual bool useCompoundDocStore(const SegmentInfosPtr& segments) = 0; }; /// OneMerge provides the information necessary to perform an individual primitive merge operation, /// resulting in a single new segment. The merge spec includes the subset of segments to be merged /// as well as whether the new segment should use the compound file format. class LPPAPI OneMerge : public LuceneObject { public: OneMerge(const SegmentInfosPtr& segments, bool useCompoundFile); virtual ~OneMerge(); LUCENE_CLASS(OneMerge); public: SegmentInfoPtr info; // used by IndexWriter bool mergeDocStores; // used by IndexWriter bool optimize; // used by IndexWriter bool registerDone; // used by IndexWriter int64_t mergeGen; // used by IndexWriter bool isExternal; // used by IndexWriter int32_t maxNumSegmentsOptimize; // used by IndexWriter Collection readers; // used by IndexWriter Collection readersClone; // used by IndexWriter SegmentInfosPtr segments; bool useCompoundFile; bool aborted; LuceneException error; public: /// Record that an exception occurred while executing this merge void setException(const LuceneException& error); /// Retrieve previous exception set by {@link #setException}. 
LuceneException getException(); /// Mark this merge as aborted. If this is called before the merge is committed then the merge will not be committed. void abort(); /// Returns true if this merge was aborted. bool isAborted(); void checkAborted(const DirectoryPtr& dir); String segString(const DirectoryPtr& dir); }; /// A MergeSpecification instance provides the information necessary to perform multiple merges. /// It simply contains a list of {@link OneMerge} instances. class LPPAPI MergeSpecification : public LuceneObject { public: MergeSpecification(); virtual ~MergeSpecification(); LUCENE_CLASS(MergeSpecification); public: Collection merges; public: void add(const OneMergePtr& merge); String segString(const DirectoryPtr& dir); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MergeScheduler.h000066400000000000000000000017201456444476200234230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MERGESCHEDULER_H #define MERGESCHEDULER_H #include "LuceneObject.h" namespace Lucene { /// {@link IndexWriter} uses an instance implementing this interface to execute the merges /// selected by a {@link MergePolicy}. The default MergeScheduler is {@link ConcurrentMergeScheduler}. class LPPAPI MergeScheduler : public LuceneObject { public: virtual ~MergeScheduler(); LUCENE_CLASS(MergeScheduler); public: /// Run the merges provided by {@link IndexWriter#getNextMerge()}. virtual void merge(const IndexWriterPtr& writer) = 0; /// Close this MergeScheduler. 
virtual void close() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MinPayloadFunction.h000066400000000000000000000020341456444476200242670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MINPAYLOADFUNCTION_H #define MINPAYLOADFUNCTION_H #include "PayloadFunction.h" namespace Lucene { /// Calculates the minimum payload seen class LPPAPI MinPayloadFunction : public PayloadFunction { public: virtual ~MinPayloadFunction(); LUCENE_CLASS(MinPayloadFunction); public: virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, double currentScore, double currentPayloadScore); virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore); virtual int32_t hashCode(); virtual bool equals(const LuceneObjectPtr& other); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MiscUtils.h000066400000000000000000000137111456444476200224440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MISCUTILS_H #define MISCUTILS_H #include "Lucene.h" namespace Lucene { class LPPAPI MiscUtils { protected: static const uint32_t SINGLE_EXPONENT_MASK; static const uint32_t SINGLE_MANTISSA_MASK; static const uint32_t SINGLE_NAN_BITS; static const uint64_t DOUBLE_SIGN_MASK; static const uint64_t DOUBLE_EXPONENT_MASK; static const uint64_t DOUBLE_MANTISSA_MASK; static const uint64_t DOUBLE_NAN_BITS; public: /// Return given time in milliseconds. static uint64_t getTimeMillis(boost::posix_time::ptime time); /// Returns the current time in milliseconds. static uint64_t currentTimeMillis(); /// This over-allocates proportional to the list size, making room for additional growth. /// The over-allocation is mild, but is enough to give linear-time amortized behavior over a long /// sequence of appends(). /// The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... static int32_t getNextSize(int32_t targetSize); /// Only reallocate if we are "substantially" smaller. This saves us from "running hot" (constantly /// making a bit bigger then a bit smaller, over and over) static int32_t getShrinkSize(int32_t currentSize, int32_t targetSize); /// Compares two byte[] arrays, element by element, and returns the number of elements common to /// both arrays. /// @param bytes1 The first byte[] to compare /// @param bytes2 The second byte[] to compare /// @return The number of common elements. 
static int32_t bytesDifference(uint8_t* bytes1, int32_t len1, uint8_t* bytes2, int32_t len2); template static int32_t hashLucene(TYPE type) { return type->hashCode(); } template static int32_t hashNumeric(TYPE type) { return type; } template static int32_t hashCode(ITER first, ITER last, PRED pred) { int32_t code = 0; for (ITER hash = first; hash != last; ++hash) { code = code * 31 + pred(*hash); } return code; } /// Returns hash of chars in range start (inclusive) to end (inclusive) static int32_t hashCode(const wchar_t* array, int32_t start, int32_t end); /// Returns hash of bytes in range start (inclusive) to end (inclusive) static int32_t hashCode(const uint8_t* array, int32_t start, int32_t end); /// Returns hash code of given boolean static int32_t hashCode(bool value); /// Copy elements from on buffer to another template static void arrayCopy(SOURCE source, int32_t sourceOffset, DEST dest, int32_t destOffset, int32_t length) { std::copy(source + sourceOffset, source + sourceOffset + length, dest + destOffset); } /// Fill buffer with given element template static void arrayFill(DEST dest, int32_t destFrom, int32_t destTo, FILL value) { std::fill(dest + destFrom, dest + destTo, value); } /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point /// "single format" bit layout. static int32_t doubleToIntBits(double value); /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point /// "single format" bit layout, preserving Not-a-Number (NaN) values. static int32_t doubleToRawIntBits(double value); /// Returns the float value corresponding to a given bit representation. The argument is considered to be a /// representation of a floating-point value according to the IEEE 754 floating-point "single format" bit layout. 
static double intBitsToDouble(int32_t bits); /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point /// "double format" bit layout. static int64_t doubleToLongBits(double value); /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point /// "double format" bit layout, preserving Not-a-Number (NaN) values. static int64_t doubleToRawLongBits(double value); /// Returns the double value corresponding to a given bit representation. The argument is considered to be a /// representation of a floating-point value according to the IEEE 754 floating-point "double format" bit layout. static double longBitsToDouble(int64_t bits); /// Returns true if the specified number is infinitely large in magnitude, false otherwise. static bool isInfinite(double value); /// Returns true if this Double value is a Not-a-Number (NaN), false otherwise. static bool isNaN(double value); /// Return whether given Lucene object is of a specified type template static bool typeOf(const LuceneObjectPtr& object) { return boost::dynamic_pointer_cast(object).get() != NULL; } /// Return whether given Lucene objects are of equal type. static bool equalTypes(const LuceneObjectPtr& first, const LuceneObjectPtr& second); /// Perform unsigned right-shift (left bits are zero filled) static int64_t unsignedShift(int64_t num, int64_t shift); /// Perform unsigned right-shift (left bits are zero filled) static int32_t unsignedShift(int32_t num, int32_t shift); }; inline int64_t MiscUtils::unsignedShift(int64_t num, int64_t shift) { return (shift & 0x3f) == 0 ? num : (((uint64_t)num >> 1) & 0x7fffffffffffffffLL) >> ((shift & 0x3f) - 1); } inline int32_t MiscUtils::unsignedShift(int32_t num, int32_t shift) { return (shift & 0x1f) == 0 ? 
num : (((uint32_t)num >> 1) & 0x7fffffff) >> ((shift & 0x1f) - 1); } } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MultiFieldQueryParser.h000066400000000000000000000140521456444476200247700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MULTIFIELDQUERYPARSER_H #define MULTIFIELDQUERYPARSER_H #include "QueryParser.h" #include "BooleanClause.h" namespace Lucene { /// A QueryParser which constructs queries to search multiple fields. class LPPAPI MultiFieldQueryParser : public QueryParser { public: /// Creates a MultiFieldQueryParser. Allows passing of a map with term to Boost, and the boost to /// apply to each term. /// /// It will, when parse(String query) is called, construct a query like this (assuming the query /// consists of two terms and you specify the two fields title and body): ///
    /// (title:term1 body:term1) (title:term2 body:term2)
    /// 
/// /// When setDefaultOperator(AND_OPERATOR) is set, the result will be: ///
    /// +(title:term1 body:term1) +(title:term2 body:term2)
    /// 
/// /// When you pass a boost (title=>5 body=>10) you can get: ///
    /// +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0)
    /// 
/// /// In other words, all the query's terms must appear, but it doesn't matter in what fields they /// appear. MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, const AnalyzerPtr& analyzer, MapStringDouble boosts); /// Creates a MultiFieldQueryParser. It will, when parse(String query) is called, construct a /// query like this (assuming the query consists of two terms and you specify the two fields /// title and body): ///
    /// (title:term1 body:term1) (title:term2 body:term2)
    /// 
/// /// When setDefaultOperator(AND_OPERATOR) is set, the result will be: ///
    /// +(title:term1 body:term1) +(title:term2 body:term2)
    /// 
/// /// In other words, all the query's terms must appear, but it doesn't matter in what fields they /// appear. MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, const AnalyzerPtr& analyzer); virtual ~MultiFieldQueryParser(); LUCENE_CLASS(MultiFieldQueryParser); protected: Collection fields; MapStringDouble boosts; public: using QueryParser::parse; /// Parses a query which searches on the fields specified. /// /// If x fields are specified, this effectively constructs: ///
    /// (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
    /// 
/// @param matchVersion Lucene version to match; this is passed through to QueryParser. /// @param queries Queries strings to parse /// @param fields Fields to search on /// @param analyzer Analyzer to use static QueryPtr parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, const AnalyzerPtr& analyzer); /// Parses a query, searching on the fields specified. Use this if you need to specify certain fields as /// required, and others as prohibited. /// ///
    /// Usage:
    /// Collection fields = newCollection(L"filename", L"contents", L"description");
    /// Collection flags = newCollection(BooleanClause::SHOULD, BooleanClause::MUST, BooleanClause::MUST_NOT);
    /// MultiFieldQueryParser::parse(L"query", fields, flags, analyzer);
    /// 
/// /// The code above would construct a query: ///
    /// (filename:query) +(contents:query) -(description:query)
    /// 
/// /// @param matchVersion Lucene version to match; this is passed through to QueryParser. /// @param query Query string to parse /// @param fields Fields to search on /// @param flags Flags describing the fields /// @param analyzer Analyzer to use static QueryPtr parse(LuceneVersion::Version matchVersion, const String& query, Collection fields, Collection flags, const AnalyzerPtr& analyzer); /// Parses a query, searching on the fields specified. Use this if you need to specify certain fields as /// required, and others as prohibited. /// ///
    /// Usage:
    /// Collection query = newCollection(L"query1", L"query2", L"query3");
    /// Collection fields = newCollection(L"filename", L"contents", L"description");
    /// Collection flags = newCollection(BooleanClause::SHOULD, BooleanClause::MUST, BooleanClause::MUST_NOT);
    /// MultiFieldQueryParser::parse(query, fields, flags, analyzer);
    /// 
/// /// The code above would construct a query: ///
    /// (filename:query1) +(contents:query2) -(description:query3)
    /// 
/// /// @param matchVersion Lucene version to match; this is passed through to QueryParser. /// @param queries Queries string to parse /// @param fields Fields to search on /// @param flags Flags describing the fields /// @param analyzer Analyzer to use static QueryPtr parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, Collection flags, const AnalyzerPtr& analyzer); protected: virtual QueryPtr getFieldQuery(const String& field, const String& queryText, int32_t slop); virtual QueryPtr getFieldQuery(const String& field, const String& queryText); void applySlop(const QueryPtr& query, int32_t slop); virtual QueryPtr getFuzzyQuery(const String& field, const String& termStr, double minSimilarity); virtual QueryPtr getPrefixQuery(const String& field, const String& termStr); virtual QueryPtr getWildcardQuery(const String& field, const String& termStr); virtual QueryPtr getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MultiLevelSkipListReader.h000066400000000000000000000104041456444476200254140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MULTILEVELSKIPLISTREADER_H #define MULTILEVELSKIPLISTREADER_H #include "IndexInput.h" namespace Lucene { /// This abstract class reads skip lists with multiple levels. /// /// See {@link MultiLevelSkipListWriter} for the information about the encoding of the multi level skip lists. /// /// Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)} which defines the /// actual format of the skip data. 
class MultiLevelSkipListReader : public LuceneObject { public: MultiLevelSkipListReader(const IndexInputPtr& skipStream, int32_t maxSkipLevels, int32_t skipInterval); virtual ~MultiLevelSkipListReader(); LUCENE_CLASS(MultiLevelSkipListReader); protected: /// the maximum number of skip levels possible for this index int32_t maxNumberOfSkipLevels; /// number of levels in this skip list int32_t numberOfSkipLevels; /// Defines the number of top skip levels to buffer in memory. Reducing this number results in less /// memory usage, but possibly slower performance due to more random I/Os. Please notice that the space /// each level occupies is limited by the skipInterval. The top level can not contain more than /// skipLevel entries, the second top level can not contain more than skipLevel^2 entries and so forth. int32_t numberOfLevelsToBuffer; int32_t docCount; bool haveSkipped; Collection skipStream; // skipStream for each level Collection skipPointer; // the start pointer of each skip level Collection skipInterval; // skipInterval of each level Collection numSkipped; // number of docs skipped per level Collection skipDoc; // doc id of current skip entry per level int32_t lastDoc; // doc id of last read skip entry with docId <= target Collection childPointer; // child pointer of current skip entry per level int64_t lastChildPointer; // childPointer of last read skip entry with docId <= target bool inputIsBuffered; public: /// Returns the id of the doc to which the last call of {@link #skipTo(int)} has skipped. virtual int32_t getDoc(); /// Skips entries to the first beyond the current whose document number is greater than or equal to /// target. Returns the current doc count. virtual int32_t skipTo(int32_t target); virtual void close(); /// Initializes the reader. 
virtual void init(int64_t skipPointer, int32_t df); protected: virtual bool loadNextSkip(int32_t level); /// Seeks the skip entry on the given level virtual void seekChild(int32_t level); /// Loads the skip levels virtual void loadSkipLevels(); /// Subclasses must implement the actual skip data encoding in this method. /// /// @param level the level skip data shall be read from /// @param skipStream the skip stream to read from virtual int32_t readSkipData(int32_t level, const IndexInputPtr& skipStream) = 0; /// Copies the values of the last read skip entry on this level virtual void setLastSkipData(int32_t level); }; /// Used to buffer the top skip levels class SkipBuffer : public IndexInput { public: SkipBuffer(const IndexInputPtr& input, int32_t length); virtual ~SkipBuffer(); LUCENE_CLASS(SkipBuffer); protected: ByteArray data; int64_t pointer; int32_t pos; public: /// Closes the stream to further operations. virtual void close(); /// Returns the current position in this file, where the next read will occur. virtual int64_t getFilePointer(); /// The number of bytes in the file. virtual int64_t length(); /// Reads and returns a single byte. virtual uint8_t readByte(); /// Reads a specified number of bytes into an array at the specified offset. virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); /// Sets current position in this file, where the next read will occur. virtual void seek(int64_t pos); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MultiLevelSkipListWriter.h000066400000000000000000000054751456444476200255020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef MULTILEVELSKIPLISTWRITER_H #define MULTILEVELSKIPLISTWRITER_H #include "LuceneObject.h" namespace Lucene { /// This abstract class writes skip lists with multiple levels. /// /// Example for skipInterval = 3: /// /// c (skip level 2) /// c c c (skip level 1) /// x x x x x x x x x x (skip level 0) /// d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list) /// 3 6 9 12 15 18 21 24 27 30 (df) /// /// d - document /// x - skip data /// c - skip data with child pointer /// /// Skip level i contains every skipInterval-th entry from skip level i-1. /// Therefore the number of entries on level i is: floor(df / ((skipInterval ^ (i + 1))). /// /// Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1. /// This guarantees a logarithmic amount of skips to find the target document. /// /// While this class takes care of writing the different skip levels, subclasses must define the /// actual format of the skip data. class MultiLevelSkipListWriter : public LuceneObject { public: MultiLevelSkipListWriter(int32_t skipInterval, int32_t maxSkipLevels, int32_t df); virtual ~MultiLevelSkipListWriter(); LUCENE_CLASS(MultiLevelSkipListWriter); protected: /// number of levels in this skip list int32_t numberOfSkipLevels; /// the skip interval in the list with level = 0 int32_t skipInterval; /// for every skip level a different buffer is used Collection skipBuffer; public: /// Writes the current skip data to the buffers. The current document frequency determines /// the max level is skip data is to be written to. /// @param df the current document frequency void bufferSkip(int32_t df); /// Writes the buffered skip lists to the given output. 
/// @param output the IndexOutput the skip lists shall be written to /// @return the pointer the skip list starts int64_t writeSkip(const IndexOutputPtr& output); protected: void init(); virtual void resetSkip(); /// Subclasses must implement the actual skip data encoding in this method. /// @param level the level skip data shall be writing for /// @param skipBuffer the skip buffer to write to virtual void writeSkipData(int32_t level, const IndexOutputPtr& skipBuffer) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MultiPhraseQuery.h000066400000000000000000000052561456444476200240200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MULTIPHRASEQUERY_H #define MULTIPHRASEQUERY_H #include "Query.h" namespace Lucene { /// MultiPhraseQuery is a generalized version of PhraseQuery, with an added method {@link #add(Term[])}. /// To use this class, to search for the phrase "Microsoft app*" first use add(Term) on the term "Microsoft", /// then find all terms that have "app" as prefix using IndexReader.terms(Term), and use /// MultiPhraseQuery.add(Term[] terms) to add them to the query. class LPPAPI MultiPhraseQuery : public Query { public: MultiPhraseQuery(); virtual ~MultiPhraseQuery(); LUCENE_CLASS(MultiPhraseQuery); protected: String field; Collection< Collection > termArrays; Collection positions; int32_t slop; public: using Query::toString; /// Sets the phrase slop for this query. /// @see PhraseQuery#setSlop(int32_t) void setSlop(int32_t s); /// Gets the phrase slop for this query. /// @see PhraseQuery#getSlop() int32_t getSlop(); /// Add a single term at the next position in the phrase. 
/// @see PhraseQuery#add(Term) void add(const TermPtr& term); /// Add multiple terms at the next position in the phrase. Any of the terms may match. /// @see PhraseQuery#add(Term) void add(Collection terms); /// Allows to specify the relative position of terms within the phrase. /// @see PhraseQuery#add(Term, int) void add(Collection terms, int32_t position); /// Returns a List of the terms in the multiphrase. Do not modify the List or its contents. Collection< Collection > getTermArrays(); /// Returns the relative positions of terms in this phrase. Collection getPositions(); virtual void extractTerms(SetTerm terms); virtual QueryPtr rewrite(const IndexReaderPtr& reader); virtual WeightPtr createWeight(const SearcherPtr& searcher); /// Prints a user-readable version of this query. virtual String toString(const String& field); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); protected: int32_t termArraysHashCode(); bool termArraysEquals(Collection< Collection > first, Collection< Collection > second); friend class MultiPhraseWeight; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MultiReader.h000066400000000000000000000130371456444476200227460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MULTIREADER_H #define MULTIREADER_H #include "IndexReader.h" namespace Lucene { /// An IndexReader which reads multiple indexes, appending their content. class LPPAPI MultiReader : public IndexReader { public: /// Construct a MultiReader aggregating the named set of (sub)readers. 
Directory locking for delete, /// undeleteAll, and setNorm operations is left to the subreaders. /// @param closeSubReaders indicates whether the subreaders should be closed when this MultiReader is closed /// @param subReaders set of (sub)readers MultiReader(Collection subReaders, bool closeSubReaders = true); virtual ~MultiReader(); LUCENE_CLASS(MultiReader); protected: Collection subReaders; Collection starts; // 1st docno for each segment Collection decrefOnClose; // remember which subreaders to decRef on close MapStringByteArray normsCache; int32_t _maxDoc; int32_t _numDocs; bool _hasDeletions; public: /// Tries to reopen the subreaders. /// /// If one or more subreaders could be re-opened (ie. subReader.reopen() returned a new instance != subReader), /// then a new MultiReader instance is returned, otherwise this instance is returned. /// /// A re-opened instance might share one or more subreaders with the old instance. Index modification /// operations result in undefined behavior when performed before the old instance is closed. (see {@link /// IndexReader#reopen()}). /// /// If subreaders are shared, then the reference count of those readers is increased to ensure that the /// subreaders remain open until the last referring reader is closed. virtual IndexReaderPtr reopen(); /// Clones the subreaders. (see {@link IndexReader#clone()}). /// /// If subreaders are shared, then the reference count of those readers is increased to ensure that the /// subreaders remain open until the last referring reader is closed. 
virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual Collection getTermFreqVectors(int32_t docNumber); virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper); virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper); virtual bool isOptimized(); /// Returns the number of documents in this index. virtual int32_t numDocs(); /// Returns one greater than the largest possible document number. virtual int32_t maxDoc(); /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine /// what {@link Field}s to load and how they should be loaded. virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector); /// Returns true if document n has been deleted virtual bool isDeleted(int32_t n); /// Returns true if any documents have been deleted virtual bool hasDeletions(); /// Returns true if there are norms stored for this field. virtual bool hasNorms(const String& field); /// Returns the byte-encoded normalization factor for the named field of every document. virtual ByteArray norms(const String& field); /// Reads the byte-encoded normalization factor for the named field of every document. virtual void norms(const String& field, ByteArray norms, int32_t offset); /// Returns an enumeration of all the terms in the index. virtual TermEnumPtr terms(); /// Returns an enumeration of all terms starting at a given term. virtual TermEnumPtr terms(const TermPtr& t); /// Returns the number of documents containing the term t. virtual int32_t docFreq(const TermPtr& t); /// Returns an unpositioned {@link TermDocs} enumerator. virtual TermDocsPtr termDocs(); /// Returns an unpositioned {@link TermPositions} enumerator. 
virtual TermPositionsPtr termPositions(); /// Get a list of unique field names that exist in this index and have the specified field option /// information. virtual HashSet getFieldNames(FieldOption fieldOption); /// Checks recursively if all subreaders are up to date. virtual bool isCurrent(); /// Not implemented. virtual int64_t getVersion(); /// Returns the sequential sub readers that this reader is logically composed of. virtual Collection getSequentialSubReaders(); protected: /// If clone is true then we clone each of the subreaders /// @param doClone /// @return New IndexReader, or same one (this) if reopen/clone is not necessary IndexReaderPtr doReopen(bool doClone); /// Implements deletion of the document numbered docNum. virtual void doDelete(int32_t docNum); /// Implements actual undeleteAll() in subclass. virtual void doUndeleteAll(); /// Find reader for doc n int32_t readerIndex(int32_t n); /// Implements setNorm in subclass. virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); virtual void doCommit(MapStringString commitUserData); /// Implements close. virtual void doClose(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MultiSearcher.h000066400000000000000000000051521456444476200232770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MULTISEARCHER_H #define MULTISEARCHER_H #include "Searcher.h" #include "Collector.h" namespace Lucene { /// Implements search over a set of Searchables. /// /// Applications usually need only call the inherited {@link #search(QueryPtr, int32_t)} or {@link /// #search(QueryPtr, FilterPtr, int32_t)} methods. 
class LPPAPI MultiSearcher : public Searcher { public: /// Creates a searcher which searches searchers. MultiSearcher(Collection searchables); virtual ~MultiSearcher(); LUCENE_CLASS(MultiSearcher); protected: Collection searchables; Collection starts; int32_t _maxDoc; public: using Searcher::search; /// Return the array of {@link Searchable}s this searches. Collection getSearchables(); virtual void close(); virtual int32_t docFreq(const TermPtr& term); virtual DocumentPtr doc(int32_t n); virtual DocumentPtr doc(int32_t n, const FieldSelectorPtr& fieldSelector); /// Returns index of the searcher for document n in the array used to construct this searcher. int32_t subSearcher(int32_t n); /// Returns the document number of document n within its sub-index. int32_t subDoc(int32_t n); virtual int32_t maxDoc(); virtual TopDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n); virtual TopFieldDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort); virtual void search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& results); virtual QueryPtr rewrite(const QueryPtr& query); virtual ExplanationPtr explain(const WeightPtr& weight, int32_t doc); protected: Collection getStarts(); /// Create weight in multiple index scenario. /// /// Distributed query processing is done in the following steps: /// 1. rewrite query. /// 2. extract necessary terms. /// 3. collect dfs for these terms from the Searchables. /// 4. create query weight using aggregate dfs. /// 5. distribute that weight to Searchables. /// 6. merge results. 
/// /// Steps 1-4 are done here, 5+6 in the search() methods /// /// @return rewritten queries virtual WeightPtr createWeight(const QueryPtr& query); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MultiTermQuery.h000066400000000000000000000174401456444476200235030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MULTITERMQUERY_H #define MULTITERMQUERY_H #include "Query.h" namespace Lucene { /// An abstract {@link Query} that matches documents containing a subset of terms provided by a {@link /// FilteredTermEnum} enumeration. /// /// This query cannot be used directly; you must subclass it and define {@link #getEnum} to provide a /// {@link FilteredTermEnum} that iterates through the terms to be matched. /// /// NOTE: if {@link #setRewriteMethod} is either {@link #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} or {@link /// #SCORING_BOOLEAN_QUERY_REWRITE}, you may encounter a {@link BooleanQuery.TooManyClauses} exception /// during searching, which happens when the number of terms to be searched exceeds {@link /// BooleanQuery#getMaxClauseCount()}. Setting {@link #setRewriteMethod} to {@link /// #CONSTANT_SCORE_FILTER_REWRITE} prevents this. /// /// The recommended rewrite method is {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU /// computing unhelpful scores, and it tries to pick the most performant rewrite method given the query. /// /// Note that {@link QueryParser} produces MultiTermQueries using {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} /// by default. 
class LPPAPI MultiTermQuery : public Query { public: MultiTermQuery(); virtual ~MultiTermQuery(); LUCENE_CLASS(MultiTermQuery); protected: RewriteMethodPtr rewriteMethod; int32_t numberOfTerms; public: /// A rewrite method that first creates a private Filter, by visiting each term in sequence and marking /// all docs for that term. Matching documents are assigned a constant score equal to the query's boost. /// /// This method is faster than the BooleanQuery rewrite methods when the number of matched terms or matched /// documents is non-trivial. Also, it will never hit an errant TooManyClauses exception. /// /// @see #setRewriteMethod static RewriteMethodPtr CONSTANT_SCORE_FILTER_REWRITE(); /// A rewrite method that first translates each term into {@link BooleanClause.Occur#SHOULD} clause in a /// BooleanQuery, and keeps the scores as computed by the query. Note that typically such scores are /// meaningless to the user, and require non-trivial CPU to compute, so it's almost always better to use /// {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead. /// /// NOTE: This rewrite method will hit {@link BooleanQuery.TooManyClauses} if the number of terms exceeds /// {@link BooleanQuery#getMaxClauseCount}. /// /// @see #setRewriteMethod static RewriteMethodPtr SCORING_BOOLEAN_QUERY_REWRITE(); /// Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except scores are not computed. Instead, each matching /// document receives a constant score equal to the query's boost. /// /// NOTE: This rewrite method will hit TooManyClauses if the number of terms exceeds {@link /// BooleanQuery#getMaxClauseCount}. 
/// /// @see #setRewriteMethod static RewriteMethodPtr CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE(); /// Read-only default instance of {@link ConstantScoreAutoRewrite}, with {@link /// ConstantScoreAutoRewrite#setTermCountCutoff} set to {@link ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF} /// and {@link ConstantScoreAutoRewrite#setDocCountPercent} set to {@link /// ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}. Note that you cannot alter the configuration of /// this instance; you'll need to create a private instance instead. static RewriteMethodPtr CONSTANT_SCORE_AUTO_REWRITE_DEFAULT(); /// Return the number of unique terms visited during execution of the query. If there are many of them, /// you may consider using another query type or optimize your total term count in index. /// /// This method is not thread safe, be sure to only call it when no query is running! If you re-use the /// same query instance for another search, be sure to first reset the term counter with {@link /// #clearTotalNumberOfTerms}. /// /// On optimized indexes / no MultiReaders, you get the correct number of unique terms for the whole index. /// Use this number to compare different queries. For non-optimized indexes this number can also be achieved /// in non-constant-score mode. In constant-score mode you get the total number of terms seeked for all /// segments / sub-readers. /// @see #clearTotalNumberOfTerms int32_t getTotalNumberOfTerms(); /// Resets the counting of unique terms. Do this before executing the query/filter. /// @see #getTotalNumberOfTerms void clearTotalNumberOfTerms(); virtual QueryPtr rewrite(const IndexReaderPtr& reader); /// @see #setRewriteMethod virtual RewriteMethodPtr getRewriteMethod(); /// Sets the rewrite method to be used when executing the query. You can use one of the four core methods, /// or implement your own subclass of {@link RewriteMethod}. 
virtual void setRewriteMethod(const RewriteMethodPtr& method); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual int32_t hashCode(); virtual bool equals(const LuceneObjectPtr& other); protected: /// Construct the enumeration to be used, expanding the pattern term. virtual FilteredTermEnumPtr getEnum(const IndexReaderPtr& reader) = 0; void incTotalNumberOfTerms(int32_t inc); friend class MultiTermQueryWrapperFilter; friend class ScoringBooleanQueryRewrite; friend class ConstantScoreAutoRewrite; }; /// Abstract class that defines how the query is rewritten. class LPPAPI RewriteMethod : public LuceneObject { public: virtual ~RewriteMethod(); LUCENE_CLASS(RewriteMethod); public: virtual QueryPtr rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) = 0; }; /// A rewrite method that tries to pick the best constant-score rewrite method based on term and document /// counts from the query. If both the number of terms and documents is small enough, then {@link /// #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} is used. Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is /// used. class LPPAPI ConstantScoreAutoRewrite : public RewriteMethod { public: ConstantScoreAutoRewrite(); virtual ~ConstantScoreAutoRewrite(); LUCENE_CLASS(ConstantScoreAutoRewrite); public: // Defaults derived from rough tests with a 20.0 million doc Wikipedia index. With more than 350 terms // in the query, the filter method is fastest static const int32_t DEFAULT_TERM_COUNT_CUTOFF; // If the query will hit more than 1 in 1000 of the docs in the index (0.1%), the filter method is fastest static const double DEFAULT_DOC_COUNT_PERCENT; protected: int32_t termCountCutoff; double docCountPercent; public: /// If the number of terms in this query is equal to or larger than this setting then {@link /// #CONSTANT_SCORE_FILTER_REWRITE} is used. 
virtual void setTermCountCutoff(int32_t count); /// @see #setTermCountCutoff virtual int32_t getTermCountCutoff(); /// If the number of documents to be visited in the postings exceeds this specified percentage of the /// maxDoc() for the index, then {@link #CONSTANT_SCORE_FILTER_REWRITE} is used. /// @param percent 0.0 to 100.0 virtual void setDocCountPercent(double percent); /// @see #setDocCountPercent virtual double getDocCountPercent(); virtual QueryPtr rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query); virtual int32_t hashCode(); virtual bool equals(const LuceneObjectPtr& other); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MultiTermQueryWrapperFilter.h000066400000000000000000000043571456444476200262150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MULTITERMQUERYWRAPPERFILTER_H #define MULTITERMQUERYWRAPPERFILTER_H #include "Filter.h" namespace Lucene { /// A wrapper for {@link MultiTermQuery}, that exposes its functionality as a {@link Filter}. /// /// MultiTermQueryWrapperFilter is not designed to be used by itself. Normally you subclass it to /// provide a Filter counterpart for a {@link MultiTermQuery} subclass. /// /// For example, {@link TermRangeFilter} and {@link PrefixFilter} extend MultiTermQueryWrapperFilter. /// This class also provides the functionality behind {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}; /// this is why it is not abstract. class LPPAPI MultiTermQueryWrapperFilter : public Filter { INTERNAL: /// Wrap a {@link MultiTermQuery} as a Filter. 
MultiTermQueryWrapperFilter(const MultiTermQueryPtr& query); public: virtual ~MultiTermQueryWrapperFilter(); LUCENE_CLASS(MultiTermQueryWrapperFilter); protected: MultiTermQueryPtr query; public: virtual String toString(); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); /// Return the number of unique terms visited during execution of the filter. If there are many of them, /// you may consider using another filter type or optimize your total term count in index. /// /// This method is not thread safe, be sure to only call it when no filter is running! If you re-use the /// same filter instance for another search, be sure to first reset the term counter with {@link /// #clearTotalNumberOfTerms}. /// @see #clearTotalNumberOfTerms int32_t getTotalNumberOfTerms(); /// Resets the counting of unique terms. Do this before executing the filter. /// @see #getTotalNumberOfTerms void clearTotalNumberOfTerms(); /// Returns a DocIdSet with documents that should be permitted in search results. virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/MultipleTermPositions.h000066400000000000000000000030401456444476200250550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MULTIPLETERMPOSITIONS_H #define MULTIPLETERMPOSITIONS_H #include "TermPositions.h" namespace Lucene { /// Allows you to iterate over the {@link TermPositions} for multiple {@link Term}s as a single /// {@link TermPositions}. 
class LPPAPI MultipleTermPositions : public TermPositions, public LuceneObject { public: MultipleTermPositions(const IndexReaderPtr& indexReader, Collection terms); virtual ~MultipleTermPositions(); LUCENE_CLASS(MultipleTermPositions); protected: int32_t _doc; int32_t _freq; TermPositionsQueuePtr termPositionsQueue; IntQueuePtr posList; public: virtual bool next(); virtual int32_t nextPosition(); virtual bool skipTo(int32_t target); virtual int32_t doc(); virtual int32_t freq(); virtual void close(); /// Not implemented. virtual void seek(const TermPtr& term); /// Not implemented. virtual void seek(const TermEnumPtr& termEnum); /// Not implemented. virtual int32_t read(Collection& docs, Collection& freqs); /// Not implemented. virtual ByteArray getPayload(ByteArray data, int32_t offset); /// @return false virtual bool isPayloadAvailable(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/NativeFSLockFactory.h000066400000000000000000000031711456444476200243470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NATIVEFSLOCKFACTORY_H #define NATIVEFSLOCKFACTORY_H #include "FSLockFactory.h" namespace Lucene { /// Implements {@link LockFactory} using native file lock. /// @see LockFactory class LPPAPI NativeFSLockFactory : public FSLockFactory { public: /// Create a NativeFSLockFactory instance, storing lock files into /// the specified lockDirName. /// @param lockDirName where lock files are created. NativeFSLockFactory(const String& lockDirName = EmptyString); virtual ~NativeFSLockFactory(); LUCENE_CLASS(NativeFSLockFactory); public: /// Return a new Lock instance identified by lockName. /// @param lockName name of the lock to be created. 
virtual LockPtr makeLock(const String& lockName); /// Attempt to clear (forcefully unlock and remove) the /// specified lock. Only call this at a time when you are /// certain this lock is no longer in use. /// @param lockName name of the lock to be cleared. virtual void clearLock(const String& lockName); protected: /// Simple test to verify locking system is "working". On NFS, if /// it's mis-configured, you can hit long (35 second) timeouts which /// cause Lock.obtain to take far too long (it assumes the obtain() /// call takes zero time). void acquireTestLock(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/NearSpansOrdered.h000066400000000000000000000065351456444476200237350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NEARSPANSORDERED_H #define NEARSPANSORDERED_H #include "Spans.h" namespace Lucene { /// A Spans that is formed from the ordered subspans of a SpanNearQuery where the subspans do not overlap /// and have a maximum slop between them. /// /// The formed spans only contains minimum slop matches. The matching slop is computed from the distance(s) /// between the non overlapping matching Spans. /// /// Successive matches are always formed from the successive Spans of the SpanNearQuery. /// /// The formed spans may contain overlaps when the slop is at least 1. For example, when querying using ///
t1 t2 t3
/// with slop at least 1, the fragment: ///
t1 t2 t1 t3 t2 t3
/// matches twice: ///
t1 t2 .. t3      
///
      t1 .. t2 t3
/// /// Note: Only public for subclassing. Most implementations should not need this class class LPPAPI NearSpansOrdered : public Spans { public: NearSpansOrdered(const SpanNearQueryPtr& spanNearQuery, const IndexReaderPtr& reader, bool collectPayloads = true); virtual ~NearSpansOrdered(); LUCENE_CLASS(NearSpansOrdered); protected: int32_t allowedSlop; bool firstTime; bool more; /// The spans in the same order as the SpanNearQuery Collection subSpans; /// Indicates that all subSpans have same doc() bool inSameDoc; int32_t matchDoc; int32_t matchStart; int32_t matchEnd; Collection matchPayload; Collection subSpansByDoc; SpanNearQueryPtr query; bool collectPayloads; public: virtual int32_t doc(); virtual int32_t start(); virtual int32_t end(); Collection getSubSpans(); virtual Collection getPayload(); virtual bool isPayloadAvailable(); virtual bool next(); virtual bool skipTo(int32_t target); /// Check whether two Spans in the same document are ordered. /// @return true if spans1 starts before spans2 or the spans start at the same position, and /// spans1 ends before spans2. static bool docSpansOrdered(const SpansPtr& spans1, const SpansPtr& spans2); virtual String toString(); protected: /// Advances the subSpans to just after an ordered match with a minimum slop that is smaller than the /// slop allowed by the SpanNearQuery. /// @return true if there is such a match. bool advanceAfterOrdered(); /// Advance the subSpans to the same document. bool toSameDoc(); // Like {@link #docSpansOrdered(SpansPtr, SpansPtr)}, but use the spans starts and ends as parameters. static bool docSpansOrdered(int32_t start1, int32_t end1, int32_t start2, int32_t end2); /// Order the subSpans within the same document by advancing all later spans after the previous one. bool stretchToOrder(); /// The subSpans are ordered in the same doc, so there is a possible match. 
Compute the slop while /// making the match as short as possible by advancing all subSpans except the last one in reverse order. bool shrinkToAfterShortestMatch(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/NearSpansUnordered.h000066400000000000000000000036101456444476200242670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NEARSPANSUNORDERED_H #define NEARSPANSUNORDERED_H #include "Spans.h" namespace Lucene { /// Similar to {@link NearSpansOrdered}, but for the unordered case. /// /// Only public for subclassing. Most implementations should not need this class class LPPAPI NearSpansUnordered : public Spans { public: NearSpansUnordered(const SpanNearQueryPtr& query, const IndexReaderPtr& reader); virtual ~NearSpansUnordered(); LUCENE_CLASS(NearSpansUnordered); protected: SpanNearQueryPtr query; IndexReaderPtr reader; Collection ordered; // spans in query order Collection subSpans; int32_t slop; // from query SpansCellPtr first; // linked list of spans SpansCellPtr last; // sorted by doc only int32_t totalLength; // sum of current lengths CellQueuePtr queue; // sorted queue of spans SpansCellPtr max; // max element in queue bool more; // true if not done bool firstTime; // true before first next() public: virtual void initialize(); Collection getSubSpans(); virtual bool next(); virtual bool skipTo(int32_t target); virtual int32_t doc(); virtual int32_t start(); virtual int32_t end(); virtual Collection getPayload(); virtual bool isPayloadAvailable(); virtual String toString(); protected: SpansCellPtr min(); void initList(bool next); void addToList(const SpansCellPtr& cell); void firstToLast(); void queueToList(); void listToQueue(); 
bool atMatch(); friend class SpansCell; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/NoLockFactory.h000066400000000000000000000022671456444476200232510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NOLOCKFACTORY_H #define NOLOCKFACTORY_H #include "LockFactory.h" namespace Lucene { /// Use this {@link LockFactory} to disable locking entirely. Only one instance of this lock is created. /// You should call {@link #getNoLockFactory()} to get the instance. /// /// @see LockFactory class LPPAPI NoLockFactory : public LockFactory { public: virtual ~NoLockFactory(); LUCENE_CLASS(NoLockFactory); private: static NoLockPtr getSingletonLock(); public: static NoLockFactoryPtr getNoLockFactory(); /// Return a new Lock instance identified by lockName. virtual LockPtr makeLock(const String& lockName); /// Attempt to clear (forcefully unlock and remove) the specified lock. Only call this at a time when you /// are certain this lock is no longer in use. virtual void clearLock(const String& lockName); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/NormalizeCharMap.h000066400000000000000000000021431456444476200237210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef NORMALIZECHARMAP_H #define NORMALIZECHARMAP_H #include "LuceneObject.h" namespace Lucene { /// Holds a map of String input to String output, to be used with {@link MappingCharFilter}. class LPPAPI NormalizeCharMap : public LuceneObject { public: NormalizeCharMap(); virtual ~NormalizeCharMap(); LUCENE_CLASS(NormalizeCharMap); public: MapCharNormalizeCharMap submap; String normStr; int32_t diff; public: /// Records a replacement to be applied to the inputs stream. Whenever singleMatch occurs in the input, it /// will be replaced with replacement. /// /// @param singleMatch input String to be replaced /// @param replacement output String void add(const String& singleMatch, const String& replacement); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/NormsWriter.h000066400000000000000000000026661456444476200230320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NORMSWRITER_H #define NORMSWRITER_H #include "InvertedDocEndConsumer.h" namespace Lucene { /// Writes norms. Each thread X field accumulates the norms for the doc/fields it saw, then the flush method /// below merges all of these together into a single _X.nrm file. 
class NormsWriter : public InvertedDocEndConsumer { public: NormsWriter(); virtual ~NormsWriter(); LUCENE_CLASS(NormsWriter); protected: FieldInfosPtr fieldInfos; public: virtual InvertedDocEndConsumerPerThreadPtr addThread(const DocInverterPerThreadPtr& docInverterPerThread); virtual void abort(); // We only write the _X.nrm file at flush virtual void files(HashSet files); virtual void setFieldInfos(const FieldInfosPtr& fieldInfos); /// Produce _X.nrm if any document had a field with norms not disabled virtual void flush(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state); virtual void closeDocStore(const SegmentWriteStatePtr& state); protected: static uint8_t getDefaultNorm(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/NormsWriterPerField.h000066400000000000000000000025751456444476200244440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NORMSWRITERPERFIELD_H #define NORMSWRITERPERFIELD_H #include "InvertedDocEndConsumerPerField.h" namespace Lucene { /// Taps into DocInverter, as an InvertedDocEndConsumer, which is called at the end of inverting each field. /// We just look at the length for the field (docState.length) and record the norm. 
class NormsWriterPerField : public InvertedDocEndConsumerPerField { public: NormsWriterPerField(const DocInverterPerFieldPtr& docInverterPerField, const NormsWriterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo); virtual ~NormsWriterPerField(); LUCENE_CLASS(NormsWriterPerField); public: NormsWriterPerThreadWeakPtr _perThread; FieldInfoPtr fieldInfo; DocStatePtr docState; // Holds all docID/norm pairs we've seen Collection docIDs; ByteArray norms; int32_t upto; FieldInvertStatePtr fieldState; public: void reset(); virtual void abort(); /// Compare two objects virtual int32_t compareTo(const LuceneObjectPtr& other); virtual void finish(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/NormsWriterPerThread.h000066400000000000000000000020771456444476200246250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef NORMSWRITERPERTHREAD_H #define NORMSWRITERPERTHREAD_H #include "InvertedDocEndConsumerPerThread.h" namespace Lucene { class NormsWriterPerThread : public InvertedDocEndConsumerPerThread { public: NormsWriterPerThread(const DocInverterPerThreadPtr& docInverterPerThread, const NormsWriterPtr& normsWriter); virtual ~NormsWriterPerThread(); LUCENE_CLASS(NormsWriterPerThread); public: NormsWriterWeakPtr _normsWriter; DocStatePtr docState; public: virtual InvertedDocEndConsumerPerFieldPtr addField(const DocInverterPerFieldPtr& docInverterPerField, const FieldInfoPtr& fieldInfo); virtual void abort(); virtual void startDocument(); virtual void finishDocument(); bool freeRAM(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/NumberTools.h000066400000000000000000000040621456444476200230000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NUMBERTOOLS_H #define NUMBERTOOLS_H #include "LuceneObject.h" namespace Lucene { /// Provides support for converting longs to Strings, and back again. The strings are structured so that /// lexicographic sorting order is preserved. /// /// That is, if l1 is less than l2 for any two longs l1 and l2, then NumberTools.longToString(l1) is /// lexicographically less than NumberTools.longToString(l2). (Similarly for "greater than" and "equals".) /// /// This class handles all long values (unlike {@link DateField}). /// /// @deprecated For new indexes use {@link NumericUtils} instead, which provides a sortable binary representation /// (prefix encoded) of numeric values. 
/// To index and efficiently query numeric values use {@link NumericField} and {@link NumericRangeQuery}. This /// class is included for use with existing indices and will be removed in a future release (possibly Lucene 4.0). class LPPAPI NumberTools : public LuceneObject { public: virtual ~NumberTools(); LUCENE_CLASS(NumberTools); protected: static const int32_t RADIX; static const wchar_t NEGATIVE_PREFIX; // NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX static const wchar_t POSITIVE_PREFIX; public: /// Equivalent to longToString(LLONG_MIN) static const String& MIN_STRING_VALUE(); /// Equivalent to longToString(LLONG_MAX) static const String& MAX_STRING_VALUE(); /// The length of (all) strings returned by {@link #longToString} static int32_t STR_SIZE(); /// Converts a long to a String suitable for indexing. static String longToString(int64_t l); /// Converts a String that was returned by {@link #longToString} back to a long. static int64_t stringToLong(const String& str); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/NumericField.h000066400000000000000000000167211456444476200231020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NUMERICFIELD_H #define NUMERICFIELD_H #include "Field.h" namespace Lucene { /// This class provides a {@link Field} that enables indexing of numeric values for efficient range filtering and /// sorting. The native types int32_t, int64_t and double are directly supported. However, any value that can be /// converted into these native types can also be indexed. For example, date/time values represented by a {@link /// Date} can be translated into a int64_t value. 
If you don't need millisecond precision, you can quantize the /// value, either by dividing the result or using the separate getters (for year, month, etc.) to construct an int32_t /// or int64_t value. /// /// To perform range querying or filtering against a NumericField, use {@link NumericRangeQuery} or {@link /// NumericRangeFilter}. To sort according to a NumericField, use the normal numeric sort types, eg {@link /// SortField#INT}. NumericField values can also be loaded directly from {@link FieldCache}. /// /// By default, a NumericField's value is not stored but is indexed for range filtering and sorting. You can use the /// {@link #NumericField(String,Field.Store,boolean)} constructor if you need to change these defaults. /// /// You may add the same field name as a NumericField to the same document more than once. Range querying and /// filtering will be the logical OR of all values; so a range query will hit all documents that have at least one /// value in the range. However sort behavior is not defined. If you need to sort, you should separately index a /// single-valued NumericField. /// /// A NumericField will consume somewhat more disk space in the index than an ordinary single-valued field. However, /// for a typical index that includes substantial textual content per document, this increase will likely be in the /// noise. /// /// Within Lucene, each numeric value is indexed as a trie structure, where each term is logically assigned to larger /// and larger pre-defined brackets (which are simply lower-precision representations of the value). The step size /// between each successive bracket is called the precisionStep, measured in bits. Smaller precisionStep values /// result in larger number of brackets, which consumes more disk space in the index but may result in faster range /// search performance. The default value 4 was selected for a reasonable trade off of disk space consumption versus /// performance. 
You can use the expert constructor {@link #NumericField(String,int,Field.Store,boolean)} if you'd /// like to change the value. Note that you must also specify a congruent value when creating {@link NumericRangeQuery} /// or {@link NumericRangeFilter}. For low cardinality fields larger precision steps are good. If the cardinality /// is < 100, it is fair to use {@link INT_MAX}, which produces one term per value. /// /// For more information on the internals of numeric trie indexing, including the precisionStep configuration, see /// {@link NumericRangeQuery}. The format of indexed values is described in {@link NumericUtils}. /// /// If you only need to sort by numeric value, and never run range querying/filtering, you can index using a /// precisionStep of {@link MAX_INT}. This will minimize disk space consumed. /// /// More advanced users can instead use {@link NumericTokenStream} directly, when indexing numbers. This class is a /// wrapper around this token stream type for easier, more intuitive usage. /// /// NOTE: This class is only used during indexing. When retrieving the stored field value from a {@link Document} /// instance after search, you will get a conventional {@link Fieldable} instance where the numeric values are /// returned as strings (according to toString(value) of the used data type). class LPPAPI NumericField : public AbstractField { public: /// Creates a field for numeric values using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} /// (4). The instance is not yet initialized with a numeric value, before indexing a document containing this field, /// set a value using the various set???Value() methods. /// This constructor creates an indexed, but not stored field. /// @param name the field name NumericField(const String& name); /// Creates a field for numeric values using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} /// (4). 
The instance is not yet initialized with a numeric value, before indexing a document containing this field, /// set a value using the various set???Value() methods. /// This constructor creates an indexed, but not stored field. /// @param name the field name /// @param store if the field should be stored in plain text form (according to toString(value) of the used /// data type) /// @param index if the field should be indexed using {@link NumericTokenStream} NumericField(const String& name, Field::Store store, bool index); /// Creates a field for numeric values with the specified precisionStep. The instance is not yet initialized with /// a numeric value, before indexing a document containing this field, set a value using the various set???Value() /// methods. This constructor creates an indexed, but not stored field. /// @param name the field name /// @param precisionStep the used precision step NumericField(const String& name, int32_t precisionStep); /// Creates a field for numeric values with the specified precisionStep. The instance is not yet initialized with /// a numeric value, before indexing a document containing this field, set a value using the various set???Value() /// methods. This constructor creates an indexed, but not stored field. /// @param name the field name /// @param precisionStep the used precision step /// @param store if the field should be stored in plain text form (according to toString(value) of the used /// data type) /// @param index if the field should be indexed using {@link NumericTokenStream} NumericField(const String& name, int32_t precisionStep, Field::Store store, bool index); virtual ~NumericField(); LUCENE_CLASS(NumericField); protected: NumericTokenStreamPtr tokenStream; public: /// Returns a {@link NumericTokenStream} for indexing the numeric value. 
virtual TokenStreamPtr tokenStreamValue(); /// Returns always null for numeric fields virtual ByteArray getBinaryValue(ByteArray result); /// Returns always null for numeric fields virtual ReaderPtr readerValue(); /// Returns the numeric value as a string (how it is stored, when {@link Field.Store#YES} is chosen). virtual String stringValue(); /// Returns the current numeric value. virtual int64_t getNumericValue(); /// Initializes the field with the supplied long value. /// @param value the numeric value virtual NumericFieldPtr setLongValue(int64_t value); /// Initializes the field with the supplied int value. /// @param value the numeric value virtual NumericFieldPtr setIntValue(int32_t value); /// Initializes the field with the supplied double value. /// @param value the numeric value virtual NumericFieldPtr setDoubleValue(double value); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/NumericRangeFilter.h000066400000000000000000000106721456444476200242600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NUMERICRANGEFILTER_H #define NUMERICRANGEFILTER_H #include "MultiTermQueryWrapperFilter.h" namespace Lucene { /// A {@link Filter} that only accepts numeric values within a specified range. To use this, you must first /// index the numeric values using {@link NumericField} ({@link NumericTokenStream}). /// /// You create a new NumericRangeFilter with the static factory methods, eg: ///
/// FilterPtr f = NumericRangeFilter::newDoubleRange(L"weight", 0.3, 0.10, true, true);
/// 
/// accepts all documents whose double valued "weight" field ranges from 0.3 to 0.10, inclusive. /// /// See {@link NumericRangeQuery} for details on how Lucene indexes and searches numeric valued fields. class LPPAPI NumericRangeFilter : public MultiTermQueryWrapperFilter { public: NumericRangeFilter(const NumericRangeQueryPtr& query); virtual ~NumericRangeFilter(); LUCENE_CLASS(NumericRangeFilter); public: /// Factory that creates a NumericRangeFilter, that filters a long range using the given precisionStep. static NumericRangeFilterPtr newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a long range using the default precisionStep /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). static NumericRangeFilterPtr newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a int range using the given precisionStep. static NumericRangeFilterPtr newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a int range using the default precisionStep /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). static NumericRangeFilterPtr newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a double range using the given precisionStep. static NumericRangeFilterPtr newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a double range using the default precisionStep /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). 
static NumericRangeFilterPtr newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a int, long or double range using the given /// precisionStep. You can have half-open ranges (which are in fact <= or >= queries) by setting the min /// or max value to VariantUtils::null(). By setting inclusive to false it will match all documents /// excluding the bounds, with inclusive on the boundaries are hits, too. static NumericRangeFilterPtr newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a int, long or double range range using the default /// precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). You can have half-open ranges (which are in /// fact <= or >= queries) by setting the min or max value to VariantUtils::null(). By setting inclusive to false /// it will match all documents excluding the bounds, with inclusive on the boundaries are hits, too. static NumericRangeFilterPtr newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); /// Returns the field name for this filter String getField(); /// Returns true if the lower endpoint is inclusive bool includesMin(); /// Returns true if the upper endpoint is inclusive bool includesMax(); /// Returns the lower value of this range filter NumericValue getMin(); /// Returns the upper value of this range filter NumericValue getMax(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/NumericRangeQuery.h000066400000000000000000000262041456444476200241360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NUMERICRANGEQUERY_H #define NUMERICRANGEQUERY_H #include "MultiTermQuery.h" #include "FilteredTermEnum.h" #include "NumericUtils.h" namespace Lucene { /// A {@link Query} that matches numeric values within a specified range. To use this, you must first /// index the numeric values using {@link NumericField} (expert: {@link NumericTokenStream}). If your /// terms are instead textual, you should use {@link TermRangeQuery}. {@link NumericRangeFilter} is the /// filter equivalent of this query. /// /// You create a new NumericRangeQuery with the static factory methods, eg: ///
/// QueryPtr q = NumericRangeQuery::newDoubleRange("weight", 0.3, 0.10, true, true);
/// 
/// matches all documents whose double valued "weight" field ranges from 0.3 to 0.10, inclusive. /// /// The performance of NumericRangeQuery is much better than the corresponding {@link TermRangeQuery} /// because the number of terms that must be searched is usually far fewer, thanks to trie indexing, /// described below. /// /// You can optionally specify a precisionStep when creating this query. This is necessary if you've /// changed this configuration from its default (4) during indexing. Lower values consume more disk /// space but speed up searching. Suitable values are between 1 and 8. A good starting point to test /// is 4, which is the default value for all Numeric* classes. See below for details. /// /// This query defaults to {@linkplain MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} for 32 bit /// integer ranges with precisionStep <=8 and 64 bit (long/double) ranges with precisionStep <=6. /// Otherwise it uses {@linkplain MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} as the number of terms /// is likely to be high. With precision steps of <=4, this query can be run with one of the BooleanQuery /// rewrite methods without changing BooleanQuery's default max clause count. /// /// How it works /// /// See the publication about panFMP, where this /// algorithm was described (referred to as TrieRangeQuery): ///
Schindler, U, Diepenbroek, M, 2008. /// Generic XML-based Framework for Metadata Portals. /// Computers & Geosciences 34 (12), 1947-1955. /// doi:10.1016/j.cageo.2008.02.023
/// /// A quote from this paper: Because Apache Lucene is a full-text search engine and not a conventional /// database, it cannot handle numerical ranges (eg., field value is inside user defined bounds, even /// dates are numerical values). We have developed an extension to Apache Lucene that stores the /// numerical values in a special string-encoded format with variable precision (all numerical values like /// doubles, longs, and ints are converted to lexicographic sortable string representations and stored /// with different precisions (for a more detailed description of how the values are stored, see {@link /// NumericUtils}). A range is then divided recursively into multiple intervals for searching: /// The center of the range is searched only with the lowest possible precision in the trie, while the /// boundaries are matched more exactly. This reduces the number of terms dramatically. /// /// For the variant that stores long values in 8 different precisions (each reduced by 8 bits) that uses a /// lowest precision of 1 byte, the index contains only a maximum of 256 distinct values in the lowest /// precision. Overall, a range could consist of a theoretical maximum of 7*255*2 + 255 = 3825 distinct /// terms (when there is a term for every distinct value of an 8-byte-number in the index and the range /// covers almost all of them; a maximum of 255 distinct values is used because it would always be possible /// to reduce the full 256 values to one term with degraded precision). In practice, we have seen up to /// 300 terms in most cases (index with 500,000 metadata records and a uniform value distribution). /// /// Precision Step: /// You can choose any precisionStep when encoding values. Lower step values mean more precisions and so /// more terms in index (and index gets larger). On the other hand, the maximum number of terms to match /// reduces, which optimized query speed. The formula to calculate the maximum term count is: ///
/// n = [ (bitsPerValue/precisionStep - 1) * (2 ^ precisionStep - 1 ) * 2 ] + (2 ^ precisionStep - 1 )
/// 
/// /// (this formula is only correct, when bitsPerValue/precisionStep is an integer; in other cases, the value /// must be rounded up and the last summand must contain the modulo of the division as precision step). /// For longs stored using a precision step of 4, n = 15*15*2 + 15 = 465, and for a precision step of 2, /// n = 31*3*2 + 3 = 189. But the faster search speed is reduced by more seeking in the term enum of the /// index. Because of this, the ideal precisionStep value can only be found out by testing. Important: You /// can index with a lower precision step value and test search speed using a multiple of the original step /// value. /// /// Good values for precisionStep are depending on usage and data type: ///
    ///
  • The default for all data types is 4, which is used, when no precisionStep is given. ///
  • Ideal value in most cases for 64 bit data types (long, double) is 6 or 8. ///
  • Ideal value in most cases for 32 bit data types (int) is 4. ///
  • For low cardinality fields larger precision steps are good. If the cardinality is < 100, it is /// fair to use {@link Integer#MAX_VALUE} (see below). ///
  • Steps >=64 for long/double and >=32 for int/float produces one token per value in the index and /// querying is as slow as a conventional {@link TermRangeQuery}. But it can be used to produce fields, /// that are solely used for sorting (in this case simply use {@link Integer#MAX_VALUE} as precisionStep). /// Using {@link NumericField NumericFields} for sorting is ideal, because building the field cache is much /// faster than with text-only numbers. These fields have one term per value and therefore also work with /// term enumeration for building distinct lists (eg. facets / preselected values to search for). /// Sorting is also possible with range query optimized fields using one of the above precisionSteps. ///
/// /// Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed that /// {@link TermRangeQuery} in boolean rewrite mode (with raised {@link BooleanQuery} clause count) took /// about 30-40 secs to complete, {@link TermRangeQuery} in constant score filter rewrite mode took 5 secs /// and executing this class took <100ms to complete (on an Opteron64 machine, 8 bit precision step). This /// query type was developed for a geographic portal, where the performance for eg. bounding boxes or exact /// date/time stamps is important. class LPPAPI NumericRangeQuery : public MultiTermQuery { public: NumericRangeQuery(const String& field, int32_t precisionStep, int32_t valSize, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); virtual ~NumericRangeQuery(); LUCENE_CLASS(NumericRangeQuery); INTERNAL: String field; int32_t precisionStep; int32_t valSize; NumericValue min; NumericValue max; bool minInclusive; bool maxInclusive; public: using MultiTermQuery::toString; /// Factory that creates a NumericRangeFilter, that filters a long range using the given precisionStep. static NumericRangeQueryPtr newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a long range using the default precisionStep /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). static NumericRangeQueryPtr newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a int range using the given precisionStep. static NumericRangeQueryPtr newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a int range using the default precisionStep /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). 
static NumericRangeQueryPtr newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a double range using the given precisionStep. static NumericRangeQueryPtr newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeFilter, that filters a double range using the default precisionStep /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). static NumericRangeQueryPtr newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeQuery, that queries a int, long or double range using the given /// precisionStep. You can have half-open ranges (which are in fact <= or >= queries) by setting the min /// or max value to VariantUtils::null(). By setting inclusive to false it will match all documents /// excluding the bounds, with inclusive on the boundaries are hits, too. static NumericRangeQueryPtr newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); /// Factory that creates a NumericRangeQuery, that queries a int, long or double range using the default /// precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). You can have half-open ranges (which /// are in fact <= or >= queries) by setting the min or max value to VariantUtils::null(). By setting /// inclusive to false it will match all documents excluding the bounds, with inclusive on the boundaries /// are hits, too. 
static NumericRangeQueryPtr newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); /// Returns the field name for this query String getField(); /// Returns true if the lower endpoint is inclusive bool includesMin(); /// Returns true if the upper endpoint is inclusive bool includesMax(); /// Returns the lower value of this range query NumericValue getMin(); /// Returns the upper value of this range query NumericValue getMax(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual String toString(const String& field); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); protected: virtual FilteredTermEnumPtr getEnum(const IndexReaderPtr& reader); friend class NumericRangeTermEnum; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/NumericTokenStream.h000066400000000000000000000122621456444476200243070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NUMERICTOKENSTREAM_H #define NUMERICTOKENSTREAM_H #include "TokenStream.h" namespace Lucene { /// This class provides a {@link TokenStream} for indexing numeric values that can be used by {@link NumericRangeQuery} /// or {@link NumericRangeFilter}. /// /// Note that for simple usage, {@link NumericField} is recommended. {@link NumericField} disables norms and term freqs, /// as they are not usually needed during searching. If you need to change these settings, you should use this class. /// /// See {@link NumericField} for capabilities of fields indexed numerically. 
/// /// Here's an example usage, for an int field: /// /// FieldPtr field = newLucene(name, newLucene(precisionStep)->setIntValue(value)); /// field->setOmitNorms(true); /// field->setOmitTermFreqAndPositions(true); /// document->add(field); /// /// For optimal performance, re-use the TokenStream and Field instance for more than one document: /// /// NumericTokenStreamPtr stream = newLucene(precisionStep); /// FieldPtr field = newLucene(name, stream); /// field->setOmitNorms(true); /// field->setOmitTermFreqAndPositions(true); /// DocumentPtr document = newLucene(); /// document->add(field); /// /// for (all documents) /// { /// stream->setIntValue(value); /// writer->addDocument(document); /// } /// /// This stream is not intended to be used in analyzers; it's more for iterating the different precisions during /// indexing a specific numeric value. /// /// NOTE: as token streams are only consumed once the document is added to the index, if you index more than one /// numeric field, use a separate NumericTokenStream * instance for each. /// /// See {@link NumericRangeQuery} for more details on the precisionStep /// parameter as well as how numeric fields work under the hood. class LPPAPI NumericTokenStream : public TokenStream { public: /// Creates a token stream for numeric values using the default precisionStep {@link /// NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized, before using set a /// value using the various setValue() methods. NumericTokenStream(); /// Creates a token stream for numeric values with the specified precisionStep. The stream is not yet /// initialized, before using set a value using the various setValue() methods. NumericTokenStream(int32_t precisionStep); /// Creates a token stream for numeric values with the specified precisionStep using the given {@link /// AttributeSource}. The stream is not yet initialized, before using set a value using the various /// setValue() methods. 
NumericTokenStream(const AttributeSourcePtr& source, int32_t precisionStep); /// Creates a token stream for numeric values with the specified precisionStep using the given {@link /// AttributeFactory}. The stream is not yet initialized, before using set a value using the various /// setValue() methods. NumericTokenStream(const AttributeFactoryPtr& factory, int32_t precisionStep); virtual ~NumericTokenStream(); LUCENE_CLASS(NumericTokenStream); protected: TermAttributePtr termAtt; TypeAttributePtr typeAtt; PositionIncrementAttributePtr posIncrAtt; int32_t shift; int32_t valSize; // valSize == 0 means not initialized int32_t precisionStep; int64_t value; public: /// The full precision token gets this token type assigned. static const String& TOKEN_TYPE_FULL_PREC(); /// The lower precision tokens gets this token type assigned. static const String& TOKEN_TYPE_LOWER_PREC(); /// Initializes the token stream with the supplied long value. /// @param value the value, for which this TokenStream should enumerate tokens. /// @return this instance, because of this you can use it the following way: /// newLucene(name, newLucene(precisionStep)->setLongValue(value)) NumericTokenStreamPtr setLongValue(int64_t value); /// Initializes the token stream with the supplied int value. /// @param value the value, for which this TokenStream should enumerate tokens. /// @return this instance, because of this you can use it the following way: /// newLucene(name, newLucene(precisionStep)->setIntValue(value)) NumericTokenStreamPtr setIntValue(int32_t value); /// Initializes the token stream with the supplied double value. /// @param value the value, for which this TokenStream should enumerate tokens. 
/// @return this instance, because of this you can use it the following way: /// newLucene(name, newLucene(precisionStep)->setDoubleValue(value)) NumericTokenStreamPtr setDoubleValue(double value); virtual void reset(); virtual bool incrementToken(); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/NumericUtils.h000066400000000000000000000224071456444476200231550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NUMERICUTILS_H #define NUMERICUTILS_H #include "LuceneObject.h" namespace Lucene { /// This is a helper class to generate prefix-encoded representations for numerical values and supplies converters /// to represent double values as sortable integers/longs. /// /// To quickly execute range queries in Apache Lucene, a range is divided recursively into multiple intervals for /// searching: The center of the range is searched only with the lowest possible precision in the trie, while the /// boundaries are matched more exactly. This reduces the number of terms dramatically. /// /// This class generates terms to achieve this: First the numerical integer values need to be converted to strings. /// For that integer values (32 bit or 64 bit) are made unsigned and the bits are converted to ASCII chars with each /// 7 bit. The resulting string is sortable like the original integer value. Each value is also prefixed (in the /// first char) by the shift value (number of bits removed) used during encoding. /// /// To also index floating point numbers, this class supplies two methods to convert them to integer values by /// changing their bit layout: {@link #doubleToSortableLong}, {@link #doubleToSortableInt}. 
You will have no precision /// loss by converting floating point numbers to integers and back (only that the integer form is not usable). Other /// data types like dates can easily converted to longs or ints (eg. date to long). /// /// For easy usage, the trie algorithm is implemented for indexing inside {@link NumericTokenStream} that can index /// int, long, and double. For querying, {@link NumericRangeQuery} and {@link NumericRangeFilter} implement the query /// part for the same data types. /// /// This class can also be used, to generate lexicographically sortable (according {@link std::string#compare}) /// representations of numeric data types for other usages (eg. sorting). class LPPAPI NumericUtils : public LuceneObject { public: virtual ~NumericUtils(); LUCENE_CLASS(NumericUtils); public: /// The default precision step used by {@link NumericField}, {@link NumericTokenStream}, {@link NumericRangeQuery}, /// and {@link NumericRangeFilter} as default. static const int32_t PRECISION_STEP_DEFAULT; /// Longs are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_LONG + /// shift in the first character. static const wchar_t SHIFT_START_LONG; /// The maximum term length (used for char[] buffer size) for encoding long values. /// @see #longToPrefixCoded(long,int,char[]) static const int32_t BUF_SIZE_LONG; /// Integers are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_INT + /// shift in the first character. static const wchar_t SHIFT_START_INT; /// The maximum term length (used for char[] buffer size) for encoding int values. /// @see #intToPrefixCoded(int,int,char[]) static const int32_t BUF_SIZE_INT; public: /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by /// {@link NumericTokenStream}. 
/// @param val the numeric value /// @param shift how many bits to strip from the right /// @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_LONG} length /// @return number of chars written to buffer static int32_t longToPrefixCoded(int64_t val, int32_t shift, CharArray buffer); /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by /// {@link LongRangeBuilder}. /// @param val the numeric value /// @param shift how many bits to strip from the right static String longToPrefixCoded(int64_t val, int32_t shift); /// This is a convenience method, that returns prefix coded bits of a long without reducing the precision. /// It can be used to store the full precision value as a stored field in index. /// To decode, use {@link #prefixCodedToLong}. static String longToPrefixCoded(int64_t val); /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by {@link /// NumericTokenStream}. /// @param val the numeric value /// @param shift how many bits to strip from the right /// @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_INT} length /// @return number of chars written to buffer static int32_t intToPrefixCoded(int32_t val, int32_t shift, CharArray buffer); /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by {@link /// IntRangeBuilder}. /// @param val the numeric value /// @param shift how many bits to strip from the right static String intToPrefixCoded(int32_t val, int32_t shift); /// This is a convenience method, that returns prefix coded bits of an int without reducing the precision. /// It can be used to store the full precision value as a stored field in index. /// To decode, use {@link #prefixCodedToInt}. static String intToPrefixCoded(int32_t val); /// Returns a long from prefixCoded characters. Rightmost bits will be zero for lower precision codes. 
/// This method can be used to decode eg. a stored field. /// @see #longToPrefixCoded(int64_t) static int64_t prefixCodedToLong(const String& prefixCoded); /// Returns an int from prefixCoded characters. Rightmost bits will be zero for lower precision codes. /// This method can be used to decode eg. a stored field. /// @see #intToPrefixCoded(int32_t) static int32_t prefixCodedToInt(const String& prefixCoded); /// Converts a double value to a sortable signed long. The value is converted by getting their IEEE 754 /// floating-point "double format" bit layout and then some bits are swapped, to be able to compare the /// result as int64_t. By this the precision is not reduced, but the value can easily used as a int64_t. /// @see #sortableLongToDouble static int64_t doubleToSortableLong(double val); /// Convenience method: this just returns: longToPrefixCoded(doubleToSortableLong(val)) static String doubleToPrefixCoded(double val); /// Converts a sortable long back to a double. /// @see #doubleToSortableLong static double sortableLongToDouble(int64_t val); /// Convenience method: this just returns: sortableLongToDouble(prefixCodedToLong(val)) static double prefixCodedToDouble(const String& val); /// Splits a int64_t range recursively. You may implement a builder that adds clauses to a {@link BooleanQuery} /// for each call to its {@link LongRangeBuilder#addRange(String,String)} method. /// This method is used by {@link NumericRangeQuery}. static void splitLongRange(const LongRangeBuilderPtr& builder, int32_t precisionStep, int64_t minBound, int64_t maxBound); /// Splits an int32_t range recursively. You may implement a builder that adds clauses to a {@link BooleanQuery} /// for each call to its {@link IntRangeBuilder#addRange(String,String)} method. /// This method is used by {@link NumericRangeQuery}. 
static void splitIntRange(const IntRangeBuilderPtr& builder, int32_t precisionStep, int32_t minBound, int32_t maxBound); /// This helper does the splitting for both 32 and 64 bit. static void splitRange(const LuceneObjectPtr& builder, int32_t valSize, int32_t precisionStep, int64_t minBound, int64_t maxBound); /// Helper that delegates to correct range builder static void addRange(const LuceneObjectPtr& builder, int32_t valSize, int64_t minBound, int64_t maxBound, int32_t shift); }; /// Callback for {@link #splitLongRange}. You need to overwrite only one of the methods. /// NOTE: This is a very low-level interface, the method signatures may change in later versions. class LPPAPI LongRangeBuilder : public LuceneObject { public: virtual ~LongRangeBuilder(); public: /// Overwrite this method, if you like to receive the already prefix encoded range bounds. You can directly build /// classical (inclusive) range queries from them. virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); /// Overwrite this method, if you like to receive the raw long range bounds. You can use this for eg. debugging /// purposes (print out range bounds). virtual void addRange(int64_t min, int64_t max, int32_t shift); }; class LPPAPI IntRangeBuilder : public LuceneObject { public: virtual ~IntRangeBuilder(); public: /// Overwrite this method, if you like to receive the already prefix encoded range bounds. You can directly build /// classical range (inclusive) queries from them. virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); /// Overwrite this method, if you like to receive the raw int range bounds. You can use this for eg. debugging /// purposes (print out range bounds). 
virtual void addRange(int32_t min, int32_t max, int32_t shift); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/OffsetAttribute.h000066400000000000000000000034031456444476200236370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef OFFSETATTRIBUTE_H #define OFFSETATTRIBUTE_H #include "Attribute.h" namespace Lucene { /// The start and end character offset of a Token. class LPPAPI OffsetAttribute : public Attribute { public: OffsetAttribute(); virtual ~OffsetAttribute(); LUCENE_CLASS(OffsetAttribute); protected: int32_t _startOffset; int32_t _endOffset; public: virtual String toString(); /// Returns this Token's starting offset, the position of the first character corresponding to this token /// in the source text. /// /// Note that the difference between endOffset() and startOffset() may not be equal to termText.length(), /// as the term text may have been altered by a stemmer or some other filter. virtual int32_t startOffset(); /// Set the starting and ending offset. /// @see #startOffset() and #endOffset() virtual void setOffset(int32_t startOffset, int32_t endOffset); /// Returns this Token's ending offset, one greater than the position of the last character corresponding /// to this token in the source text. The length of the token in the source text is (endOffset - startOffset). 
virtual int32_t endOffset(); virtual void clear(); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual void copyTo(const AttributePtr& target); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/OpenBitSet.h000066400000000000000000000205111456444476200225400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef OPENBITSET_H #define OPENBITSET_H #include "DocIdSet.h" namespace Lucene { /// An "open" BitSet implementation that allows direct access to the array of words storing the bits. /// /// The goals of OpenBitSet are the fastest implementation possible, and maximum code reuse. Extra /// safety and encapsulation may always be built on top, but if that's built in, the cost can never /// be removed (and hence people re-implement their own version in order to get better performance). class LPPAPI OpenBitSet : public DocIdSet { public: /// Constructs an OpenBitSet large enough to hold numBits. OpenBitSet(int64_t numBits = 64); /// Constructs an OpenBitSet from an existing LongArray. /// /// The first 64 bits are in long[0], with bit index 0 at the least significant bit, and bit /// index 63 at the most significant. Given a bit index, the word containing it is long[index/64], /// and it is at bit number index%64 within that word. /// /// numWords are the number of elements in the array that contain set bits (non-zero longs). /// numWords should be <= bits.length(), and any existing words in the array at position >= /// numWords should be zero. 
OpenBitSet(LongArray bits, int32_t numWords); virtual ~OpenBitSet(); LUCENE_CLASS(OpenBitSet); protected: LongArray bits; int32_t wlen; // number of words (elements) used in the array public: virtual DocIdSetIteratorPtr iterator(); /// This DocIdSet implementation is cacheable. virtual bool isCacheable(); /// Returns the current capacity in bits (1 greater than the index of the last bit) int64_t capacity(); /// Returns the current capacity of this set. Included for compatibility. This is *not* /// equal to {@link #cardinality} int64_t size(); /// Returns true if there are no set bits bool isEmpty(); /// Returns the long[] storing the bits LongArray getBits(); /// Sets a new long[] to use as the bit storage void setBits(LongArray bits); /// Gets the number of longs in the array that are in use int32_t getNumWords(); /// Sets the number of longs in the array that are in use void setNumWords(int32_t numWords); /// Returns true or false for the specified bit index. bool get(int32_t index); /// Returns true or false for the specified bit index. /// The index should be less than the OpenBitSet size bool fastGet(int32_t index); /// Returns true or false for the specified bit index bool get(int64_t index); /// Returns true or false for the specified bit index. /// The index should be less than the OpenBitSet size. bool fastGet(int64_t index); /// Returns 1 if the bit is set, 0 if not. /// The index should be less than the OpenBitSet size int32_t getBit(int32_t index); /// Sets a bit, expanding the set size if necessary void set(int64_t index); /// Sets the bit at the specified index. /// The index should be less than the OpenBitSet size. void fastSet(int32_t index); /// Sets the bit at the specified index. /// The index should be less than the OpenBitSet size. 
void fastSet(int64_t index); /// Sets a range of bits, expanding the set size if necessary /// @param startIndex lower index /// @param endIndex one-past the last bit to set void set(int64_t startIndex, int64_t endIndex); /// Clears a bit. /// The index should be less than the OpenBitSet size. void fastClear(int32_t index); /// Clears a bit. /// The index should be less than the OpenBitSet size. void fastClear(int64_t index); /// Clears a bit, allowing access beyond the current set size without changing the size. void clear(int64_t index); /// Clears a range of bits. Clearing past the end does not change the size of the set. /// @param startIndex lower index /// @param endIndex one-past the last bit to clear void clear(int32_t startIndex, int32_t endIndex); /// Clears a range of bits. Clearing past the end does not change the size of the set. /// @param startIndex lower index /// @param endIndex one-past the last bit to clear void clear(int64_t startIndex, int64_t endIndex); /// Sets a bit and returns the previous value. /// The index should be less than the OpenBitSet size. bool getAndSet(int32_t index); /// Sets a bit and returns the previous value. /// The index should be less than the OpenBitSet size. bool getAndSet(int64_t index); /// Flips a bit. /// The index should be less than the OpenBitSet size. void fastFlip(int32_t index); /// Flips a bit. /// The index should be less than the OpenBitSet size. void fastFlip(int64_t index); /// Flips a bit, expanding the set size if necessary void flip(int64_t index); /// Flips a bit and returns the resulting bit value. /// The index should be less than the OpenBitSet size. bool flipAndGet(int32_t index); /// Flips a bit and returns the resulting bit value. /// The index should be less than the OpenBitSet size. 
bool flipAndGet(int64_t index); /// Flips a range of bits, expanding the set size if necessary /// @param startIndex lower index /// @param endIndex one-past the last bit to flip void flip(int64_t startIndex, int64_t endIndex); /// @return the number of set bits int64_t cardinality(); /// Returns the popcount or cardinality of the intersection of the two sets. /// Neither set is modified. static int64_t intersectionCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b); /// Returns the popcount or cardinality of the union of the two sets. /// Neither set is modified. static int64_t unionCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b); /// Returns the popcount or cardinality of "a and not b" or "intersection(a, not(b))". /// Neither set is modified. static int64_t andNotCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b); /// Returns the popcount or cardinality of the exclusive-or of the two sets. /// Neither set is modified. static int64_t xorCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b); /// Returns the index of the first set bit starting at the index specified. /// -1 is returned if there are no more set bits. int32_t nextSetBit(int32_t index); /// Returns the index of the first set bit starting at the index specified. /// -1 is returned if there are no more set bits. int64_t nextSetBit(int64_t index); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); /// this = this AND other void intersect(const OpenBitSetPtr& other); /// this = this OR other void _union(const OpenBitSetPtr& other); /// Remove all elements set in other. 
this = this AND_NOT other void remove(const OpenBitSetPtr& other); /// this = this XOR other void _xor(const OpenBitSetPtr& other); /// see {@link intersect} void _and(const OpenBitSetPtr& other); /// see {@link union} void _or(const OpenBitSetPtr& other); /// see {@link remove} void andNot(const OpenBitSetPtr& other); /// Returns true if the sets have any elements in common bool intersects(const OpenBitSetPtr& other); /// Expand the LongArray with the size given as a number of words (64 bit longs). /// getNumWords() is unchanged by this call. void ensureCapacityWords(int32_t numWords); /// Ensure that the LongArray is big enough to hold numBits, expanding it if necessary. /// getNumWords() is unchanged by this call. void ensureCapacity(int64_t numBits); /// Lowers numWords, the number of words in use, by checking for trailing zero words. void trimTrailingZeros(); /// Returns the number of 64 bit words it would take to hold numBits. static int32_t bits2words(int64_t numBits); /// Returns true if both sets have the same bits set virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); protected: int32_t expandingWordNum(int64_t index); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/OpenBitSetDISI.h000066400000000000000000000042241456444476200232140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef OPENBITSETDISI_H #define OPENBITSETDISI_H #include "OpenBitSet.h" namespace Lucene { class LPPAPI OpenBitSetDISI : public OpenBitSet { public: /// Construct an OpenBitSetDISI with its bits set from the doc ids of the given DocIdSetIterator. 
/// Also give a maximum size one larger than the largest doc id for which a bit may ever be set on /// this OpenBitSetDISI. OpenBitSetDISI(const DocIdSetIteratorPtr& disi, int32_t maxSize); /// Construct an OpenBitSetDISI with no bits set, and a given maximum size one larger than the largest /// doc id for which a bit may ever be set on this OpenBitSetDISI. OpenBitSetDISI(int32_t maxSize); virtual ~OpenBitSetDISI(); LUCENE_CLASS(OpenBitSetDISI); public: /// Perform an in-place OR with the doc ids from a given DocIdSetIterator, setting the bit for each /// such doc id. These doc ids should be smaller than the maximum size passed to the constructor. void inPlaceOr(const DocIdSetIteratorPtr& disi); /// Perform an in-place AND with the doc ids from a given DocIdSetIterator, leaving only the bits set /// for which the doc ids are in common. These doc ids should be smaller than the maximum size passed /// to the constructor. void inPlaceAnd(const DocIdSetIteratorPtr& disi); /// Perform an in-place NOT with the doc ids from a given DocIdSetIterator, clearing all the bits for /// each such doc id. These doc ids should be smaller than the maximum size passed to the constructor. void inPlaceNot(const DocIdSetIteratorPtr& disi); /// Perform an inplace XOR with the doc ids from a given DocIdSetIterator, flipping all the bits for /// each such doc id. These doc ids should be smaller than the maximum size passed to the constructor. void inPlaceXor(const DocIdSetIteratorPtr& disi); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/OpenBitSetIterator.h000066400000000000000000000030731456444476200242560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef OPENBITSETITERATOR_H #define OPENBITSETITERATOR_H #include "DocIdSetIterator.h" namespace Lucene { /// An iterator to iterate over set bits in an OpenBitSet. /// This is faster than nextSetBit() for iterating over the complete set of bits, /// especially when the density of the bits set is high. class LPPAPI OpenBitSetIterator : public DocIdSetIterator { public: OpenBitSetIterator(const OpenBitSetPtr& bitSet); OpenBitSetIterator(LongArray bits, int32_t numWords); virtual ~OpenBitSetIterator(); LUCENE_CLASS(OpenBitSetIterator); protected: LongArray arr; int32_t words; int32_t i; int64_t word; int32_t wordShift; int32_t indexArray; int32_t curDocId; /// The General Idea: instead of having an array per byte that has the offsets of the /// next set bit, that array could be packed inside a 32 bit integer (8 4 bit numbers). /// That should be faster than accessing an array for each index, and the total array /// size is kept smaller (256*sizeof(int32_t))=1K static const int32_t bitlist[]; public: virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); virtual int32_t docID(); protected: /// 64 bit shifts void shift(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/OrdFieldSource.h000066400000000000000000000040011456444476200233710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ORDFIELDSOURCE_H #define ORDFIELDSOURCE_H #include "ValueSource.h" namespace Lucene { /// Obtains the ordinal of the field value from the default Lucene {@link FieldCache} using getStringIndex(). 
/// /// The native lucene index order is used to assign an ordinal value for each field value. /// /// Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. /// Example: /// If there were only three field values: "apple","banana","pear" then ord("apple")=1, ord("banana")=2, /// ord("pear")=3 /// /// WARNING: ord() depends on the position in an index and can thus change when other documents are inserted /// or deleted, or if a MultiSearcher is used. /// /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's /// best to switch your application to pass only atomic (single segment) readers to this API. Alternatively, /// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU /// per lookup but will not consume double the FieldCache RAM. class LPPAPI OrdFieldSource : public ValueSource { public: /// Constructor for a certain field. ///@param field field whose values order is used. OrdFieldSource(const String& field); virtual ~OrdFieldSource(); LUCENE_CLASS(OrdFieldSource); protected: String field; public: virtual String description(); virtual DocValuesPtr getValues(const IndexReaderPtr& reader); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ParallelMultiSearcher.h000066400000000000000000000033021456444476200247470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef PARALLELMULTISEARCHER_H #define PARALLELMULTISEARCHER_H #include "MultiSearcher.h" namespace Lucene { /// Implements parallel search over a set of Searchables. /// /// Applications usually need only call the inherited {@link #search(QueryPtr, int32_t)} or /// {@link #search(QueryPtr, FilterPtr, int32_t)} methods. class LPPAPI ParallelMultiSearcher : public MultiSearcher { public: /// Creates a {@link Searchable} which searches searchables. ParallelMultiSearcher(Collection searchables); virtual ~ParallelMultiSearcher(); LUCENE_CLASS(ParallelMultiSearcher); public: /// Executes each {@link Searchable}'s docFreq() in its own thread and waits for each search to /// complete and merge the results back together. virtual int32_t docFreq(const TermPtr& term); /// A search implementation which executes each {@link Searchable} in its own thread and waits /// for each search to complete and merge the results back together. virtual TopDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n); /// A search implementation allowing sorting which spans a new thread for each Searchable, waits /// for each search to complete and merges the results back together. virtual TopFieldDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ParallelReader.h000066400000000000000000000172001456444476200234040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef PARALLELREADER_H #define PARALLELREADER_H #include "IndexReader.h" namespace Lucene { /// An IndexReader which reads multiple, parallel indexes. Each index added must have the same number of /// documents, but typically each contains different fields. Each document contains the union of the fields /// of all documents with the same document number. When searching, matches for a query term are from the /// first index added that has the field. /// /// This is useful, eg., with collections that have large fields which change rarely and small fields that /// change more frequently. The smaller fields may be re-indexed in a new index and both indexes may be /// searched together. /// /// Warning: It is up to you to make sure all indexes are created and modified the same way. For example, /// if you add documents to one index, you need to add the same documents in the same order to the other /// indexes. Failure to do so will result in undefined behavior class LPPAPI ParallelReader : public IndexReader { public: /// Construct a ParallelReader. /// @param closeSubReaders indicates whether the subreaders should be closed when this ParallelReader /// is closed ParallelReader(bool closeSubReaders = true); virtual ~ParallelReader(); LUCENE_CLASS(ParallelReader); protected: Collection readers; Collection decrefOnClose; // remember which subreaders to decRef on close bool incRefReaders; MapStringIndexReader fieldToReader; MapIndexReaderSetString readerToFields; Collection storedFieldReaders; int32_t _maxDoc; int32_t _numDocs; bool _hasDeletions; public: /// Add an IndexReader. void add(const IndexReaderPtr& reader); /// Add an IndexReader whose stored fields will not be returned. This can accelerate search when stored /// fields are only needed from a subset of the IndexReaders. 
void add(const IndexReaderPtr& reader, bool ignoreStoredFields); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); /// Tries to reopen the subreaders. /// /// If one or more subreaders could be re-opened (ie. subReader.reopen() returned a new instance != subReader), /// then a new ParallelReader instance is returned, otherwise this instance is returned. /// /// A re-opened instance might share one or more subreaders with the old instance. Index modification /// operations result in undefined behavior when performed before the old instance is closed. /// (see {@link IndexReader#reopen()}). /// /// If subreaders are shared, then the reference count of those readers is increased to ensure that the /// subreaders remain open until the last referring reader is closed. virtual IndexReaderPtr reopen(); /// Returns the number of documents in this index. virtual int32_t numDocs(); /// Returns one greater than the largest possible document number. This may be used to, eg., determine /// how big to allocate an array which will have an element for every document number in an index. virtual int32_t maxDoc(); /// Returns true if any documents have been deleted virtual bool hasDeletions(); /// Returns true if document n has been deleted virtual bool isDeleted(int32_t n); /// Get the {@link Document} at the n'th position. virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector); /// Return an array of term frequency vectors for the specified document. virtual Collection getTermFreqVectors(int32_t docNumber); /// Return a term frequency vector for the specified document and field. virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays /// of the {@link TermFreqVector}. 
virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper); /// Map all the term vectors for all fields in a Document virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper); /// Returns true if there are norms stored for this field. virtual bool hasNorms(const String& field); /// Returns the byte-encoded normalization factor for the named field of every document. virtual ByteArray norms(const String& field); /// Reads the byte-encoded normalization factor for the named field of every document. virtual void norms(const String& field, ByteArray norms, int32_t offset); /// Returns an enumeration of all the terms in the index. The enumeration is ordered by /// Term::compareTo(). Each term is greater than all that precede it in the enumeration. /// Note that after calling terms(), {@link TermEnum#next()} must be called on the resulting /// enumeration before calling other methods such as {@link TermEnum#term()}. virtual TermEnumPtr terms(); /// Returns an enumeration of all terms starting at a given term. If the given term does not /// exist, the enumeration is positioned at the first term greater than the supplied term. /// The enumeration is ordered by Term::compareTo(). Each term is greater than all that precede /// it in the enumeration. virtual TermEnumPtr terms(const TermPtr& t); /// Returns the number of documents containing the term t. virtual int32_t docFreq(const TermPtr& t); /// Returns an enumeration of all the documents which contain term. For each document, the /// document number, the frequency of the term in that document is also provided, for use in /// search scoring. If term is null, then all non-deleted docs are returned with freq=1. /// The enumeration is ordered by document number. Each document number is greater than all /// that precede it in the enumeration. virtual TermDocsPtr termDocs(const TermPtr& term); /// Returns an unpositioned {@link TermDocs} enumerator. 
virtual TermDocsPtr termDocs(); /// Returns an enumeration of all the documents which contain term. virtual TermPositionsPtr termPositions(const TermPtr& term); /// Returns an unpositioned {@link TermPositions} enumerator. virtual TermPositionsPtr termPositions(); /// Checks recursively if all subreaders are up to date. virtual bool isCurrent(); /// Checks recursively if all subindexes are optimized virtual bool isOptimized(); /// Not implemented. virtual int64_t getVersion(); Collection getSubReaders(); /// Get a list of unique field names that exist in this index and have the specified field option /// information. virtual HashSet getFieldNames(FieldOption fieldOption); protected: IndexReaderPtr doReopen(bool doClone); /// Implements deletion of the document numbered docNum. virtual void doDelete(int32_t docNum); /// Implements actual undeleteAll(). virtual void doUndeleteAll(); /// Implements setNorm in subclass. virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); /// Implements commit. virtual void doCommit(MapStringString commitUserData); /// Implements close. virtual void doClose(); friend class ParallelTermEnum; friend class ParallelTermDocs; friend class ParallelTermPositions; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Payload.h000066400000000000000000000057431456444476200221270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PAYLOAD_H #define PAYLOAD_H #include "LuceneObject.h" namespace Lucene { /// A Payload is metadata that can be stored together with each occurrence of a term. This metadata is stored /// inline in the posting list of the specific term. 
/// /// To store payloads in the index a {@link TokenStream} has to be used that produces payload data. /// /// Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)} to retrieve /// the payloads from the index. class LPPAPI Payload : public LuceneObject { public: /// Creates an empty payload and does not allocate a byte array. Payload(); /// Creates a new payload with the the given array as data. A reference to the passed-in array is held, /// ie. no copy is made. /// @param data the data of this payload Payload(ByteArray data); /// Creates a new payload with the the given array as data. A reference to the passed-in array is held, /// ie. no copy is made. /// @param data the data of this payload /// @param offset the offset in the data byte array /// @param length the length of the data Payload(ByteArray data, int32_t offset, int32_t length); virtual ~Payload(); LUCENE_CLASS(Payload); protected: /// the byte array containing the payload data ByteArray data; /// the offset within the byte array int32_t offset; /// the length of the payload data int32_t _length; public: /// Sets this payloads data. A reference to the passed-in array is held, ie. no copy is made. void setData(ByteArray data); /// Sets this payloads data. A reference to the passed-in array is held, ie. no copy is made. void setData(ByteArray data, int32_t offset, int32_t length); /// Returns a reference to the underlying byte array that holds this payloads data. ByteArray getData(); /// Returns the offset in the underlying byte array int32_t getOffset(); /// Returns the length of the payload data. int32_t length(); /// Returns the byte at the given index. uint8_t byteAt(int32_t index); /// Allocates a new byte array, copies the payload data into it and returns it. ByteArray toByteArray(); /// Copies the payload data to a byte array. 
/// @param target the target byte array /// @param targetOffset the offset in the target byte array void copyTo(ByteArray target, int32_t targetOffset); /// Clones this payload by creating a copy of the underlying byte array. virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PayloadAttribute.h000066400000000000000000000024401456444476200240020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PAYLOADATTRIBUTE_H #define PAYLOADATTRIBUTE_H #include "Attribute.h" namespace Lucene { /// The start and end character offset of a Token. class LPPAPI PayloadAttribute : public Attribute { public: /// Initialize this attribute with no payload. PayloadAttribute(); /// Initialize this attribute with the given payload. PayloadAttribute(const PayloadPtr& payload); virtual ~PayloadAttribute(); LUCENE_CLASS(PayloadAttribute); protected: PayloadPtr payload; public: virtual String toString(); /// Returns this Token's payload. virtual PayloadPtr getPayload(); /// Sets this Token's payload. 
virtual void setPayload(const PayloadPtr& payload); virtual void clear(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual void copyTo(const AttributePtr& target); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PayloadFunction.h000066400000000000000000000042131456444476200236240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PAYLOADFUNCTION_H #define PAYLOADFUNCTION_H #include "LuceneObject.h" namespace Lucene { /// An abstract class that defines a way for Payload*Query instances to transform the cumulative /// effects of payload scores for a document. 
/// /// @see PayloadTermQuery for more information class LPPAPI PayloadFunction : public LuceneObject { protected: PayloadFunction(); public: virtual ~PayloadFunction(); LUCENE_CLASS(PayloadFunction); public: /// Calculate the score up to this point for this doc and field /// @param docId The current doc /// @param field The field /// @param start The start position of the matching Span /// @param end The end position of the matching Span /// @param numPayloadsSeen The number of payloads seen so far /// @param currentScore The current score so far /// @param currentPayloadScore The score for the current payload /// @return The new current Score /// /// @see Spans virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, double currentScore, double currentPayloadScore) = 0; /// Calculate the final score for all the payloads seen so far for this doc/field /// @param docId The current doc /// @param field The current field /// @param numPayloadsSeen The total number of payloads seen on this document /// @param payloadScore The raw score for those payloads /// @return The final score for the payloads virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) = 0; /// Return hash code for this object. virtual int32_t hashCode() = 0; /// Return whether two objects are equal virtual bool equals(const LuceneObjectPtr& other) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PayloadNearQuery.h000066400000000000000000000060761456444476200237630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef PAYLOADNEARQUERY_H #define PAYLOADNEARQUERY_H #include "SpanNearQuery.h" #include "SpanWeight.h" #include "SpanScorer.h" namespace Lucene { /// This class is very similar to {@link SpanNearQuery} except that it factors in the value of the payloads /// located at each of the positions where the {@link TermSpans} occurs. /// /// In order to take advantage of this, you must override {@link Similarity#scorePayload} which returns 1 /// by default. /// /// Payload scores are aggregated using a pluggable {@link PayloadFunction}. /// /// @see Similarity#scorePayload class LPPAPI PayloadNearQuery : public SpanNearQuery { public: PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder); PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder, const PayloadFunctionPtr& function); virtual ~PayloadNearQuery(); LUCENE_CLASS(PayloadNearQuery); protected: String fieldName; PayloadFunctionPtr function; public: using SpanNearQuery::toString; virtual WeightPtr createWeight(const SearcherPtr& searcher); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual String toString(const String& field); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); friend class PayloadNearSpanWeight; friend class PayloadNearSpanScorer; }; class LPPAPI PayloadNearSpanWeight : public SpanWeight { public: PayloadNearSpanWeight(const SpanQueryPtr& query, const SearcherPtr& searcher); virtual ~PayloadNearSpanWeight(); LUCENE_CLASS(PayloadNearSpanWeight); public: virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); }; class LPPAPI PayloadNearSpanScorer : public SpanScorer { public: PayloadNearSpanScorer(const SpansPtr& spans, const WeightPtr& weight, const SimilarityPtr& similarity, ByteArray norms); virtual ~PayloadNearSpanScorer(); LUCENE_CLASS(PayloadNearSpanScorer); public: SpansPtr spans; SimilarityPtr 
similarity; protected: double payloadScore; int32_t payloadsSeen; public: /// Get the payloads associated with all underlying subspans void getPayloads(Collection subSpans); virtual double score(); protected: /// By default, uses the {@link PayloadFunction} to score the payloads, but can be overridden to do /// other things. /// @param payLoads The payloads /// @param start The start position of the span being scored /// @param end The end position of the span being scored /// @see Spans void processPayloads(Collection payLoads, int32_t start, int32_t end); virtual bool setFreqCurrentDoc(); virtual ExplanationPtr explain(int32_t doc); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PayloadSpanUtil.h000066400000000000000000000023711456444476200236010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PAYLOADSPANUTIL_H #define PAYLOADSPANUTIL_H #include "LuceneObject.h" namespace Lucene { /// Experimental class to get set of payloads for most standard Lucene queries. Operates like Highlighter - /// IndexReader should only contain doc of interest, best to use MemoryIndex. class LPPAPI PayloadSpanUtil : public LuceneObject { public: /// @param reader That contains doc with payloads to extract PayloadSpanUtil(const IndexReaderPtr& reader); virtual ~PayloadSpanUtil(); LUCENE_CLASS(PayloadSpanUtil); protected: IndexReaderPtr reader; public: /// Query should be rewritten for wild/fuzzy support. 
/// @return payloads Collection Collection getPayloadsForQuery(const QueryPtr& query); protected: void queryToSpanQuery(const QueryPtr& query, Collection payloads); void getPayloads(Collection payloads, const SpanQueryPtr& query); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PayloadTermQuery.h000066400000000000000000000030131456444476200237710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PAYLOADTERMQUERY_H #define PAYLOADTERMQUERY_H #include "SpanTermQuery.h" namespace Lucene { /// This class is very similar to {@link SpanTermQuery} except that it factors in the value of the payload /// located at each of the positions where the {@link Term} occurs. /// /// In order to take advantage of this, you must override {@link Similarity#scorePayload(int32_t, const String&, /// int32_t, int32_t, ByteArray, int32_t, int32_t)} which returns 1 by default. /// /// Payload scores are aggregated using a pluggable {@link PayloadFunction}. 
class LPPAPI PayloadTermQuery : public SpanTermQuery { public: PayloadTermQuery(const TermPtr& term, const PayloadFunctionPtr& function, bool includeSpanScore = true); virtual ~PayloadTermQuery(); LUCENE_CLASS(PayloadTermQuery); protected: PayloadFunctionPtr function; bool includeSpanScore; public: virtual WeightPtr createWeight(const SearcherPtr& searcher); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); friend class PayloadTermWeight; friend class PayloadTermSpanScorer; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PerFieldAnalyzerWrapper.h000066400000000000000000000055011456444476200252670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PERFIELDANALYZERWRAPPER_H #define PERFIELDANALYZERWRAPPER_H #include "Analyzer.h" namespace Lucene { /// This analyzer is used to facilitate scenarios where different fields require different analysis techniques. /// Use {@link #addAnalyzer} to add a non-default analyzer on a field name basis. /// /// Example usage: /// ///
/// PerFieldAnalyzerWrapperPtr aWrapper = newLucene(newLucene());
/// aWrapper->addAnalyzer(L"firstname", newLucene());
/// aWrapper->addAnalyzer(L"lastname", newLucene());
/// 
/// /// In this example, StandardAnalyzer will be used for all fields except "firstname" and "lastname", for which /// KeywordAnalyzer will be used. /// /// A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing and query parsing. class LPPAPI PerFieldAnalyzerWrapper : public Analyzer { public: /// Constructs with default analyzer. /// @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use the /// one provided here. PerFieldAnalyzerWrapper(const AnalyzerPtr& defaultAnalyzer); /// Constructs with default analyzer and a map of analyzers to use for specific fields. /// @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use the one provided here. /// @param fieldAnalyzers a Map (String field name to the Analyzer) to be used for those fields PerFieldAnalyzerWrapper(const AnalyzerPtr& defaultAnalyzer, MapStringAnalyzer fieldAnalyzers); virtual ~PerFieldAnalyzerWrapper(); LUCENE_CLASS(PerFieldAnalyzerWrapper); protected: AnalyzerPtr defaultAnalyzer; MapStringAnalyzer analyzerMap; public: /// Defines an analyzer to use for the specified field. /// @param fieldName field name requiring a non-default analyzer /// @param analyzer non-default analyzer to use for field void addAnalyzer(const String& fieldName, const AnalyzerPtr& analyzer); virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); /// Return the positionIncrementGap from the analyzer assigned to fieldName. 
virtual int32_t getPositionIncrementGap(const String& fieldName); /// Return the offsetGap from the analyzer assigned to field virtual int32_t getOffsetGap(const FieldablePtr& field); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PhrasePositions.h000066400000000000000000000022671456444476200236660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PHRASEPOSITIONS_H #define PHRASEPOSITIONS_H #include "LuceneObject.h" namespace Lucene { /// Position of a term in a document that takes into account the term offset within the phrase. class PhrasePositions : public LuceneObject { public: PhrasePositions(const TermPositionsPtr& t, int32_t o); virtual ~PhrasePositions(); LUCENE_CLASS(PhrasePositions); public: int32_t doc; // current doc int32_t position; // position in doc int32_t count; // remaining pos in this doc int32_t offset; // position in phrase TermPositionsPtr tp; // stream of positions PhrasePositions* __next = nullptr; // used to make lists bool repeats; // there's other pp for same term (eg. query="1st word 2nd word"~1) public: bool next(); bool skipTo(int32_t target); void firstPosition(); bool nextPosition(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PhraseQuery.h000066400000000000000000000056411456444476200230030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef PHRASEQUERY_H #define PHRASEQUERY_H #include "Query.h" namespace Lucene { /// A Query that matches documents containing a particular sequence of terms. A PhraseQuery is built by /// QueryParser for input like "new york". /// /// This query may be combined with other terms or queries with a {@link BooleanQuery}. class LPPAPI PhraseQuery : public Query { public: /// Constructs an empty phrase query. PhraseQuery(); virtual ~PhraseQuery(); LUCENE_CLASS(PhraseQuery); protected: String field; Collection terms; Collection positions; int32_t maxPosition; int32_t slop; public: using Query::toString; /// Sets the number of other words permitted between words in query phrase. If zero, then this is an /// exact phrase search. For larger values this works like a WITHIN or NEAR operator. /// /// The slop is in fact an edit-distance, where the units correspond to moves of terms in the query phrase /// out of position. For example, to switch the order of two words requires two moves (the first move /// places the words atop one another), so to permit re-orderings of phrases, the slop must be at least two. /// /// More exact matches are scored higher than sloppier matches, thus search results are sorted by exactness. /// /// The slop is zero by default, requiring exact matches. void setSlop(int32_t slop); /// Returns the slop. /// @see #setSlop() int32_t getSlop(); /// Adds a term to the end of the query phrase. /// The relative position of the term is the one immediately after the last term added. void add(const TermPtr& term); /// Adds a term to the end of the query phrase. /// The relative position of the term within the phrase is specified explicitly. This allows eg. phrases /// with more than one term at the same position or phrases with gaps (eg. in connection with stopwords). void add(const TermPtr& term, int32_t position); /// Returns the set of terms in this phrase. 
Collection getTerms(); /// Returns the relative positions of terms in this phrase. Collection getPositions(); virtual WeightPtr createWeight(const SearcherPtr& searcher); virtual void extractTerms(SetTerm terms); /// Prints a user-readable version of this query. virtual String toString(const String& field); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); friend class PhraseWeight; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PhraseQueue.h000066400000000000000000000014141456444476200227540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PHRASEQUEUE_H #define PHRASEQUEUE_H #include "PriorityQueue.h" namespace Lucene { // raw pointer typedef PhrasePositions* PhrasePositionsStar; class PhraseQueue : public PriorityQueue { public: PhraseQueue(int32_t size); virtual ~PhraseQueue(); LUCENE_CLASS(PhraseQueue); protected: virtual bool lessThan(const PhrasePositionsStar& first, const PhrasePositionsStar& second); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PhraseScorer.h000066400000000000000000000050601456444476200231260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PHRASESCORER_H #define PHRASESCORER_H #include "Scorer.h" #include namespace Lucene { /// Scoring functionality for phrase queries. 
A document is considered matching if it contains the /// phrase-query terms at "valid" positions. What "valid positions" are depends on the type of the /// phrase query: for an exact phrase query terms are required to appear in adjacent locations, while /// for a sloppy phrase query some distance between the terms is allowed. The abstract method {@link /// #phraseFreq()} of extending classes is invoked for each document containing all the phrase query /// terms, in order to compute the frequency of the phrase query in that document. A non zero frequency /// means a match. class PhraseScorer : public Scorer { public: PhraseScorer(const WeightPtr& weight, Collection tps, Collection offsets, const SimilarityPtr& similarity, ByteArray norms); virtual ~PhraseScorer(); LUCENE_CLASS(PhraseScorer); protected: WeightPtr weight; Weight* __weight = nullptr; ByteArray norms; double value; bool firstTime; bool more; PhraseQueuePtr pq; std::vector _holds; PhrasePositions* __first = nullptr; PhrasePositions* __last = nullptr; double freq; // phrase frequency in current doc as computed by phraseFreq(). public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual double score(); virtual int32_t advance(int32_t target); /// Phrase frequency in current doc as computed by phraseFreq(). double currentFreq(); virtual float termFreq(){ return currentFreq(); } virtual String toString(); protected: /// Next without initial increment bool doNext(); /// For a document containing all the phrase query terms, compute the frequency of the phrase in /// that document. A non zero frequency means a match. /// Note, that containing all phrase terms does not guarantee a match - they have to be found in /// matching locations. /// @return frequency of the phrase in current doc, 0 if not found. 
virtual double phraseFreq() = 0; void init(); void sort(); void pqToList(); void firstToLast(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PorterStemFilter.h000066400000000000000000000030161456444476200237770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PORTERSTEMFILTER_H #define PORTERSTEMFILTER_H #include "TokenFilter.h" namespace Lucene { /// Transforms the token stream as per the Porter stemming algorithm. Note: the input to the stemming filter must /// already be in lower case, so you will need to use LowerCaseFilter or LowerCaseTokenizer further down the Tokenizer /// chain in order for this to work properly. /// /// To use this filter with other analyzers, you'll want to write an Analyzer class that sets up the TokenStream chain /// as you want it. To use this with LowerCaseTokenizer, for example, you'd write an analyzer like this: /// ///
/// class MyAnalyzer : public Analyzer
/// {
/// public:
///     virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader)
///     {
///         return newLucene(newLucene(reader));
///     }
/// };
/// 
class LPPAPI PorterStemFilter : public TokenFilter { public: PorterStemFilter(const TokenStreamPtr& input); virtual ~PorterStemFilter(); LUCENE_CLASS(PorterStemFilter); protected: PorterStemmerPtr stemmer; TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PorterStemmer.h000066400000000000000000000103071456444476200233360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PORTERSTEMMER_H #define PORTERSTEMMER_H #include "LuceneObject.h" namespace Lucene { /// This is the Porter stemming algorithm, coded up as thread-safe ANSI C by the author. /// /// It may be be regarded as canonical, in that it follows the algorithm presented in Porter, 1980, An algorithm /// for suffix stripping, Program, Vol. 14, no. 3, pp 130-137, only differing from it at the points marked DEPARTURE. /// /// See also http://www.tartarus.org/~martin/PorterStemmer /// /// The algorithm as described in the paper could be exactly replicated by adjusting the points of DEPARTURE, but /// this is barely necessary, because (a) the points of DEPARTURE are definitely improvements, and (b) no encoding /// of the Porter stemmer I have seen is anything like as exact as this version, even with the points of DEPARTURE! /// /// Release 2 (the more old-fashioned, non-thread-safe version may be regarded as release 1.) 
class PorterStemmer : public LuceneObject { public: PorterStemmer(); virtual ~PorterStemmer(); LUCENE_CLASS(PorterStemmer); protected: wchar_t* b; // buffer for word to be stemmed int32_t k; // offset to the end of the string int32_t j; // a general offset into the string int32_t i; // initial length of word bool dirty; public: bool stem(CharArray word); /// In stem(b, k), b is a char pointer, and the string to be stemmed is from b[0] to b[k] inclusive. /// Possibly b[k+1] == '\0', but it is not important. The stemmer adjusts the characters b[0] ... b[k] and /// stores the new end-point of the string, k'. Stemming never increases word length, so 0 <= k' <= k. bool stem(wchar_t* b, int32_t k); wchar_t* getResultBuffer(); int32_t getResultLength(); protected: /// Returns true if b[i] is a consonant. ('b' means 'z->b', but here and below we drop 'z->' in comments. bool cons(int32_t i); /// Measures the number of consonant sequences between 0 and j. If c is a consonant sequence and v a vowel /// sequence, and <..> indicates arbitrary presence, /// /// gives 0 /// vc gives 1 /// vcvc gives 2 /// vcvcvc gives 3 /// ... int32_t m(); /// Return true if 0,...j contains a vowel bool vowelinstem(); /// Return true if j,(j-1) contain a double consonant. bool doublec(int32_t j); /// Return true if i-2,i-1,i has the form consonant - vowel - consonant and also if the second c is not w,x or y. /// This is used when trying to restore an e at the end of a short word. /// /// eg. cav(e), lov(e), hop(e), crim(e), but /// snow, box, tray. bool cvc(int32_t i); /// Returns true if 0,...k ends with the string s. bool ends(const wchar_t* s); /// Sets (j+1),...k to the characters in the string s, readjusting k. void setto(const wchar_t* s); void r(const wchar_t* s); /// step1ab() gets rid of plurals and -ed or -ing. eg. 
/// /// caresses -> caress /// ponies -> poni /// ties -> ti /// caress -> caress /// cats -> cat /// /// feed -> feed /// agreed -> agree /// disabled -> disable /// /// matting -> mat /// mating -> mate /// meeting -> meet /// milling -> mill /// messing -> mess /// /// meetings -> meet void step1ab(); /// Turns terminal y to i when there is another vowel in the stem. void step1c(); /// Maps double suffices to single ones. so -ization ( = -ize plus -ation) maps to -ize etc. note that the /// string before the suffix must give m() > 0. void step2(); /// Deals with -ic-, -full, -ness etc. similar strategy to step2. void step3(); /// Takes off -ant, -ence etc., in context vcvc. void step4(); /// Removes a final -e if m() > 1, and changes -ll to -l if m() > 1. void step5(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PositionBasedTermVectorMapper.h000066400000000000000000000052561456444476200264600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef POSITIONBASEDTERMVECTORMAPPER_H #define POSITIONBASEDTERMVECTORMAPPER_H #include "TermVectorMapper.h" namespace Lucene { class LPPAPI PositionBasedTermVectorMapper : public TermVectorMapper { public: PositionBasedTermVectorMapper(bool ignoringOffsets = false); virtual ~PositionBasedTermVectorMapper(); LUCENE_CLASS(PositionBasedTermVectorMapper); protected: MapStringMapIntTermVectorsPositionInfo fieldToTerms; String currentField; /// A Map of Integer and TermVectorsPositionInfo MapIntTermVectorsPositionInfo currentPositions; bool storeOffsets; public: /// Never ignores positions. This mapper doesn't make much sense unless there are positions. 
/// @return false virtual bool isIgnoringPositions(); /// Callback for the TermVectorReader. virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); /// Callback mechanism used by the TermVectorReader. virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); /// Get the mapping between fields and terms, sorted by the comparator /// @return A map between field names and a Map. The sub-Map key is the position as the integer, the value is /// {@link PositionBasedTermVectorMapper}. MapStringMapIntTermVectorsPositionInfo getFieldToTerms(); }; /// Container for a term at a position class LPPAPI TermVectorsPositionInfo : public LuceneObject { public: TermVectorsPositionInfo(int32_t position, bool storeOffsets); virtual ~TermVectorsPositionInfo(); LUCENE_CLASS(TermVectorsPositionInfo); protected: int32_t position; Collection terms; Collection offsets; public: void addTerm(const String& term, const TermVectorOffsetInfoPtr& info); /// @return The position of the term int32_t getPosition(); /// Note, there may be multiple terms at the same position /// @return A List of Strings Collection getTerms(); /// Parallel list (to {@link #getTerms()}) of TermVectorOffsetInfo objects. There may be multiple /// entries since there may be multiple terms at a position. /// @return A List of TermVectorOffsetInfo objects, if offsets are stored. Collection getOffsets(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PositionIncrementAttribute.h000066400000000000000000000045511456444476200260670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef POSITIONINCREMENTATTRIBUTE_H #define POSITIONINCREMENTATTRIBUTE_H #include "Attribute.h" namespace Lucene { /// The positionIncrement determines the position of this token relative to the previous Token in a /// TokenStream, used in phrase searching. /// /// The default value is one. /// /// Some common uses for this are: /// /// Set it to zero to put multiple terms in the same position. This is useful if, eg., a word has multiple /// stems. Searches for phrases including either stem will match. In this case, all but the first stem's /// increment should be set to zero: the increment of the first instance should be one. Repeating a token /// with an increment of zero can also be used to boost the scores of matches on that token. /// /// Set it to values greater than one to inhibit exact phrase matches. If, for example, one does not want /// phrases to match across removed stop words, then one could build a stop word filter that removes stop /// words and also sets the increment to the number of stop words removed before each non-stop word. Then /// exact phrase queries will only match when the terms occur with no intervening stop words. /// /// @see TermPositions class LPPAPI PositionIncrementAttribute : public Attribute { public: PositionIncrementAttribute(); virtual ~PositionIncrementAttribute(); LUCENE_CLASS(PositionIncrementAttribute); protected: int32_t positionIncrement; public: virtual String toString(); /// Set the position increment. The default value is one. /// @param positionIncrement the distance from the prior term virtual void setPositionIncrement(int32_t positionIncrement); /// Returns the position increment of this Token. 
/// @see #setPositionIncrement virtual int32_t getPositionIncrement(); virtual void clear(); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual void copyTo(const AttributePtr& target); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PositiveScoresOnlyCollector.h000066400000000000000000000021361456444476200262210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef POSITIVESCORESONLYCOLLECTOR_H #define POSITIVESCORESONLYCOLLECTOR_H #include "Collector.h" namespace Lucene { /// A {@link Collector} implementation which wraps another {@link Collector} and makes sure only /// documents with scores > 0 are collected. class LPPAPI PositiveScoresOnlyCollector : public Collector { public: PositiveScoresOnlyCollector(const CollectorPtr& c); virtual ~PositiveScoresOnlyCollector(); LUCENE_CLASS(PositiveScoresOnlyCollector); protected: CollectorPtr collector; ScorerPtr scorer; public: virtual void collect(int32_t doc); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); virtual void setScorer(const ScorerPtr& scorer); virtual bool acceptsDocsOutOfOrder(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PrefixFilter.h000066400000000000000000000014431456444476200231320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef PREFIXFILTER_H #define PREFIXFILTER_H #include "MultiTermQueryWrapperFilter.h" namespace Lucene { /// A Filter that restricts search results to values that have a matching prefix in a given field. class LPPAPI PrefixFilter : public MultiTermQueryWrapperFilter { public: PrefixFilter(const TermPtr& prefix); virtual ~PrefixFilter(); LUCENE_CLASS(PrefixFilter); public: TermPtr getPrefix(); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PrefixQuery.h000066400000000000000000000026071456444476200230150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PREFIXQUERY_H #define PREFIXQUERY_H #include "MultiTermQuery.h" namespace Lucene { /// A Query that matches documents containing terms with a specified prefix. A PrefixQuery is built by /// QueryParser for input like app*. /// /// This query uses the {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} rewrite method. class LPPAPI PrefixQuery : public MultiTermQuery { public: /// Constructs a query for terms starting with prefix. PrefixQuery(const TermPtr& prefix); virtual ~PrefixQuery(); LUCENE_CLASS(PrefixQuery); protected: TermPtr prefix; public: using MultiTermQuery::toString; /// Returns the prefix of this query. TermPtr getPrefix(); /// Prints a user-readable version of this query. 
virtual String toString(const String& field); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual int32_t hashCode(); virtual bool equals(const LuceneObjectPtr& other); protected: virtual FilteredTermEnumPtr getEnum(const IndexReaderPtr& reader); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PrefixTermEnum.h000066400000000000000000000021311456444476200234340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PREFIXTERMENUM_H #define PREFIXTERMENUM_H #include "FilteredTermEnum.h" namespace Lucene { /// Subclass of FilteredTermEnum for enumerating all terms that match the specified prefix filter term. /// /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than /// all that precede it. class LPPAPI PrefixTermEnum : public FilteredTermEnum { public: PrefixTermEnum(const IndexReaderPtr& reader, const TermPtr& prefix); virtual ~PrefixTermEnum(); LUCENE_CLASS(PrefixTermEnum); protected: TermPtr prefix; bool _endEnum; public: virtual double difference(); protected: virtual bool endEnum(); virtual bool termCompare(const TermPtr& term); TermPtr getPrefixTerm(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/PriorityQueue.h000066400000000000000000000151441456444476200233600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef PRIORITYQUEUE_H #define PRIORITYQUEUE_H #include "LuceneObject.h" #include "MiscUtils.h" namespace Lucene { /// A PriorityQueue maintains a partial ordering of its elements such that the least element can always /// be found in constant time. Put()'s and pop()'s require log(size) time. /// /// NOTE: This class pre-allocates a full array of length maxSize + 1. template class PriorityQueue : public LuceneObject { public: typedef typename std::vector heap_type; PriorityQueue(int32_t maxSize) { this->_size = 0; this->_maxSize = maxSize; } virtual ~PriorityQueue() { } protected: heap_type heap; int32_t _size; int32_t _maxSize; public: virtual void initialize() { bool empty = heap.empty(); if (empty) { int32_t heapSize = 0; if (_maxSize == 0) { // We allocate 1 extra to avoid if statement in top() heapSize = 2; } else if (_maxSize == INT_MAX) { // Don't wrap heapSize to -1, in this case, which causes a confusing NegativeArraySizeException. // Note that very likely this will simply then hit an OOME, but at least that's more indicative // to caller that this values is too big. We don't +1 in this case, but it's very unlikely in // practice one will actually insert this many objects into the PQ heapSize = INT_MAX; } else { // NOTE: we add +1 because all access to heap is 1-based not 0-based. heap[0] is unused. heapSize = _maxSize + 1; } this->heap.resize(heapSize); } // If sentinel objects are supported, populate the queue with them TYPE sentinel = getSentinelObject(); if (empty && sentinel) { heap[1] = sentinel; for (int32_t i = 2; i < (int32_t)heap.size(); ++i) { heap[i] = getSentinelObject(); } _size = _maxSize; } } /// Return maximum size of queue int32_t maxSize() { return _maxSize; } /// Adds an Object to a PriorityQueue in log(size) time. If one tries to add more objects /// than maxSize from initialize an {@link IndexOutOfBoundsException} is thrown. 
TYPE add(const TYPE& type) { ++_size; if (_size < 0 || _size >= (int32_t)heap.size()) { boost::throw_exception(IndexOutOfBoundsException()); } heap[_size] = type; upHeap(); return heap[1]; } /// Adds an Object to a PriorityQueue in log(size) time. It returns the object (if any) that was /// dropped off the heap because it was full. This can be the given parameter (in case it is /// smaller than the full heap's minimum, and couldn't be added), or another object that was /// previously the smallest value in the heap and now has been replaced by a larger one, or null /// if the queue wasn't yet full with maxSize elements. TYPE addOverflow(const TYPE& type) { if (_size < _maxSize) { add(type); return TYPE(); } else if (_size > 0 && !lessThan(type, heap[1])) { TYPE result = heap[1]; heap[1] = type; updateTop(); return result; } else { return type; } } /// Returns the least element of the PriorityQueue. TYPE top() { // We don't need to check size here: if maxSize is 0, then heap is length 2 array with both // entries null. If size is 0 then heap[1] is already null. return heap[1]; } /// Removes and returns the least element of the PriorityQueue. TYPE pop() { if (_size > 0) { TYPE result = heap[1]; // save first value heap[1] = heap[_size]; // move last to first heap[_size--] = TYPE(); downHeap(); // adjust heap return result; } else { return TYPE(); } } /// Should be called when the Object at top changes values. TYPE updateTop() { downHeap(); return heap[1]; } /// Returns the number of elements currently stored in the PriorityQueue. int32_t size() const { return _size; } /// Returns whether PriorityQueue is currently empty. bool empty() const { return (_size == 0); } /// Removes all entries from the PriorityQueue. 
void clear() { for (int32_t i = 0; i <= _size; ++i) { heap[i] = TYPE(); } _size = 0; } protected: void upHeap() { int32_t i = _size; TYPE node = heap[i]; // save bottom node int32_t j = MiscUtils::unsignedShift(i, 1); while (j > 0 && lessThan(node, heap[j])) { heap[i] = heap[j]; // shift parents down i = j; j = MiscUtils::unsignedShift(j, 1); } heap[i] = node; // install saved node } void downHeap() { int32_t i = 1; TYPE node = heap[i]; // save top node int32_t j = i << 1; // find smaller child int32_t k = j + 1; if (k <= _size && lessThan(heap[k], heap[j])) { j = k; } while (j <= _size && lessThan(heap[j], node)) { heap[i] = heap[j]; // shift up child i = j; j = i << 1; k = j + 1; if (k <= _size && lessThan(heap[k], heap[j])) { j = k; } } heap[i] = node; // install saved node } /// Determines the ordering of objects in this priority queue. Subclasses must define this one method. virtual bool lessThan(const TYPE& first, const TYPE& second) { return std::less()(first, second); } /// This method can be overridden by extending classes to return a sentinel object which will be used by /// {@link #initialize} to fill the queue, so that the code which uses that queue can always assume it's /// full and only change the top without attempting to insert any new object. /// /// Those sentinel values should always compare worse than any non-sentinel value (ie., {@link #lessThan} /// should always favour the non-sentinel values). virtual TYPE getSentinelObject() { return TYPE(); } }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Query.h000066400000000000000000000106211456444476200216320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef QUERY_H #define QUERY_H #include "LuceneObject.h" namespace Lucene { /// The abstract base class for queries. /// /// Instantiable subclasses are: /// /// {@link TermQuery} /// {@link MultiTermQuery} /// {@link BooleanQuery} /// {@link WildcardQuery} /// {@link PhraseQuery} /// {@link PrefixQuery} /// {@link MultiPhraseQuery} /// {@link FuzzyQuery} /// {@link TermRangeQuery} /// {@link NumericRangeQuery} /// {@link org.apache.lucene.search.spans.SpanQuery} /// /// A parser for queries is contained in: {@link QueryParser} class LPPAPI Query : public LuceneObject { public: Query(); virtual ~Query(); LUCENE_CLASS(Query); protected: double boost; // query boost factor public: /// Sets the boost for this query clause to b. Documents matching this clause will (in addition to /// the normal weightings) have their score multiplied by b. virtual void setBoost(double b); /// Gets the boost for this clause. Documents matching this clause will (in addition to the normal /// weightings) have their score multiplied by b. The boost is 1.0 by default. virtual double getBoost(); /// Prints a query to a string, with field assumed to be the default field and omitted. /// /// The representation used is one that is supposed to be readable by {@link QueryParser}. However, /// there are the following limitations: /// /// If the query was created by the parser, the printed representation may not be exactly what was /// parsed. For example, characters that need to be escaped will be represented without the required /// backslash. /// /// Some of the more complicated queries (eg. span queries) don't have a representation that can be /// parsed by QueryParser. virtual String toString(const String& field); /// Prints a query to a string. virtual String toString(); /// Constructs an appropriate Weight implementation for this query. /// Only implemented by primitive queries, which re-write to themselves. 
virtual WeightPtr createWeight(const SearcherPtr& searcher); /// Constructs and initializes a Weight for a top-level query. virtual WeightPtr weight(const SearcherPtr& searcher); /// Called to re-write queries into primitive queries. For example, a PrefixQuery will be rewritten /// into a BooleanQuery that consists of TermQuerys. virtual QueryPtr rewrite(const IndexReaderPtr& reader); /// Called when re-writing queries under MultiSearcher. /// /// Create a single query suitable for use by all subsearchers (in 1-1 correspondence with queries). /// This is an optimization of the OR of all queries. We handle the common optimization cases of equal /// queries and overlapping clauses of boolean OR queries (as generated by MultiTermQuery.rewrite()). /// Be careful overriding this method as queries[0] determines which method will be called and is not /// necessarily of the same type as the other queries. virtual QueryPtr combine(Collection queries); /// Adds all terms occurring in this query to the terms set. Only works if this query is in its /// {@link #rewrite rewritten} form. virtual void extractTerms(SetTerm terms); /// Merges the clauses of a set of BooleanQuery's into a single BooleanQuery. /// /// A utility for use by {@link #combine(Query[])} implementations. static QueryPtr mergeBooleanQueries(Collection queries); /// Returns the Similarity implementation to be used for this query. Subclasses may override this method /// to specify their own Similarity implementation, perhaps one that delegates through that of the Searcher. /// By default the Searcher's Similarity implementation is returned. virtual SimilarityPtr getSimilarity(const SearcherPtr& searcher); /// Returns a clone of this query. virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual int32_t hashCode(); virtual bool equals(const LuceneObjectPtr& other); /// Return given boost value as a string. 
String boostString(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/QueryParseError.h000066400000000000000000000044531456444476200236450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYPARSEERROR_H #define QUERYPARSEERROR_H #include "LuceneObject.h" namespace Lucene { /// Utility class to handle query parse errors class QueryParseError : public LuceneObject { public: virtual ~QueryParseError(); LUCENE_CLASS(QueryParseError); public: /// Returns a detailed message for the Error when it is thrown by the token manager to indicate a /// lexical error. /// @param EOFSeen Indicates if EOF caused the lexical error /// @param curLexState Lexical state in which this error occurred /// @param errorLine Line number when the error occurred /// @param errorColumn Column number when the error occurred /// @param errorAfter Prefix that was seen before this error occurred /// @param curChar The offending character static String lexicalError(bool EOFSeen, int32_t lexState, int32_t errorLine, int32_t errorColumn, const String& errorAfter, wchar_t curChar); /// Generate a parse error message and returns it. /// @param currentToken This is the last token that has been consumed successfully. If this object /// has been created due to a parse error, the token following this token will (therefore) be the first /// error token. /// @param expectedTokenSequences Each entry in this array is an array of integers. Each array of /// integers represents a sequence of tokens (by their ordinal values) that is expected at this point /// of the parse. 
/// @param tokenImage This is a reference to the "tokenImage" array of the generated parser within /// which the parse error occurred. static String parseError(const QueryParserTokenPtr& currentToken, Collection< Collection > expectedTokenSequences, Collection tokenImage); protected: /// Replaces unprintable characters by their escaped (or unicode escaped) equivalents in the /// given string static String addEscapes(const String& str); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/QueryParser.h000066400000000000000000000462541456444476200230220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYPARSER_H #define QUERYPARSER_H #include "QueryParserConstants.h" #include "DateTools.h" #include "BooleanClause.h" namespace Lucene { typedef HashMap MapStringResolution; /// The most important method is {@link #parse(const String&)}. /// /// The syntax for query strings is as follows: /// A Query is a series of clauses. /// A clause may be prefixed by: ///
    ///
  • a plus (+) or a minus (-) sign, indicating that the clause is required or prohibited respectively; or ///
  • a term followed by a colon, indicating the field to be searched. This enables one to construct queries /// which search multiple fields. ///
/// /// A clause may be either: ///
    ///
  • a term, indicating all the documents that contain this term; or ///
  • a nested query, enclosed in parentheses. Note that this may be used with a +/- prefix to require any /// of a set of terms. ///
/// /// Thus, in BNF, the query grammar is: ///
/// Query  ::= ( Clause )*
/// Clause ::= ["+", "-"] [ ":"] (  | "(" Query ")" )
/// 
/// /// Examples of appropriately formatted queries can be found in the query syntax documentation. /// /// In {@link TermRangeQuery}s, QueryParser tries to detect date values, eg. /// date:[6/1/2005 TO 6/4/2005] produces a range query that searches for "date" fields between /// 2005-06-01 and 2005-06-04. Note that the format of the accepted input depends on {@link #setLocale(Locale) /// the locale}. /// /// By default a date is converted into a search term using the deprecated {@link DateField} for compatibility /// reasons. To use the new {@link DateTools} to convert dates, a {@link Resolution} has to be set. /// /// The date resolution that shall be used for RangeQueries can be set using {@link #setDateResolution(Resolution)} /// or {@link #setDateResolution(const String&, Resolution)}. The former sets the default date resolution for /// all fields, whereas the latter can be used to set field specific date resolutions. Field specific date /// resolutions take, if set, precedence over the default date resolution. /// /// If you use neither {@link DateField} nor {@link DateTools} in your index, you can create your own query /// parser that inherits QueryParser and overwrites {@link #getRangeQuery(const String&, const String&, /// const String&, bool)} to use a different method for date conversion. /// /// Note that QueryParser is not thread-safe. /// /// NOTE: there is a new QueryParser in contrib, which matches the same syntax as this class, but is more modular, /// enabling substantial customization to how a query is created. /// /// NOTE: You must specify the required {@link Version} compatibility when creating QueryParser: ///
    ///
  • As of 2.9, {@link #setEnablePositionIncrements} is true by default. ///
class LPPAPI QueryParser : public QueryParserConstants, public LuceneObject { public: /// Constructs a query parser. /// @param matchVersion Lucene version to match. /// @param field The default field for query terms. /// @param analyzer Used to find terms in the query text. QueryParser(LuceneVersion::Version matchVersion, const String& field, const AnalyzerPtr& analyzer); /// Constructor with user supplied QueryParserCharStream. QueryParser(const QueryParserCharStreamPtr& stream); /// Constructor with generated Token Manager. QueryParser(const QueryParserTokenManagerPtr& tokenMgr); virtual ~QueryParser(); LUCENE_CLASS(QueryParser); /// The default operator for parsing queries. Use {@link QueryParser#setDefaultOperator} to change it. enum Operator { OR_OPERATOR, AND_OPERATOR }; protected: static const int32_t CONJ_NONE; static const int32_t CONJ_AND; static const int32_t CONJ_OR; static const int32_t MOD_NONE; static const int32_t MOD_NOT; static const int32_t MOD_REQ; /// The actual operator that parser uses to combine query terms Operator _operator; /// Next token. 
int32_t _jj_ntk; QueryParserTokenPtr jj_scanpos; QueryParserTokenPtr jj_lastpos; int32_t jj_la; int32_t jj_gen; Collection jj_la1; static const int32_t jj_la1_0[]; static const int32_t jj_la1_1[]; struct JJCalls; typedef boost::shared_ptr JJCallsPtr; struct JJCalls { JJCalls() { gen = 0; arg = 0; } int32_t gen; QueryParserTokenPtr first; int32_t arg; JJCallsPtr next; }; Collection jj_2_rtns; bool jj_rescan; int32_t jj_gc; Collection< Collection > jj_expentries; Collection jj_expentry; int32_t jj_kind; Collection jj_lasttokens; int32_t jj_endpos; public: bool lowercaseExpandedTerms; RewriteMethodPtr multiTermRewriteMethod; bool allowLeadingWildcard; bool enablePositionIncrements; AnalyzerPtr analyzer; String field; int32_t phraseSlop; double fuzzyMinSim; int32_t fuzzyPrefixLength; std::locale locale; // the default date resolution DateTools::Resolution dateResolution; // maps field names to date resolutions MapStringResolution fieldToDateResolution; // The collator to use when determining range inclusion, for use when constructing RangeQuerys CollatorPtr rangeCollator; /// Generated Token Manager. QueryParserTokenManagerPtr token_source; /// Current token. QueryParserTokenPtr token; /// Next token. QueryParserTokenPtr jj_nt; public: /// Parses a query string, returning a {@link Query}. /// @param query The query string to be parsed. QueryPtr parse(const String& query); /// @return Returns the analyzer. AnalyzerPtr getAnalyzer(); /// @return Returns the field. String getField(); /// Get the minimal similarity for fuzzy queries. double getFuzzyMinSim(); /// Set the minimum similarity for fuzzy queries. Default is 0.5. void setFuzzyMinSim(double fuzzyMinSim); /// Get the prefix length for fuzzy queries. /// @return Returns the fuzzyPrefixLength. int32_t getFuzzyPrefixLength(); /// Set the prefix length for fuzzy queries. Default is 0. /// @param fuzzyPrefixLength The fuzzyPrefixLength to set. 
void setFuzzyPrefixLength(int32_t fuzzyPrefixLength); /// Sets the default slop for phrases. If zero, then exact phrase matches are required. /// Default value is zero. void setPhraseSlop(int32_t phraseSlop); /// Gets the default slop for phrases. int32_t getPhraseSlop(); /// Set to true to allow leading wildcard characters. /// /// When set, * or ? are allowed as the first character of a PrefixQuery and WildcardQuery. /// Note that this can produce very slow queries on big indexes. Default: false. void setAllowLeadingWildcard(bool allowLeadingWildcard); /// @see #setAllowLeadingWildcard(bool) bool getAllowLeadingWildcard(); /// Set to true to enable position increments in result query. /// /// When set, result phrase and multi-phrase queries will be aware of position increments. /// Useful when eg. a StopFilter increases the position increment of the token that follows an /// omitted token. Default: false. void setEnablePositionIncrements(bool enable); /// @see #setEnablePositionIncrements(bool) bool getEnablePositionIncrements(); /// Sets the boolean operator of the QueryParser. In default mode (OR_OPERATOR) terms without /// any modifiers are considered optional: for example capital of Hungary is equal to capital /// OR of OR Hungary. /// In AND_OPERATOR mode terms are considered to be in conjunction: the above mentioned query is /// parsed as capital AND of AND Hungary void setDefaultOperator(Operator op); /// Gets implicit operator setting, which will be either AND_OPERATOR or OR_OPERATOR. Operator getDefaultOperator(); /// Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically lower-cased /// or not. Default is true. void setLowercaseExpandedTerms(bool lowercaseExpandedTerms); /// @see #setLowercaseExpandedTerms(bool) bool getLowercaseExpandedTerms(); /// By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} when /// creating a PrefixQuery, WildcardQuery or RangeQuery. 
This implementation is generally preferable /// because it a) Runs faster b) Does not have the scarcity of terms unduly influence score c) avoids /// any "TooManyClauses" exception. However, if your application really needs to use the old- /// fashioned BooleanQuery expansion rewriting and the above points are not relevant then use this /// to change the rewrite method. void setMultiTermRewriteMethod(const RewriteMethodPtr& method); /// @see #setMultiTermRewriteMethod RewriteMethodPtr getMultiTermRewriteMethod(); /// Set locale used by date range parsing. void setLocale(std::locale locale); /// Returns current locale, allowing access by subclasses. std::locale getLocale(); /// Sets the default date resolution used by RangeQueries for fields for which no specific date /// resolutions has been set. Field specific resolutions can be set with {@link /// #setDateResolution(const String&, DateTools::Resolution)}. /// @param dateResolution The default date resolution to set void setDateResolution(DateTools::Resolution dateResolution); /// Sets the date resolution used by RangeQueries for a specific field. /// @param fieldName Field for which the date resolution is to be set /// @param dateResolution Date resolution to set void setDateResolution(const String& fieldName, DateTools::Resolution dateResolution); /// Returns the date resolution that is used by RangeQueries for the given field. Returns null, if /// no default or field specific date resolution has been set for the given field. DateTools::Resolution getDateResolution(const String& fieldName); /// Sets the collator used to determine index term inclusion in ranges for RangeQuerys. /// /// WARNING: Setting the rangeCollator to a non-null collator using this method will cause every /// single index Term in the Field referenced by lowerTerm and/or upperTerm to be examined. Depending /// on the number of index Terms in this Field, the operation could be very slow. 
/// @param rc The collator to use when constructing RangeQuerys void setRangeCollator(const CollatorPtr& rc); /// @return the collator used to determine index term inclusion in ranges for RangeQuerys. CollatorPtr getRangeCollator(); /// Command line tool to test QueryParser, using {@link SimpleAnalyzer}. static int main(Collection args); /// Query ::= ( Clause )* /// Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) int32_t Conjunction(); int32_t Modifiers(); /// This makes sure that there is no garbage after the query string virtual QueryPtr TopLevelQuery(const String& field); virtual QueryPtr ParseQuery(const String& field); virtual QueryPtr ParseClause(const String& field); virtual QueryPtr ParseTerm(const String& field); /// Reinitialise. virtual void ReInit(const QueryParserCharStreamPtr& stream); /// Reinitialise. virtual void ReInit(const QueryParserTokenManagerPtr& tokenMgr); /// Get the next Token. virtual QueryParserTokenPtr getNextToken(); /// Get the specific Token. virtual QueryParserTokenPtr getToken(int32_t index); /// Generate QueryParserError exception. virtual void generateParseException(); /// Enable tracing. virtual void enable_tracing(); /// Disable tracing. virtual void disable_tracing(); protected: /// Construct query parser with supplied QueryParserCharStream or TokenManager void ConstructParser(const QueryParserCharStreamPtr& stream, const QueryParserTokenManagerPtr& tokenMgr); virtual void addClause(Collection clauses, int32_t conj, int32_t mods, const QueryPtr& q); /// Use the analyzer to get all the tokens, and then build a TermQuery, PhraseQuery, or nothing /// based on the term count. virtual QueryPtr getFieldQuery(const String& field, const String& queryText); /// Base implementation delegates to {@link #getFieldQuery(const String&, const String&)}. /// This method may be overridden, for example, to return a SpanNearQuery instead of a PhraseQuery. 
virtual QueryPtr getFieldQuery(const String& field, const String& queryText, int32_t slop); /// Builds a new TermRangeQuery instance for given min/max parts virtual QueryPtr getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive); /// Builds a new BooleanQuery instance /// @param disableCoord disable coord BooleanQueryPtr newBooleanQuery(bool disableCoord); /// Builds a new BooleanClause instance /// @param q sub query /// @param occur how this clause should occur when matching documents /// @return new BooleanClause instance BooleanClausePtr newBooleanClause(const QueryPtr& q, BooleanClause::Occur occur); /// Builds a new TermQuery instance /// @param term term /// @return new TermQuery instance QueryPtr newTermQuery(const TermPtr& term); /// Builds a new PhraseQuery instance /// @return new PhraseQuery instance PhraseQueryPtr newPhraseQuery(); /// Builds a new MultiPhraseQuery instance /// @return new MultiPhraseQuery instance MultiPhraseQueryPtr newMultiPhraseQuery(); /// Builds a new PrefixQuery instance /// @param prefix Prefix term /// @return new PrefixQuery instance QueryPtr newPrefixQuery(const TermPtr& prefix); /// Builds a new FuzzyQuery instance /// @param term Term /// @param minimumSimilarity minimum similarity /// @param prefixLength prefix length /// @return new FuzzyQuery Instance QueryPtr newFuzzyQuery(const TermPtr& term, double minimumSimilarity, int32_t prefixLength); /// Builds a new TermRangeQuery instance /// @param field Field /// @param part1 min /// @param part2 max /// @param inclusive true if range is inclusive /// @return new TermRangeQuery instance QueryPtr newRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive); /// Builds a new MatchAllDocsQuery instance /// @return new MatchAllDocsQuery instance QueryPtr newMatchAllDocsQuery(); /// Builds a new WildcardQuery instance /// @param t wildcard term /// @return new WildcardQuery instance QueryPtr 
newWildcardQuery(const TermPtr& term); /// Factory method for generating query, given a set of clauses. By default creates a boolean query /// composed of clauses passed in. /// /// Can be overridden by extending classes, to modify query being returned. /// /// @param clauses List that contains {@link BooleanClause} instances to join. /// @return Resulting {@link Query} object. virtual QueryPtr getBooleanQuery(Collection clauses); /// Factory method for generating query, given a set of clauses. By default creates a boolean query /// composed of clauses passed in. /// /// Can be overridden by extending classes, to modify query being returned. /// /// @param clauses List that contains {@link BooleanClause} instances to join. /// @param disableCoord true if coord scoring should be disabled. /// @return Resulting {@link Query} object. virtual QueryPtr getBooleanQuery(Collection clauses, bool disableCoord); /// Factory method for generating a query. Called when parser parses an input term token that contains /// one or more wildcard characters (? and *), but is not a prefix term token (one that has just a /// single * character at the end) /// /// Depending on settings, prefix term may be lower-cased automatically. It will not go through the /// default Analyzer, however, since normal Analyzers are unlikely to work properly with wildcard /// templates. /// /// Can be overridden by extending classes, to provide custom handling for wildcard queries, which may /// be necessary due to missing analyzer calls. /// /// @param field Name of the field query will use. /// @param termStr Term token that contains one or more wild card characters (? or *), but is not simple /// prefix term /// @return Resulting {@link Query} built for the term virtual QueryPtr getWildcardQuery(const String& field, const String& termStr); /// Factory method for generating a query (similar to {@link #getWildcardQuery}). 
Called when parser /// parses an input term token that uses prefix notation; that is, contains a single '*' wildcard /// character as its last character. Since this is a special case of generic wildcard term, and such /// a query can be optimized easily, this usually results in a different query object. /// /// Depending on settings, a prefix term may be lower-cased automatically. It will not go through the /// default Analyzer, however, since normal Analyzers are unlikely to work properly with wildcard templates. /// /// Can be overridden by extending classes, to provide custom handling for wild card queries, which may be /// necessary due to missing analyzer calls. /// /// @param field Name of the field query will use. /// @param termStr Term token to use for building term for the query (without trailing '*' character) /// @return Resulting {@link Query} built for the term virtual QueryPtr getPrefixQuery(const String& field, const String& termStr); /// Factory method for generating a query (similar to {@link #getWildcardQuery}). Called when parser /// parses an input term token that has the fuzzy suffix (~) appended. /// /// @param field Name of the field query will use. /// @param termStr Term token to use for building term for the query /// @return Resulting {@link Query} built for the term virtual QueryPtr getFuzzyQuery(const String& field, const String& termStr, double minSimilarity); /// Returns a String where the escape char has been removed, or kept only once if there was a double /// escape. Supports escaped unicode characters, eg. translates \\u0041 to A. String discardEscapeChar(const String& input); /// Returns the numeric value of the hexadecimal character static int32_t hexToInt(wchar_t c); /// Returns a String where those characters that QueryParser expects to be escaped are escaped by /// a preceding \. 
static String escape(const String& s); bool jj_2_1(int32_t xla); bool jj_3R_2(); bool jj_3_1(); bool jj_3R_3(); QueryParserTokenPtr jj_consume_token(int32_t kind); bool jj_scan_token(int32_t kind); int32_t jj_ntk(); void jj_add_error_token(int32_t kind, int32_t pos); void jj_rescan_token(); void jj_save(int32_t index, int32_t xla); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/QueryParserCharStream.h000066400000000000000000000074631456444476200247730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYPARSERCHARSTREAM_H #define QUERYPARSERCHARSTREAM_H #include "LuceneObject.h" namespace Lucene { /// This interface describes a character stream that maintains line and column number positions of /// the characters. It also has the capability to backup the stream to some extent. An implementation /// of this interface is used in the QueryParserTokenManager. /// /// All the methods except backup can be implemented in any fashion. backup needs to be implemented /// correctly for the correct operation of the lexer. Rest of the methods are all used to get information /// like line number, column number and the String that constitutes a token and are not used by the lexer. /// Hence their implementation won't affect the generated lexer's operation. class LPPAPI QueryParserCharStream { public: LUCENE_INTERFACE(QueryParserCharStream); public: /// Returns the next character from the selected input. The method of selecting the input is the /// responsibility of the class implementing this interface. virtual wchar_t readChar() = 0; /// Returns the column position of the character last read. 
/// @deprecated /// @see #getEndColumn virtual int32_t getColumn() = 0; /// Returns the line number of the character last read. /// @deprecated /// @see #getEndLine virtual int32_t getLine() = 0; /// Returns the column number of the last character for current token (being matched after the last /// call to BeginToken). virtual int32_t getEndColumn() = 0; /// Returns the line number of the last character for current token (being matched after the last call /// to BeginToken). virtual int32_t getEndLine() = 0; /// Returns the column number of the first character for current token (being matched after the last /// call to BeginToken). virtual int32_t getBeginColumn() = 0; /// Returns the line number of the first character for current token (being matched after the last call /// to BeginToken). virtual int32_t getBeginLine() = 0; /// Backs up the input stream by amount steps. Lexer calls this method if it had already read some /// characters, but could not use them to match a (longer) token. So, they will be used again as the /// prefix of the next token and it is the implementation's's responsibility to do this right. virtual void backup(int32_t amount) = 0; /// Returns the next character that marks the beginning of the next token. All characters must remain /// in the buffer between two successive calls to this method to implement backup correctly. virtual wchar_t BeginToken() = 0; /// Returns a string made up of characters from the marked token beginning to the current buffer position. /// Implementations have the choice of returning anything that they want to. For example, for efficiency, /// one might decide to just return null, which is a valid implementation. virtual String GetImage() = 0; /// Returns an array of characters that make up the suffix of length for the currently matched token. /// This is used to build up the matched string for use in actions in the case of MORE. 
virtual CharArray GetSuffix(int32_t length) = 0; /// The lexer calls this function to indicate that it is done with the stream and hence implementations /// can free any resources held by this class. Again, the body of this function can be just empty and it /// will not affect the lexer's operation. virtual void Done() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/QueryParserConstants.h000066400000000000000000000033271456444476200247110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYPARSERCONSTANTS_H #define QUERYPARSERCONSTANTS_H #include "LuceneObject.h" namespace Lucene { /// Token literal values and constants. class LPPAPI QueryParserConstants { protected: QueryParserConstants(); public: virtual ~QueryParserConstants(); LUCENE_INTERFACE(QueryParserConstants); public: enum RegularExpressionId { _EOF = 0, _NUM_CHAR = 1, _ESCAPED_CHAR = 2, _TERM_START_CHAR = 3, _TERM_CHAR = 4, _WHITESPACE = 5, _QUOTED_CHAR = 6, AND = 8, OR = 9, NOT = 10, PLUS = 11, MINUS = 12, LPAREN = 13, RPAREN = 14, COLON = 15, STAR = 16, CARAT = 17, QUOTED = 18, TERM = 19, FUZZY_SLOP = 20, PREFIXTERM = 21, WILDTERM = 22, RANGEIN_START = 23, RANGEEX_START = 24, NUMBER = 25, RANGEIN_TO = 26, RANGEIN_END = 27, RANGEIN_QUOTED = 28, RANGEIN_GOOP = 29, RANGEEX_TO = 30, RANGEEX_END = 31, RANGEEX_QUOTED = 32, RANGEEX_GOOP = 33 }; enum LexicalState { Boost = 0, RangeEx = 1, RangeIn = 2, DEFAULT = 3 }; /// Literal token values. static Collection tokenImage; protected: /// Literal token values. 
static const wchar_t* _tokenImage[]; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/QueryParserToken.h000066400000000000000000000053521456444476200240150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYPARSERTOKEN_H #define QUERYPARSERTOKEN_H #include "LuceneObject.h" namespace Lucene { /// Describes the input token stream. class LPPAPI QueryParserToken : public LuceneObject { public: /// Constructs a new token for the specified Image and Kind. QueryParserToken(int32_t kind = 0, const String& image = EmptyString); virtual ~QueryParserToken(); LUCENE_CLASS(QueryParserToken); public: /// An integer that describes the kind of this token. int32_t kind; /// The line number of the first character of this Token. int32_t beginLine; /// The column number of the first character of this Token. int32_t beginColumn; /// The line number of the last character of this Token. int32_t endLine; /// The column number of the last character of this Token. int32_t endColumn; /// The string image of the token. String image; /// A reference to the next regular (non-special) token from the input stream. If this is the last /// token from the input stream, or if the token manager has not read tokens beyond this one, this /// field is set to null. This is true only if this token is also a regular token. Otherwise, see /// below for a description of the contents of this field. QueryParserTokenPtr next; /// This field is used to access special tokens that occur prior to this token, but after the /// immediately preceding regular (non-special) token. If there are no such special tokens, this /// field is set to null. 
When there are more than one such special token, this field refers to the /// last of these special tokens, which in turn refers to the next previous special token through /// its specialToken field, and so on until the first special token (whose specialToken field is /// null). The next fields of special tokens refer to other special tokens that immediately follow /// it (without an intervening regular token). If there is no such token, this field is null. QueryParserTokenPtr specialToken; public: /// Returns the image. virtual String toString(); /// Returns a new Token object, by default. However, if you want, you can create and return subclass /// objects based on the value of ofKind. Simply add the cases to the switch for all those special /// cases. static QueryParserTokenPtr newToken(int32_t ofKind, const String& image = EmptyString); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/QueryParserTokenManager.h000066400000000000000000000073761456444476200253200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYPARSERTOKENMANAGER_H #define QUERYPARSERTOKENMANAGER_H #include "QueryParserConstants.h" namespace Lucene { /// Token Manager. class LPPAPI QueryParserTokenManager : public QueryParserConstants, public LuceneObject { public: QueryParserTokenManager(const QueryParserCharStreamPtr& stream); QueryParserTokenManager(const QueryParserCharStreamPtr& stream, int32_t lexState); virtual ~QueryParserTokenManager(); LUCENE_CLASS(QueryParserTokenManager); public: /// Debug output. 
InfoStreamPtr debugStream; protected: static const int64_t jjbitVec0[]; static const int64_t jjbitVec1[]; static const int64_t jjbitVec3[]; static const int64_t jjbitVec4[]; static const int32_t jjnextStates[]; /// Token literal values. static const wchar_t* jjstrLiteralImages[]; /// Lexer state names. static const wchar_t* lexStateNames[]; /// Lex State array. static const int32_t jjnewLexState[]; static const int64_t jjtoToken[]; static const int64_t jjtoSkip[]; int32_t curLexState; int32_t defaultLexState; int32_t jjnewStateCnt; int32_t jjround; int32_t jjmatchedPos; int32_t jjmatchedKind; QueryParserCharStreamPtr input_stream; IntArray jjrounds; IntArray jjstateSet; wchar_t curChar; public: /// Set debug output. void setDebugStream(const InfoStreamPtr& debugStream); /// Reinitialise parser. void ReInit(const QueryParserCharStreamPtr& stream); /// Reinitialise parser. void ReInit(const QueryParserCharStreamPtr& stream, int32_t lexState); /// Switch to specified lex state. void SwitchTo(int32_t lexState); /// Get the next Token. 
QueryParserTokenPtr getNextToken(); protected: int32_t jjStopStringLiteralDfa_3(int32_t pos, int64_t active0); int32_t jjStartNfa_3(int32_t pos, int64_t active0); int32_t jjStopAtPos(int32_t pos, int32_t kind); int32_t jjMoveStringLiteralDfa0_3(); int32_t jjStartNfaWithStates_3(int32_t pos, int32_t kind, int32_t state); int32_t jjMoveNfa_3(int32_t startState, int32_t curPos); int32_t jjStopStringLiteralDfa_1(int32_t pos, int64_t active0); int32_t jjStartNfa_1(int32_t pos, int64_t active0); int32_t jjMoveStringLiteralDfa0_1(); int32_t jjMoveStringLiteralDfa1_1(int64_t active0); int32_t jjStartNfaWithStates_1(int32_t pos, int32_t kind, int32_t state); int32_t jjMoveNfa_1(int32_t startState, int32_t curPos); int32_t jjMoveStringLiteralDfa0_0(); int32_t jjMoveNfa_0(int32_t startState, int32_t curPos); int32_t jjStopStringLiteralDfa_2(int32_t pos, int64_t active0); int32_t jjStartNfa_2(int32_t pos, int64_t active0); int32_t jjMoveStringLiteralDfa0_2(); int32_t jjMoveStringLiteralDfa1_2(int64_t active0); int32_t jjStartNfaWithStates_2(int32_t pos, int32_t kind, int32_t state); int32_t jjMoveNfa_2(int32_t startState, int32_t curPos); static bool jjCanMove_0(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2); static bool jjCanMove_1(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2); static bool jjCanMove_2(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2); void ReInitRounds(); QueryParserTokenPtr jjFillToken(); void jjCheckNAdd(int32_t state); void jjAddStates(int32_t start, int32_t end); void jjCheckNAddTwoStates(int32_t state1, int32_t state2); void jjCheckNAddStates(int32_t start, int32_t end); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/QueryTermVector.h000066400000000000000000000023371456444476200236520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYTERMVECTOR_H #define QUERYTERMVECTOR_H #include "TermFreqVector.h" namespace Lucene { class LPPAPI QueryTermVector : public TermFreqVector, public LuceneObject { public: /// @param queryTerms The original list of terms from the query, can contain duplicates QueryTermVector(Collection queryTerms); QueryTermVector(const String& queryString, const AnalyzerPtr& analyzer); virtual ~QueryTermVector(); LUCENE_CLASS(QueryTermVector); protected: Collection terms; Collection termFreqs; public: virtual String toString(); int32_t size(); Collection getTerms(); Collection getTermFrequencies(); int32_t indexOf(const String& term); Collection indexesOf(Collection terms, int32_t start, int32_t length); protected: void processTerms(Collection queryTerms); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/QueryWrapperFilter.h000066400000000000000000000025121456444476200243410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYWRAPPERFILTER_H #define QUERYWRAPPERFILTER_H #include "Filter.h" namespace Lucene { /// Constrains search results to only match those which also match a provided query. /// /// This could be used, for example, with a {@link TermRangeQuery} on a suitably formatted date field to /// implement date filtering. One could re-use a single QueryFilter that matches, eg., only documents /// modified within the last week. The QueryFilter and TermRangeQuery would only need to be reconstructed /// once per day. 
class LPPAPI QueryWrapperFilter : public Filter { public: /// Constructs a filter which only matches documents matching query. QueryWrapperFilter(const QueryPtr& query); virtual ~QueryWrapperFilter(); LUCENE_CLASS(QueryWrapperFilter); protected: QueryPtr query; public: virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); virtual String toString(); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/RAMDirectory.h000066400000000000000000000054411456444476200230350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RAMDIRECTORY_H #define RAMDIRECTORY_H #include "Directory.h" namespace Lucene { /// A memory-resident {@link Directory} implementation. Locking implementation is by default the /// {@link SingleInstanceLockFactory} but can be changed with {@link #setLockFactory}. /// Lock acquisition sequence: RAMDirectory, then RAMFile class LPPAPI RAMDirectory : public Directory { public: /// Constructs an empty {@link Directory}. RAMDirectory(); /// Creates a new RAMDirectory instance from a different Directory implementation. /// This can be used to load a disk-based index into memory. /// /// This should be used only with indices that can fit into memory. /// /// Note that the resulting RAMDirectory instance is fully independent from the /// original Directory (it is a complete copy). Any subsequent changes to the /// original Directory will not be visible in the RAMDirectory instance. 
/// @param dir a Directory value RAMDirectory(const DirectoryPtr& dir); RAMDirectory(const DirectoryPtr& dir, bool closeDir); virtual ~RAMDirectory(); LUCENE_CLASS(RAMDirectory); INTERNAL: int64_t _sizeInBytes; MapStringRAMFile fileMap; protected: DirectoryWeakPtr _dirSource; bool copyDirectory; bool closeDir; public: virtual void initialize(); /// Returns an array of strings, one for each file in the directory. virtual HashSet listAll(); /// Returns true if a file with the given name exists. virtual bool fileExists(const String& name); /// Returns the time the named file was last modified. virtual uint64_t fileModified(const String& name); /// Set the modified time of an existing file to now. virtual void touchFile(const String& name); /// Returns the length of a file in the directory. virtual int64_t fileLength(const String& name); /// Return total size in bytes of all files in this directory. /// This is currently quantized to RAMOutputStream::BUFFER_SIZE. int64_t sizeInBytes(); /// Removes an existing file in the directory. virtual void deleteFile(const String& name); /// Creates a new, empty file in the directory with the given name. /// Returns a stream writing this file. virtual IndexOutputPtr createOutput(const String& name); /// Returns a stream reading an existing file. virtual IndexInputPtr openInput(const String& name); /// Closes the store. virtual void close(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/RAMFile.h000066400000000000000000000027641456444476200217550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef RAMFILE_H #define RAMFILE_H #include "LuceneObject.h" namespace Lucene { /// File used as buffer in RAMDirectory class LPPAPI RAMFile : public LuceneObject { public: RAMFile(); // File used as buffer, in no RAMDirectory RAMFile(const RAMDirectoryPtr& directory); virtual ~RAMFile(); LUCENE_CLASS(RAMFile); INTERNAL: int64_t length; RAMDirectoryWeakPtr _directory; protected: Collection buffers; int64_t sizeInBytes; /// This is publicly modifiable via Directory.touchFile(), so direct access not supported int64_t lastModified; public: /// For non-stream access from thread that might be concurrent with writing. int64_t getLength(); void setLength(int64_t length); /// For non-stream access from thread that might be concurrent with writing int64_t getLastModified(); void setLastModified(int64_t lastModified); int64_t getSizeInBytes(); ByteArray addBuffer(int32_t size); ByteArray getBuffer(int32_t index); int32_t numBuffers(); protected: /// Allocate a new buffer. Subclasses can allocate differently. virtual ByteArray newBuffer(int32_t size); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/RAMInputStream.h000066400000000000000000000040211456444476200233350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RAMINPUTSTREAM_H #define RAMINPUTSTREAM_H #include "IndexInput.h" namespace Lucene { /// A memory-resident {@link IndexInput} implementation. 
class LPPAPI RAMInputStream : public IndexInput { public: RAMInputStream(); RAMInputStream(const RAMFilePtr& f); virtual ~RAMInputStream(); LUCENE_CLASS(RAMInputStream); public: static const int32_t BUFFER_SIZE; protected: RAMFilePtr file; int64_t _length; ByteArray currentBuffer; int32_t currentBufferIndex; int32_t bufferPosition; int64_t bufferStart; int32_t bufferLength; public: /// Closes the stream to further operations. virtual void close(); /// The number of bytes in the file. virtual int64_t length(); /// Reads and returns a single byte. /// @see IndexOutput#writeByte(uint8_t) virtual uint8_t readByte(); /// Reads a specified number of bytes into an array at the specified offset. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. /// @see IndexOutput#writeBytes(const uint8_t*,int) virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); /// Returns the current position in this file, where the next read will occur. /// @see #seek(int64_t) virtual int64_t getFilePointer(); /// Sets current position in this file, where the next read will occur. /// @see #getFilePointer() virtual void seek(int64_t pos); /// Returns a clone of this stream. virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); protected: void switchCurrentBuffer(bool enforceEOF); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/RAMOutputStream.h000066400000000000000000000041471456444476200235470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef RAMOUTPUTSTREAM_H #define RAMOUTPUTSTREAM_H #include "IndexOutput.h" namespace Lucene { /// A memory-resident {@link IndexOutput} implementation. class LPPAPI RAMOutputStream : public IndexOutput { public: /// Construct an empty output buffer. RAMOutputStream(); RAMOutputStream(const RAMFilePtr& f); virtual ~RAMOutputStream(); LUCENE_CLASS(RAMOutputStream); public: static const int32_t BUFFER_SIZE; protected: RAMFilePtr file; ByteArray currentBuffer; int32_t currentBufferIndex; int32_t bufferPosition; int64_t bufferStart; int32_t bufferLength; public: /// Copy the current contents of this buffer to the named output. void writeTo(const IndexOutputPtr& out); /// Resets this to an empty file. void reset(); /// Closes this stream to further operations. virtual void close(); /// Sets current position in this file, where the next write will occur. /// @see #getFilePointer() virtual void seek(int64_t pos); /// The number of bytes in the file. virtual int64_t length(); /// Writes a single byte. /// @see IndexInput#readByte() virtual void writeByte(uint8_t b); /// Writes an array of bytes. /// @param b the bytes to write. /// @param length the number of bytes to write. /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length); /// Forces any buffered output to be written. virtual void flush(); /// Returns the current position in this file, where the next write will occur. virtual int64_t getFilePointer(); /// Returns byte usage of all buffers. int64_t sizeInBytes(); protected: void switchCurrentBuffer(); void setFileLength(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Random.h000066400000000000000000000014411456444476200217450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RANDOM_H #define RANDOM_H #include "LuceneObject.h" namespace Lucene { /// Utility class to generate a stream of pseudorandom numbers. class LPPAPI Random : public LuceneObject { public: Random(); Random(int64_t seed); virtual ~Random(); protected: int64_t seed; public: void setSeed(int64_t seed); int32_t nextInt(int32_t limit = INT_MAX); double nextDouble(); protected: int32_t next(int32_t bits); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/RawPostingList.h000066400000000000000000000022361456444476200234610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RAWPOSTINGLIST_H #define RAWPOSTINGLIST_H #include "LuceneObject.h" namespace Lucene { /// This is the base class for an in-memory posting list, keyed by a Token. {@link TermsHash} maintains a /// hash table holding one instance of this per unique Token. Consumers of TermsHash ({@link TermsHashConsumer}) /// must subclass this class with its own concrete class. FreqProxTermsWriterPostingList is a private inner /// class used for the freq/prox postings, and TermVectorsTermsWriterPostingList is a private inner class used /// to hold TermVectors postings. 
class RawPostingList : public LuceneObject { public: RawPostingList(); virtual ~RawPostingList(); LUCENE_CLASS(RawPostingList); public: static const int32_t BYTES_SIZE; int32_t textStart; int32_t intStart; int32_t byteStart; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ReadOnlyDirectoryReader.h000066400000000000000000000025201456444476200252510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef READONLYDIRECTORYREADER_H #define READONLYDIRECTORYREADER_H #include "DirectoryReader.h" namespace Lucene { class LPPAPI ReadOnlyDirectoryReader : public DirectoryReader { public: ReadOnlyDirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& sis, const IndexDeletionPolicyPtr& deletionPolicy, int32_t termInfosIndexDivisor); ReadOnlyDirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& infos, Collection oldReaders, Collection oldStarts, MapStringByteArray oldNormsCache, bool doClone, int32_t termInfosIndexDivisor); ReadOnlyDirectoryReader(const IndexWriterPtr& writer, const SegmentInfosPtr& infos, int32_t termInfosIndexDivisor); virtual ~ReadOnlyDirectoryReader(); LUCENE_CLASS(ReadOnlyDirectoryReader); public: /// Tries to acquire the WriteLock on this directory. this method is only valid if this /// IndexReader is directory owner. virtual void acquireWriteLock(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ReadOnlySegmentReader.h000066400000000000000000000013271456444476200247130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef READONLYSEGMENTREADER_H #define READONLYSEGMENTREADER_H #include "SegmentReader.h" namespace Lucene { class LPPAPI ReadOnlySegmentReader : public SegmentReader { public: virtual ~ReadOnlySegmentReader(); LUCENE_CLASS(ReadOnlySegmentReader); public: static void noWrite(); virtual void acquireWriteLock(); virtual bool isDeleted(int32_t n); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Reader.h000066400000000000000000000031501456444476200217260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef READER_H #define READER_H #include "LuceneObject.h" namespace Lucene { /// Abstract class for reading character streams. class LPPAPI Reader : public LuceneObject { protected: Reader(); public: virtual ~Reader(); LUCENE_CLASS(Reader); public: static const int32_t READER_EOF; /// Read a single character. virtual int32_t read(); /// Read characters into a portion of an array. virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length) = 0; /// Skip characters. virtual int64_t skip(int64_t n); /// Close the stream. virtual void close() = 0; /// Tell whether this stream supports the mark() operation virtual bool markSupported(); /// Mark the present position in the stream. Subsequent calls to reset() will attempt to reposition the /// stream to this point. virtual void mark(int32_t readAheadLimit); /// Reset the stream. If the stream has been marked, then attempt to reposition it at the mark. 
If the stream /// has not been marked, then attempt to reset it in some way appropriate to the particular stream, for example /// by repositioning it to its starting point. virtual void reset(); /// The number of bytes in the stream. virtual int64_t length(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ReaderUtil.h000066400000000000000000000030221456444476200225620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef READERUTIL_H #define READERUTIL_H #include "LuceneObject.h" namespace Lucene { /// Common util methods for dealing with {@link IndexReader}s. class LPPAPI ReaderUtil : public LuceneObject { public: virtual ~ReaderUtil(); LUCENE_CLASS(ReaderUtil); public: /// Gathers sub-readers from reader into a List. static void gatherSubReaders(Collection allSubReaders, const IndexReaderPtr& reader); /// Returns sub IndexReader that contains the given document id. /// /// @param doc Id of document /// @param reader Parent reader /// @return Sub reader of parent which contains the specified doc id static IndexReaderPtr subReader(int32_t doc, const IndexReaderPtr& reader); /// Returns sub-reader subIndex from reader. /// /// @param reader Parent reader /// @param subIndex Index of desired sub reader /// @return The subreader at subIndex static IndexReaderPtr subReader(const IndexReaderPtr& reader, int32_t subIndex); /// Returns index of the searcher/reader for document n in the array used to construct this /// searcher/reader. 
static int32_t subIndex(int32_t n, Collection docStarts); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ReqExclScorer.h000066400000000000000000000034671456444476200232600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef REQEXCLSCORER_H #define REQEXCLSCORER_H #include "Scorer.h" namespace Lucene { /// A Scorer for queries with a required subscorer and an excluding (prohibited) sub DocIdSetIterator. /// This Scorer implements {@link Scorer#skipTo(int32_t)}, and it uses the skipTo() on the given scorers. class ReqExclScorer : public Scorer { public: /// Construct a ReqExclScorer. /// @param reqScorer The scorer that must match, except where /// @param exclDisi indicates exclusion. ReqExclScorer(const ScorerPtr& reqScorer, const DocIdSetIteratorPtr& exclDisi); virtual ~ReqExclScorer(); LUCENE_CLASS(ReqExclScorer); protected: ScorerPtr reqScorer; DocIdSetIteratorPtr exclDisi; int32_t doc; public: virtual int32_t nextDoc(); virtual int32_t docID(); /// Returns the score of the current document matching the query. Initially invalid, until {@link #next()} /// is called the first time. /// @return The score of the required scorer. virtual double score(); virtual int32_t advance(int32_t target); protected: /// Advance to non excluded doc. /// /// On entry: ///
    ///
  • reqScorer != null, ///
  • exclScorer != null, ///
  • reqScorer was advanced once via next() or skipTo() and reqScorer.doc() may still be excluded. ///
/// Advances reqScorer a non excluded required doc, if any. /// @return true iff there is a non excluded required doc. int32_t toNonExcluded(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ReqOptSumScorer.h000066400000000000000000000024761456444476200236130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef REQOPTSUMSCORER_H #define REQOPTSUMSCORER_H #include "Scorer.h" namespace Lucene { /// A Scorer for queries with a required part and an optional part. Delays skipTo() on the optional part /// until a score() is needed. This Scorer implements {@link Scorer#skipTo(int32_t)}. class ReqOptSumScorer : public Scorer { public: ReqOptSumScorer(const ScorerPtr& reqScorer, const ScorerPtr& optScorer); virtual ~ReqOptSumScorer(); LUCENE_CLASS(ReqOptSumScorer); protected: ScorerPtr reqScorer; ScorerPtr optScorer; public: virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); virtual int32_t docID(); /// Returns the score of the current document matching the query. Initially invalid, until {@link #next()} /// is called the first time. /// @return The score of the required scorer, eventually increased by the score of the optional scorer when /// it also matches the current document. virtual double score(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ReusableStringReader.h000066400000000000000000000020761456444476200246060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef REUSABLESTRINGREADER_H #define REUSABLESTRINGREADER_H #include "Reader.h" namespace Lucene { /// Used by DocumentsWriter to implemented a StringReader that can be reset to a new string; we use this /// when tokenizing the string value from a Field. class ReusableStringReader : public Reader { public: ReusableStringReader(); virtual ~ReusableStringReader(); LUCENE_CLASS(ReusableStringReader); public: int32_t upto; int32_t left; String s; public: virtual void init(const String& s); using Reader::read; /// Read characters into a portion of an array. virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); /// Close the stream. virtual void close(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ReverseOrdFieldSource.h000066400000000000000000000036631456444476200247420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef REVERSEORDFIELDSOURCE_H #define REVERSEORDFIELDSOURCE_H #include "ValueSource.h" namespace Lucene { /// Obtains the ordinal of the field value from the default Lucene {@link FieldCache} using getStringIndex() /// and reverses the order. /// /// The native lucene index order is used to assign an ordinal value for each field value. /// /// Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. 
Example /// of reverse ordinal (rord): /// /// If there were only three field values: "apple","banana","pear" then rord("apple")=3, rord("banana")=2, /// ord("pear")=1 /// /// WARNING: rord() depends on the position in an index and can thus change when other documents are inserted /// or deleted, or if a MultiSearcher is used. /// /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's /// best to switch your application to pass only atomic (single segment) readers to this API. class LPPAPI ReverseOrdFieldSource : public ValueSource { public: /// Constructor for a certain field. /// @param field field whose values reverse order is used. ReverseOrdFieldSource(const String& field); virtual ~ReverseOrdFieldSource(); LUCENE_CLASS(ReverseOrdFieldSource); protected: String field; public: virtual String description(); virtual DocValuesPtr getValues(const IndexReaderPtr& reader); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ScoreCachingWrappingScorer.h000066400000000000000000000034221456444476200257440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SCORECACHINGWRAPPINGSCORER_H #define SCORECACHINGWRAPPINGSCORER_H #include "Scorer.h" namespace Lucene { /// A {@link Scorer} which wraps another scorer and caches the score of the current document. 
Successive /// calls to {@link #score()} will return the same result and will not invoke the wrapped Scorer's score() /// method, unless the current document has changed. /// /// This class might be useful due to the changes done to the {@link Collector} interface, in which the /// score is not computed for a document by default, only if the collector requests it. Some collectors /// may need to use the score in several places, however all they have in hand is a {@link Scorer} object, /// and might end up computing the score of a document more than once. class LPPAPI ScoreCachingWrappingScorer : public Scorer { public: /// Creates a new instance by wrapping the given scorer. ScoreCachingWrappingScorer(const ScorerPtr& scorer); virtual ~ScoreCachingWrappingScorer(); LUCENE_CLASS(ScoreCachingWrappingScorer); protected: ScorerWeakPtr _scorer; int32_t curDoc; double curScore; public: SimilarityPtr getSimilarity(); virtual double score(); virtual int32_t docID(); virtual int32_t nextDoc(); virtual void score(const CollectorPtr& collector); virtual int32_t advance(int32_t target); protected: virtual bool score(const CollectorPtr& collector, int32_t max, int32_t firstDocID); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ScoreDoc.h000066400000000000000000000015421456444476200222300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SCOREDOC_H #define SCOREDOC_H #include "LuceneObject.h" namespace Lucene { /// Expert: Returned by low-level search implementations. 
/// @see TopDocs class LPPAPI ScoreDoc : public LuceneObject { public: ScoreDoc(int32_t doc, double score); virtual ~ScoreDoc(); LUCENE_CLASS(ScoreDoc); public: /// The score of this document for the query. double score; /// A hit document's number. /// @see Searcher#doc(int32_t) int32_t doc; public: virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Scorer.h000066400000000000000000000061131456444476200217630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SCORER_H #define SCORER_H #include "DocIdSetIterator.h" #include "BooleanClause.h" #include "Weight.h" namespace Lucene { class LPPAPI ScorerVisitor{ public: virtual void visitOptional(QueryPtr parent,QueryPtr child,ScorerPtr scorer)=0; virtual void visitRequired(QueryPtr parent,QueryPtr child,ScorerPtr scorer)=0; virtual void visitProhibited(QueryPtr parent,QueryPtr child,ScorerPtr scorer)=0; }; /// Common scoring functionality for different types of queries. /// /// A Scorer iterates over documents matching a query in increasing order of doc Id. /// /// Document scores are computed using a given Similarity implementation. /// /// NOTE: The values NEGATIVE_INFINITY and POSITIVE_INFINITY are not valid scores. Certain collectors /// (eg {@link TopScoreDocCollector}) will not properly collect hits with these scores. class LPPAPI Scorer : public DocIdSetIterator { public: /// Constructs a Scorer. /// @param similarity The Similarity implementation used by this scorer. 
Scorer(const SimilarityPtr& similarity); Scorer(const WeightPtr& weight); virtual ~Scorer(); LUCENE_CLASS(Scorer); WeightPtr weight; protected: SimilarityPtr similarity; public: /// Returns the Similarity implementation used by this scorer. SimilarityPtr getSimilarity(); /// Scores and collects all matching documents. /// @param collector The collector to which all matching documents are passed. virtual void score(const CollectorPtr& collector); /// Returns the score of the current document matching the query. Initially invalid, until {@link /// #nextDoc()} or {@link #advance(int32_t)} is called the first time, or when called from within /// {@link Collector#collect}. virtual double score() = 0; void visitSubScorers(QueryPtr parent, BooleanClause::Occur relationship, ScorerVisitor *visitor); void visitScorers(ScorerVisitor *visitor); virtual float termFreq(){ boost::throw_exception(RuntimeException(L"Freq not implemented")); } protected: /// Collects matching documents in a range. Hook for optimization. /// Note, firstDocID is added to ensure that {@link #nextDoc()} was called before this method. /// /// @param collector The collector to which all matching documents are passed. /// @param max Do not score documents past this. /// @param firstDocID The first document ID (ensures {@link #nextDoc()} is called before this method. /// @return true if more matching documents may remain. virtual bool score(const CollectorPtr& collector, int32_t max, int32_t firstDocID); friend class BooleanScorer; friend class ScoreCachingWrappingScorer; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ScorerDocQueue.h000066400000000000000000000047451456444476200234270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SCORERDOCQUEUE_H #define SCORERDOCQUEUE_H #include "LuceneObject.h" namespace Lucene { /// A ScorerDocQueue maintains a partial ordering of its Scorers such that the least Scorer can always be /// found in constant time. Put()'s and pop()'s require log(size) time. The ordering is by Scorer::doc(). class LPPAPI ScorerDocQueue : public LuceneObject { public: ScorerDocQueue(int32_t maxSize); virtual ~ScorerDocQueue(); LUCENE_CLASS(ScorerDocQueue); protected: Collection heap; int32_t maxSize; int32_t _size; HeapedScorerDocPtr topHSD; // same as heap[1], only for speed public: /// Adds a Scorer to a ScorerDocQueue in log(size) time. If one tries to add more Scorers than maxSize /// ArrayIndexOutOfBound exception is thrown. void put(const ScorerPtr& scorer); /// Adds a Scorer to the ScorerDocQueue in log(size) time if either the ScorerDocQueue is not full, or /// not lessThan(scorer, top()). /// @return true if scorer is added, false otherwise. bool insert(const ScorerPtr& scorer); /// Returns the least Scorer of the ScorerDocQueue in constant time. Should not be used when the queue /// is empty. ScorerPtr top(); /// Returns document number of the least Scorer of the ScorerDocQueue in constant time. /// Should not be used when the queue is empty. int32_t topDoc(); double topScore(); bool topNextAndAdjustElsePop(); bool topSkipToAndAdjustElsePop(int32_t target); /// Removes and returns the least scorer of the ScorerDocQueue in log(size) time. Should not be used /// when the queue is empty. ScorerPtr pop(); /// Should be called when the scorer at top changes doc() value. void adjustTop(); /// Returns the number of scorers currently stored in the ScorerDocQueue. int32_t size(); /// Removes all entries from the ScorerDocQueue. void clear(); protected: bool checkAdjustElsePop(bool cond); /// Removes the least scorer of the ScorerDocQueue in log(size) time. 
Should not be used when the /// queue is empty. void popNoResult(); void upHeap(); void downHeap(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Searchable.h000066400000000000000000000122601456444476200225570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEARCHABLE_H #define SEARCHABLE_H #include "LuceneObject.h" namespace Lucene { /// The interface for search implementations. /// /// Searchable is the abstract network protocol for searching. Implementations provide search over a single /// index, over multiple indices, and over indices on remote servers. /// /// Queries, filters and sort criteria are designed to be compact so that they may be efficiently passed to a /// remote index, with only the top-scoring hits being returned, rather than every matching hit. /// /// NOTE: this interface is kept public for convenience. Since it is not expected to be implemented directly, /// it may be changed unexpectedly between releases. class LPPAPI Searchable { public: LUCENE_INTERFACE(Searchable); virtual ~Searchable() {} public: /// Lower-level search API. /// /// {@link Collector#collect(int32_t)} is called for every document. Collector-based access to remote /// indexes is discouraged. /// /// Applications should only use this if they need all of the matching documents. The high-level search /// API ({@link Searcher#search(QueryPtr, int32_t)}) is usually more efficient, as it skips non-high-scoring /// hits. /// /// @param weight To match documents /// @param filter If non-null, used to permit documents to be collected. 
/// @param collector To receive hits virtual void search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& collector) = 0; /// Frees resources associated with this Searcher. Be careful not to call this method while you are still /// using objects that reference this Searchable. virtual void close() = 0; /// Returns the number of documents containing term. /// @see IndexReader#docFreq(TermPtr) virtual int32_t docFreq(const TermPtr& term) = 0; /// For each term in the terms array, calculates the number of documents containing term. Returns an array /// with these document frequencies. Used to minimize number of remote calls. virtual Collection docFreqs(Collection terms) = 0; /// Returns one greater than the largest possible document number. /// @see IndexReader#maxDoc() virtual int32_t maxDoc() = 0; /// Low-level search implementation. Finds the top n hits for query, applying filter if non-null. /// Applications should usually call {@link Searcher#search(QueryPtr, int32_t)} or {@link /// Searcher#search(QueryPtr, FilterPtr, int32_t)} instead. virtual TopDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n) = 0; /// Returns the stored fields of document i. /// @see IndexReader#document(int32_t) virtual DocumentPtr doc(int32_t n) = 0; /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine what /// {@link Field}s to load and how they should be loaded. /// /// NOTE: If the underlying Reader (more specifically, the underlying FieldsReader) is closed before the /// lazy {@link Field} is loaded an exception may be thrown. If you want the value of a lazy {@link Field} /// to be available after closing you must explicitly load it or fetch the Document again with a new loader. /// /// @param n Get the document at the n'th position /// @param fieldSelector The {@link FieldSelector} to use to determine what Fields should be loaded on the /// Document. 
May be null, in which case all Fields will be loaded. /// @return The stored fields of the {@link Document} at the n'th position /// /// @see IndexReader#document(int32_t, FieldSelectorPtr) /// @see Fieldable /// @see FieldSelector /// @see SetBasedFieldSelector /// @see LoadFirstFieldSelector virtual DocumentPtr doc(int32_t n, const FieldSelectorPtr& fieldSelector) = 0; /// Called to re-write queries into primitive queries. virtual QueryPtr rewrite(const QueryPtr& query) = 0; /// Low-level implementation method. Returns an Explanation that describes how doc scored against weight. /// /// This is intended to be used in developing Similarity implementations, and for good performance, should /// not be displayed with every hit. Computing an explanation is as expensive as executing the query over /// the entire index. /// /// Applications should call {@link Searcher#explain(QueryPtr, int32_t)}. virtual ExplanationPtr explain(const WeightPtr& weight, int32_t doc) = 0; /// Low-level search implementation with arbitrary sorting. Finds the top n hits for query, applying filter /// if non-null, and sorting the hits by the criteria in sort. /// /// Applications should usually call {@link Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr)} instead. virtual TopFieldDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Searcher.h000066400000000000000000000107521456444476200222660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEARCHER_H #define SEARCHER_H #include "Searchable.h" namespace Lucene { /// An abstract base class for search implementations. 
Implements the main search methods. /// /// Note that you can only access hits from a Searcher as long as it is not yet closed, otherwise an IO /// exception will be thrown. class LPPAPI Searcher : public Searchable, public LuceneObject { public: Searcher(); virtual ~Searcher(); LUCENE_CLASS(Searcher); protected: /// The Similarity implementation used by this searcher. SimilarityPtr similarity; public: /// Search implementation with arbitrary sorting. Finds the top n hits for query, applying filter if /// non-null, and sorting the hits by the criteria in sort. /// /// NOTE: this does not compute scores by default; use {@link IndexSearcher#setDefaultFieldSortScoring} /// to enable scoring. virtual TopFieldDocsPtr search(const QueryPtr& query, const FilterPtr& filter, int32_t n, const SortPtr& sort); /// Lower-level search API. /// /// {@link Collector#collect(int32_t)} is called for every matching document. /// /// Applications should only use this if they need all of the matching documents. The high-level /// search API ({@link Searcher#search(QueryPtr, int32_t)}) is usually more efficient, as it skips /// non-high-scoring hits. /// /// Note: The score passed to this method is a raw score. In other words, the score will not necessarily /// be a double whose value is between 0 and 1. virtual void search(const QueryPtr& query, const CollectorPtr& results); /// Lower-level search API. /// /// {@link Collector#collect(int32_t)} is called for every matching document. Collector-based access to /// remote indexes is discouraged. /// /// Applications should only use this if they need all of the matching documents. The high-level search /// API ({@link Searcher#search(QueryPtr, FilterPtr, int32_t)}) is usually more efficient, as it skips /// non-high-scoring hits. /// /// @param query To match documents /// @param filter If non-null, used to permit documents to be collected. 
/// @param results To receive hits virtual void search(const QueryPtr& query, const FilterPtr& filter, const CollectorPtr& results); /// Finds the top n hits for query, applying filter if non-null. virtual TopDocsPtr search(const QueryPtr& query, const FilterPtr& filter, int32_t n); /// Finds the top n hits for query. virtual TopDocsPtr search(const QueryPtr& query, int32_t n); /// Returns an Explanation that describes how doc scored against query. /// /// This is intended to be used in developing Similarity implementations, and for good performance, /// should not be displayed with every hit. Computing an explanation is as expensive as executing the /// query over the entire index. virtual ExplanationPtr explain(const QueryPtr& query, int32_t doc); /// Set the Similarity implementation used by this Searcher. virtual void setSimilarity(const SimilarityPtr& similarity); /// Return the Similarity implementation used by this Searcher. /// /// This defaults to the current value of {@link Similarity#getDefault()}. virtual SimilarityPtr getSimilarity(); virtual Collection docFreqs(Collection terms); virtual void search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& results) = 0; virtual void close() = 0; virtual int32_t docFreq(const TermPtr& term) = 0; virtual int32_t maxDoc() = 0; virtual TopDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n) = 0; virtual DocumentPtr doc(int32_t n) = 0; virtual DocumentPtr doc(int32_t n, const FieldSelectorPtr& fieldSelector) = 0; virtual QueryPtr rewrite(const QueryPtr& query) = 0; virtual ExplanationPtr explain(const WeightPtr& weight, int32_t doc) = 0; virtual TopFieldDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort) = 0; protected: /// Creates a weight for query. 
/// @return New weight virtual WeightPtr createWeight(const QueryPtr& query); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SegmentInfo.h000066400000000000000000000145521456444476200227520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTINFO_H #define SEGMENTINFO_H #include "LuceneObject.h" namespace Lucene { /// Information about a segment such as it's name, directory, and files /// related to the segment. class LPPAPI SegmentInfo : public LuceneObject { public: SegmentInfo(const String& name, int32_t docCount, const DirectoryPtr& dir); SegmentInfo(const String& name, int32_t docCount, const DirectoryPtr& dir, bool isCompoundFile, bool hasSingleNormFile); SegmentInfo(const String& name, int32_t docCount, const DirectoryPtr& dir, bool isCompoundFile, bool hasSingleNormFile, int32_t docStoreOffset, const String& docStoreSegment, bool docStoreIsCompoundFile, bool hasProx); /// Construct a new SegmentInfo instance by reading a previously saved SegmentInfo from input. /// @param dir directory to load from. /// @param format format of the segments info file. /// @param input input handle to read segment info from. SegmentInfo(const DirectoryPtr& dir, int32_t format, const IndexInputPtr& input); virtual ~SegmentInfo(); LUCENE_CLASS(SegmentInfo); public: static const int32_t NO; // no norms; no deletes; static const int32_t YES; // have norms; have deletes; static const int32_t CHECK_DIR; // must check dir to see if there are norms/deletions static const int32_t WITHOUT_GEN; // a file name that has no GEN in it. 
protected: // true if this is a segments file written before lock-less commits (2.1) bool preLockless; // current generation of del file; NO if there are no deletes; CHECK_DIR if it's a pre-2.1 segment // (and we must check filesystem); YES or higher if there are deletes at generation N int64_t delGen; // current generation of each field's norm file. If this array is null, for lockLess this means no // separate norms. For preLockLess this means we must check filesystem. If this array is not null, // its values mean: NO says this field has no separate norms; CHECK_DIR says it is a preLockLess // segment and filesystem must be checked; >= YES says this field has separate norms with the // specified generation Collection normGen; // NO if it is not; YES if it is; CHECK_DIR if it's pre-2.1 (ie, must check file system to see if // .cfs and .nrm exist) uint8_t isCompoundFile; // true if this segment maintains norms in a single file; false otherwise this is currently false for // segments populated by DocumentWriter and true for newly created merged segments (both compound and // non compound). 
bool hasSingleNormFile; // cached list of files that this segment uses in the Directory HashSet _files; // total byte size of all of our files (computed on demand) int64_t _sizeInBytes; // if this segment shares stored fields & vectors, this offset is where in that file this segment's // docs begin int32_t docStoreOffset; // name used to derive fields/vectors file we share with other segments String docStoreSegment; // whether doc store files are stored in compound file (*.cfx) bool docStoreIsCompoundFile; // How many deleted docs in this segment, or -1 if not yet known (if it's an older index) int32_t delCount; // True if this segment has any fields with omitTermFreqAndPositions == false bool hasProx; MapStringString diagnostics; public: String name; // unique name in dir int32_t docCount; // number of docs in seg DirectoryPtr dir; // where segment resides public: /// Copy everything from src SegmentInfo into our instance. void reset(const SegmentInfoPtr& src); void setDiagnostics(MapStringString diagnostics); MapStringString getDiagnostics(); void setNumFields(int32_t numFields); /// Returns total size in bytes of all of files used by this segment. int64_t sizeInBytes(); bool hasDeletions(); void advanceDelGen(); void clearDelGen(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); String getDelFileName(); /// Returns true if this field for this segment has saved a separate norms file (__N.sX). /// @param fieldNumber the field index to check bool hasSeparateNorms(int32_t fieldNumber); /// Returns true if any fields in this segment have separate norms. bool hasSeparateNorms(); /// Increment the generation count for the norms file for this field. /// @param fieldIndex field whose norm file will be rewritten void advanceNormGen(int32_t fieldIndex); /// Get the file name for the norms file for this field. /// @param number field index String getNormFileName(int32_t number); /// Mark whether this segment is stored as a compound file. 
/// @param isCompoundFile true if this is a compound file; else, false void setUseCompoundFile(bool isCompoundFile); /// Returns true if this segment is stored as a compound file; else, false. bool getUseCompoundFile(); int32_t getDelCount(); void setDelCount(int32_t delCount); int32_t getDocStoreOffset(); bool getDocStoreIsCompoundFile(); void setDocStoreIsCompoundFile(bool v); String getDocStoreSegment(); void setDocStoreOffset(int32_t offset); void setDocStore(int32_t offset, const String& segment, bool isCompoundFile); /// Save this segment's info. void write(const IndexOutputPtr& output); void setHasProx(bool hasProx); bool getHasProx(); /// Return all files referenced by this SegmentInfo. The returns List is a locally cached List so /// you should not modify it. HashSet files(); /// Used for debugging. String segString(const DirectoryPtr& dir); /// We consider another SegmentInfo instance equal if it has the same dir and same name. virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); protected: void addIfExists(HashSet files, const String& fileName); /// Called whenever any change is made that affects which files this segment has. void clearFiles(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SegmentInfoCollection.h000066400000000000000000000024441456444476200247630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTINFOCOLLECTION_H #define SEGMENTINFOCOLLECTION_H #include "LuceneObject.h" namespace Lucene { /// A collection of SegmentInfo objects to be used as a base class for {@link SegmentInfos} class LPPAPI SegmentInfoCollection : public LuceneObject { public: SegmentInfoCollection(); virtual ~SegmentInfoCollection(); LUCENE_CLASS(SegmentInfoCollection); protected: Collection segmentInfos; public: int32_t size(); bool empty(); void clear(); void add(const SegmentInfoPtr& info); void add(int32_t pos, const SegmentInfoPtr& info); void addAll(const SegmentInfoCollectionPtr& segmentInfos); bool equals(const LuceneObjectPtr& other); int32_t find(const SegmentInfoPtr& info); bool contains(const SegmentInfoPtr& info); void remove(int32_t pos); void remove(int32_t start, int32_t end); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SegmentInfos.h000066400000000000000000000173641456444476200231410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTINFOS_H #define SEGMENTINFOS_H #include "SegmentInfoCollection.h" namespace Lucene { /// A collection of SegmentInfo objects with methods for operating on those segments in relation to the file system. class LPPAPI SegmentInfos : public SegmentInfoCollection { public: SegmentInfos(); virtual ~SegmentInfos(); LUCENE_CLASS(SegmentInfos); public: /// The file format version, a negative number. Works since counter, the old 1st entry, is always >= 0 static const int32_t FORMAT; /// This format adds details used for lockless commits. 
It differs slightly from the previous format in that file names /// are never re-used (write once). Instead, each file is written to the next generation. For example, segments_1, /// segments_2, etc. This allows us to not use a commit lock. /// See fileformats for details. static const int32_t FORMAT_LOCKLESS; /// This format adds a "hasSingleNormFile" flag into each segment info. static const int32_t FORMAT_SINGLE_NORM_FILE; /// This format allows multiple segments to share a single vectors and stored fields file. static const int32_t FORMAT_SHARED_DOC_STORE; /// This format adds a checksum at the end of the file to ensure all bytes were successfully written. static const int32_t FORMAT_CHECKSUM; /// This format adds the deletion count for each segment. This way IndexWriter can efficiently report numDocs(). static const int32_t FORMAT_DEL_COUNT; /// This format adds the boolean hasProx to record if any fields in the segment store prox information (ie, have /// omitTermFreqAndPositions == false) static const int32_t FORMAT_HAS_PROX; /// This format adds optional commit userData storage. static const int32_t FORMAT_USER_DATA; /// This format adds optional per-segment string diagnostics storage, and switches userData to Map static const int32_t FORMAT_DIAGNOSTICS; /// This must always point to the most recent file format. static const int32_t CURRENT_FORMAT; int32_t counter; // used to name new segments private: /// Advanced configuration of retry logic in loading segments_N file. static int32_t defaultGenFileRetryCount; static int32_t defaultGenFileRetryPauseMsec; static int32_t defaultGenLookaheadCount; /// Counts how often the index has been changed by adding or deleting docs. /// Starting with the current time in milliseconds forces to create unique version numbers. 
int64_t version; int64_t generation; // generation of the "segments_N" for the next commit int64_t lastGeneration; // generation of the "segments_N" file we last successfully read // or wrote; this is normally the same as generation except if // there was an exception that had interrupted a commit MapStringString userData; // Opaque map that user can specify during IndexWriter::commit static MapStringString singletonUserData; static InfoStreamPtr infoStream; ChecksumIndexOutputPtr pendingSegnOutput; public: SegmentInfoPtr info(int32_t i); String getCurrentSegmentFileName(); String getNextSegmentFileName(); /// Read a particular segmentFileName. Note that this may throw an IOException if a commit is in process. void read(const DirectoryPtr& directory, const String& segmentFileName); /// This version of read uses the retry logic (for lock-less commits) to find the right segments file to load. void read(const DirectoryPtr& directory); /// Returns a copy of this instance, also copying each SegmentInfo. virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); /// Version number when this SegmentInfos was generated. int64_t getVersion(); int64_t getGeneration(); int64_t getLastGeneration(); /// Returns a new SegmentInfos containing the SegmentInfo instances in the specified range first (inclusive) to /// last (exclusive), so total number of segments returned is last-first. SegmentInfosPtr range(int32_t first, int32_t last); /// Carry over generation numbers from another SegmentInfos. void updateGeneration(const SegmentInfosPtr& other); void rollbackCommit(const DirectoryPtr& dir); /// Call this to start a commit. This writes the new segments file, but writes an invalid checksum at the end, so /// that it is not visible to readers. Once this is called you must call. /// {@link #finishCommit} to complete the commit or /// {@link #rollbackCommit} to abort it. 
void prepareCommit(const DirectoryPtr& dir); /// Returns all file names referenced by SegmentInfo instances matching the provided Directory (ie files associated /// with any "external" segments are skipped). The returned collection is recomputed on each invocation. HashSet files(const DirectoryPtr& dir, bool includeSegmentsFile); void finishCommit(const DirectoryPtr& dir); /// Writes & syncs to the Directory dir, taking care to remove the segments file on exception. void commit(const DirectoryPtr& dir); String segString(const DirectoryPtr& directory); MapStringString getUserData(); void setUserData(MapStringString data); /// Replaces all segments in this instance, but keeps generation, version, counter so that future commits remain /// write once. void replace(const SegmentInfosPtr& other); bool hasExternalSegments(const DirectoryPtr& dir); static int64_t getCurrentSegmentGeneration(HashSet files); static int64_t getCurrentSegmentGeneration(const DirectoryPtr& directory); static String getCurrentSegmentFileName(HashSet files); static String getCurrentSegmentFileName(const DirectoryPtr& directory); static int64_t generationFromSegmentsFileName(const String& fileName); /// Current version number from segments file. static int64_t readCurrentVersion(const DirectoryPtr& directory); /// Returns userData from latest segments file. static MapStringString readCurrentUserData(const DirectoryPtr& directory); /// If non-null, information about retries when loading the segments file will be printed to this. static void setInfoStream(const InfoStreamPtr& infoStream); /// Set how many times to try loading the segments.gen file contents to determine current segment generation. This file /// is only referenced when the primary method (listing the directory) fails. 
static void setDefaultGenFileRetryCount(int32_t count); /// @see #setDefaultGenFileRetryCount static int32_t getDefaultGenFileRetryCount(); /// Set how many milliseconds to pause in between attempts to load the segments.gen file. static void setDefaultGenFileRetryPauseMsec(int32_t msec); /// @see #setDefaultGenFileRetryPauseMsec static int32_t getDefaultGenFileRetryPauseMsec(); /// Set how many times to try incrementing the gen when loading the segments file. This only runs if the primary /// (listing directory) and secondary (opening segments.gen file) methods fail to find the segments file. static void setDefaultGenLookaheadCount(int32_t count); /// @see #setDefaultGenLookaheadCount static int32_t getDefaultGenLookahedCount(); /// @see #setInfoStream static InfoStreamPtr getInfoStream(); static void message(const String& message); protected: void write(const DirectoryPtr& directory); friend class FindSegmentsFile; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SegmentMergeInfo.h000066400000000000000000000020651456444476200237260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTMERGEINFO_H #define SEGMENTMERGEINFO_H #include "Term.h" namespace Lucene { class SegmentMergeInfo : public LuceneObject { public: SegmentMergeInfo(int32_t b, const TermEnumPtr& te, const IndexReaderPtr& r); virtual ~SegmentMergeInfo(); LUCENE_CLASS(SegmentMergeInfo); protected: TermPositionsPtr postings; // use getPositions() Collection docMap; // use getDocMap() public: TermPtr term; int32_t base; int32_t ord; // the position of the segment in a MultiReader TermEnumPtr termEnum; IndexReaderWeakPtr _reader; int32_t delCount; public: Collection getDocMap(); TermPositionsPtr getPositions(); bool next(); void close(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SegmentMergeQueue.h000066400000000000000000000014141456444476200241140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTMERGEQUEUE_H #define SEGMENTMERGEQUEUE_H #include "PriorityQueue.h" namespace Lucene { class SegmentMergeQueue : public PriorityQueue { public: SegmentMergeQueue(int32_t size); virtual ~SegmentMergeQueue(); LUCENE_CLASS(SegmentMergeQueue); public: void close(); protected: virtual bool lessThan(const SegmentMergeInfoPtr& first, const SegmentMergeInfoPtr& second); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SegmentMerger.h000066400000000000000000000131641456444476200232760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTMERGER_H #define SEGMENTMERGER_H #include "LuceneObject.h" namespace Lucene { /// The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}, into a single /// Segment. After adding the appropriate readers, call the merge method to combine the segments. /// /// If the compoundFile flag is set, then the segments will be merged into a compound file. /// @see #merge /// @see #add class LPPAPI SegmentMerger : public LuceneObject { public: SegmentMerger(const DirectoryPtr& dir, const String& name); SegmentMerger(const IndexWriterPtr& writer, const String& name, const OneMergePtr& merge); virtual ~SegmentMerger(); LUCENE_CLASS(SegmentMerger); protected: DirectoryPtr directory; String segment; int32_t termIndexInterval; Collection readers; FieldInfosPtr fieldInfos; int32_t mergedDocs; CheckAbortPtr checkAbort; /// Whether we should merge doc stores (stored fields and vectors files). When all segments we /// are merging already share the same doc store files, we don't need to merge the doc stores. bool mergeDocStores; /// Maximum number of contiguous documents to bulk-copy when merging stored fields static const int32_t MAX_RAW_MERGE_DOCS; Collection matchingSegmentReaders; Collection rawDocLengths; Collection rawDocLengths2; SegmentMergeQueuePtr queue; bool omitTermFreqAndPositions; ByteArray payloadBuffer; Collection< Collection > docMaps; Collection delCounts; public: /// norms header placeholder static const uint8_t NORMS_HEADER[]; static const int32_t NORMS_HEADER_LENGTH; public: bool hasProx(); /// Add an IndexReader to the collection of readers that are to be merged void add(const IndexReaderPtr& reader); /// @param i The index of the reader to return /// @return The i'th reader to be merged IndexReaderPtr segmentReader(int32_t i); /// Merges the readers specified by the {@link #add} method into the directory passed to the constructor. 
/// @return The number of documents that were merged int32_t merge(); /// Merges the readers specified by the {@link #add} method into the directory passed to the constructor. /// @param mergeDocStores if false, we will not merge the stored fields nor vectors files /// @return The number of documents that were merged int32_t merge(bool mergeDocStores); /// close all IndexReaders that have been added. Should not be called before merge(). void closeReaders(); HashSet getMergedFiles(); HashSet createCompoundFile(const String& fileName); /// @return The number of documents in all of the readers int32_t mergeFields(); Collection< Collection > getDocMaps(); Collection getDelCounts(); protected: void addIndexed(const IndexReaderPtr& reader, const FieldInfosPtr& fInfos, HashSet names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions); void setMatchingSegmentReaders(); int32_t copyFieldsWithDeletions(const FieldsWriterPtr& fieldsWriter, const IndexReaderPtr& reader, const FieldsReaderPtr& matchingFieldsReader); int32_t copyFieldsNoDeletions(const FieldsWriterPtr& fieldsWriter, const IndexReaderPtr& reader, const FieldsReaderPtr& matchingFieldsReader); /// Merge the TermVectors from each of the segments into the new one. void mergeVectors(); void copyVectorsWithDeletions(const TermVectorsWriterPtr& termVectorsWriter, const TermVectorsReaderPtr& matchingVectorsReader, const IndexReaderPtr& reader); void copyVectorsNoDeletions(const TermVectorsWriterPtr& termVectorsWriter, const TermVectorsReaderPtr& matchingVectorsReader, const IndexReaderPtr& reader); void mergeTerms(); void mergeTermInfos(const FormatPostingsFieldsConsumerPtr& consumer); /// Process postings from multiple segments all positioned on the same term. Writes out merged entries /// into freqOutput and the proxOutput streams. 
/// @param smis array of segments /// @param n number of cells in the array actually occupied /// @return number of documents across all segments where this term was found int32_t appendPostings(const FormatPostingsTermsConsumerPtr& termsConsumer, Collection smis, int32_t n); void mergeNorms(); }; class CheckAbort : public LuceneObject { public: CheckAbort(const OneMergePtr& merge, const DirectoryPtr& dir); virtual ~CheckAbort(); LUCENE_CLASS(CheckAbort); protected: double workCount; OneMergePtr merge; DirectoryWeakPtr _dir; public: /// Records the fact that roughly units amount of work have been done since this method was last called. /// When adding time-consuming code into SegmentMerger, you should test different values for units to /// ensure that the time in between calls to merge.checkAborted is up to ~ 1 second. virtual void work(double units); }; class CheckAbortNull : public CheckAbort { public: CheckAbortNull(); virtual ~CheckAbortNull(); LUCENE_CLASS(CheckAbortNull); public: /// do nothing virtual void work(double units); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SegmentReader.h000066400000000000000000000200761456444476200232570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTREADER_H #define SEGMENTREADER_H #include "IndexReader.h" #include "CloseableThreadLocal.h" namespace Lucene { class LPPAPI SegmentReader : public IndexReader { public: SegmentReader(); virtual ~SegmentReader(); LUCENE_CLASS(SegmentReader); protected: bool readOnly; INTERNAL: BitVectorPtr deletedDocs; SegmentReaderRefPtr deletedDocsRef; CoreReadersPtr core; FieldsReaderLocalPtr fieldsReaderLocal; SegmentInfoPtr rollbackSegmentInfo; CloseableThreadLocal termVectorsLocal; FieldInfosPtr fieldInfos(); /// Create a clone from the initial TermVectorsReader and store it in the ThreadLocal. /// @return TermVectorsReader TermVectorsReaderPtr getTermVectorsReader(); TermVectorsReaderPtr getTermVectorsReaderOrig(); FieldsReaderPtr getFieldsReader(); MapStringNorm _norms; private: SegmentInfoPtr si; int32_t readBufferSize; bool deletedDocsDirty; bool normsDirty; int32_t pendingDeleteCount; bool rollbackHasChanges; bool rollbackDeletedDocsDirty; bool rollbackNormsDirty; int32_t rollbackPendingDeleteCount; // optionally used for the .nrm file shared by multiple norms IndexInputPtr singleNormStream; SegmentReaderRefPtr singleNormRef; public: virtual void initialize(); using IndexReader::document; using IndexReader::termPositions; static SegmentReaderPtr get(bool readOnly, const SegmentInfoPtr& si, int32_t termInfosIndexDivisor); static SegmentReaderPtr get(bool readOnly, const DirectoryPtr& dir, const SegmentInfoPtr& si, int32_t readBufferSize, bool doOpenStores, int32_t termInfosIndexDivisor); void openDocStores(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr& other = LuceneObjectPtr()); SegmentReaderPtr reopenSegment(const SegmentInfoPtr& si, bool doClone, bool openReadOnly); static bool hasDeletions(const SegmentInfoPtr& si); /// Returns true if any documents have been deleted 
virtual bool hasDeletions(); static bool usesCompoundFile(const SegmentInfoPtr& si); static bool hasSeparateNorms(const SegmentInfoPtr& si); HashSet files(); /// Returns an enumeration of all the terms in the index. virtual TermEnumPtr terms(); /// Returns an enumeration of all terms starting at a given term. virtual TermEnumPtr terms(const TermPtr& t); /// Get the {@link Document} at the n'th position. virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector); /// Returns true if document n has been deleted virtual bool isDeleted(int32_t n); /// Returns an enumeration of all the documents which contain term. virtual TermDocsPtr termDocs(const TermPtr& term); /// Returns an unpositioned {@link TermDocs} enumerator. virtual TermDocsPtr termDocs(); /// Returns an unpositioned {@link TermPositions} enumerator. virtual TermPositionsPtr termPositions(); /// Returns the number of documents containing the term t. virtual int32_t docFreq(const TermPtr& t); /// Returns the number of documents in this index. virtual int32_t numDocs(); /// Returns one greater than the largest possible document number. virtual int32_t maxDoc(); /// Get a list of unique field names that exist in this index and have the specified field option information. virtual HashSet getFieldNames(FieldOption fieldOption); /// Returns true if there are norms stored for this field. virtual bool hasNorms(const String& field); /// Returns the byte-encoded normalization factor for the named field of every document. virtual ByteArray norms(const String& field); /// Read norms into a pre-allocated array. virtual void norms(const String& field, ByteArray norms, int32_t offset); bool termsIndexLoaded(); /// NOTE: only called from IndexWriter when a near real-time reader is opened, or applyDeletes is run, sharing a /// segment that's still being merged. 
This method is not thread safe, and relies on the synchronization in IndexWriter void loadTermsIndex(int32_t termsIndexDivisor); bool normsClosed(); // for testing only bool normsClosed(const String& field); // for testing only /// Return a term frequency vector for the specified document and field. The vector returned contains term /// numbers and frequencies for all terms in the specified field of this document, if the field had /// storeTermVector flag set. If the flag was not set, the method returns null. virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays /// of the {@link TermFreqVector}. virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper); /// Map all the term vectors for all fields in a Document virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper); /// Return an array of term frequency vectors for the specified document. The array contains a vector for /// each vectorized field in the document. Each vector vector contains term numbers and frequencies for all /// terms in a given vectorized field. If no such fields existed, the method returns null. virtual Collection getTermFreqVectors(int32_t docNumber); /// Return the name of the segment this reader is reading. String getSegmentName(); /// Return the SegmentInfo of the segment this reader is reading. SegmentInfoPtr getSegmentInfo(); void setSegmentInfo(const SegmentInfoPtr& info); void startCommit(); void rollbackCommit(); /// Returns the directory this index resides in. virtual DirectoryPtr directory(); /// This is necessary so that cloned SegmentReaders (which share the underlying postings data) /// will map to the same entry in the FieldCache. 
virtual LuceneObjectPtr getFieldCacheKey(); virtual LuceneObjectPtr getDeletesCacheKey(); /// Returns the number of unique terms (across all fields) in this reader. virtual int64_t getUniqueTermCount(); static SegmentReaderPtr getOnlySegmentReader(const DirectoryPtr& dir); static SegmentReaderPtr getOnlySegmentReader(const IndexReaderPtr& reader); virtual int32_t getTermInfosIndexDivisor(); protected: bool checkDeletedCounts(); void loadDeletedDocs(); /// Clones the norm bytes. May be overridden by subclasses. /// @param bytes Byte array to clone /// @return New BitVector virtual ByteArray cloneNormBytes(ByteArray bytes); /// Clones the deleteDocs BitVector. May be overridden by subclasses. /// @param bv BitVector to clone /// @return New BitVector virtual BitVectorPtr cloneDeletedDocs(const BitVectorPtr& bv); /// Implements commit. virtual void doCommit(MapStringString commitUserData); virtual void commitChanges(MapStringString commitUserData); /// Implements close. virtual void doClose(); /// Implements deletion of the document numbered docNum. /// Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}. virtual void doDelete(int32_t docNum); /// Implements actual undeleteAll() in subclass. virtual void doUndeleteAll(); /// can return null if norms aren't stored ByteArray getNorms(const String& field); /// Implements setNorm in subclass. virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); void openNorms(const DirectoryPtr& cfsDir, int32_t readBufferSize); friend class ReaderPool; friend class IndexWriter; friend class Norm; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SegmentTermDocs.h000066400000000000000000000045241456444476200235750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTTERMDOCS_H #define SEGMENTTERMDOCS_H #include "TermPositions.h" namespace Lucene { class LPPAPI SegmentTermDocs : public TermPositions, public LuceneObject { public: SegmentTermDocs(const SegmentReaderPtr& parent); virtual ~SegmentTermDocs(); LUCENE_CLASS(SegmentTermDocs); protected: SegmentReaderWeakPtr _parent; SegmentReader* __parent; IndexInputPtr _freqStream; IndexInput* __freqStream; int32_t count; int32_t df; BitVectorPtr deletedDocs; BitVector* __deletedDocs; int32_t _doc; int32_t _freq; int32_t skipInterval; int32_t maxSkipLevels; DefaultSkipListReaderPtr skipListReader; int64_t freqBasePointer; int64_t proxBasePointer; int64_t skipPointer; bool haveSkipped; bool currentFieldStoresPayloads; bool currentFieldOmitTermFreqAndPositions; public: /// Sets this to the data for a term. virtual void seek(const TermPtr& term); /// Sets this to the data for the current term in a {@link TermEnum}. virtual void seek(const TermEnumPtr& termEnum); virtual void seek(const TermInfoPtr& ti, const TermPtr& term); virtual void close(); /// Returns the current document number. virtual int32_t doc(); /// Returns the frequency of the term within the current document. virtual int32_t freq(); /// Moves to the next pair in the enumeration. virtual bool next(); /// Optimized implementation. virtual int32_t read(Collection& docs, Collection& freqs); /// Optimized implementation. virtual bool skipTo(int32_t target); /// Used for testing virtual IndexInputPtr freqStream(); virtual void freqStream(const IndexInputPtr& freqStream); protected: virtual void skippingDoc(); virtual int32_t readNoTf(Collection& docs, Collection& freqs, int32_t length); /// Overridden by SegmentTermPositions to skip in prox stream. 
virtual void skipProx(int64_t proxPointer, int32_t payloadLength); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SegmentTermEnum.h000066400000000000000000000052671456444476200236160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTTERMENUM_H #define SEGMENTTERMENUM_H #include "TermEnum.h" namespace Lucene { class LPPAPI SegmentTermEnum : public TermEnum { public: SegmentTermEnum(); SegmentTermEnum(const IndexInputPtr& i, const FieldInfosPtr& fis, bool isi); virtual ~SegmentTermEnum(); LUCENE_CLASS(SegmentTermEnum); protected: IndexInputPtr input; TermBufferPtr termBuffer; TermBufferPtr prevBuffer; TermBufferPtr scanBuffer; // used for scanning TermInfoPtr _termInfo; int32_t format; bool isIndex; int32_t formatM1SkipInterval; public: FieldInfosPtr fieldInfos; int64_t size; int64_t position; int64_t indexPointer; int32_t indexInterval; int32_t skipInterval; int32_t maxSkipLevels; public: virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); void seek(int64_t pointer, int64_t p, const TermPtr& t, const TermInfoPtr& ti); /// Increments the enumeration to the next element. True if one exists. virtual bool next(); /// Optimized scan, without allocating new terms. Return number of invocations to next(). int32_t scanTo(const TermPtr& term); /// Returns the current Term in the enumeration. /// Initially invalid, valid after next() called for the first time. virtual TermPtr term(); /// Returns the previous Term enumerated. Initially null. TermPtr prev(); /// Returns the current TermInfo in the enumeration. /// Initially invalid, valid after next() called for the first time. 
TermInfoPtr termInfo(); /// Sets the argument to the current TermInfo in the enumeration. /// Initially invalid, valid after next() called for the first time. void termInfo(const TermInfoPtr& ti); /// Returns the docFreq of the current Term in the enumeration. /// Initially invalid, valid after next() called for the first time. virtual int32_t docFreq(); /// Returns the freqPointer from the current TermInfo in the enumeration. /// Initially invalid, valid after next() called for the first time. int64_t freqPointer(); /// Returns the proxPointer from the current TermInfo in the enumeration. /// Initially invalid, valid after next() called for the first time. int64_t proxPointer(); /// Closes the enumeration to further activity, freeing resources. virtual void close(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SegmentTermPositionVector.h000066400000000000000000000031541456444476200256720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTTERMPOSITIONVECTOR_H #define SEGMENTTERMPOSITIONVECTOR_H #include "SegmentTermVector.h" namespace Lucene { class LPPAPI SegmentTermPositionVector : public SegmentTermVector { public: SegmentTermPositionVector(const String& field, Collection terms, Collection termFreqs, Collection< Collection > positions, Collection< Collection > offsets); virtual ~SegmentTermPositionVector(); LUCENE_CLASS(SegmentTermPositionVector); protected: Collection< Collection > positions; Collection< Collection > offsets; protected: static const Collection EMPTY_TERM_POS(); public: /// Returns an array of TermVectorOffsetInfo in which the term is found. 
/// @param index The position in the array to get the offsets from /// @return An array of TermVectorOffsetInfo objects or the empty list virtual Collection getOffsets(int32_t index); /// Returns an array of positions in which the term is found. /// Terms are identified by the index at which its number appears in the term String array obtained from the indexOf method. virtual Collection getTermPositions(int32_t index); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SegmentTermPositions.h000066400000000000000000000051761456444476200247000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTTERMPOSITIONS_H #define SEGMENTTERMPOSITIONS_H #include "SegmentTermDocs.h" namespace Lucene { class LPPAPI SegmentTermPositions : public SegmentTermDocs { public: SegmentTermPositions(const SegmentReaderPtr& parent); virtual ~SegmentTermPositions(); LUCENE_CLASS(SegmentTermPositions); protected: IndexInputPtr proxStream; int32_t proxCount; int32_t position; /// The current payload length int32_t payloadLength; /// Indicates whether the payload of the current position has been read from the proxStream yet bool needToLoadPayload; // these variables are being used to remember information for a lazy skip int64_t lazySkipPointer; int32_t lazySkipProxCount; public: using SegmentTermDocs::seek; virtual void seek(const TermInfoPtr& ti, const TermPtr& term); virtual void close(); /// Returns next position in the current document. virtual int32_t nextPosition(); /// Moves to the next pair in the enumeration. 
virtual bool next(); /// Not supported virtual int32_t read(Collection& docs, Collection& freqs); /// Returns the length of the payload at the current term position. virtual int32_t getPayloadLength(); /// Returns the payload data at the current term position. virtual ByteArray getPayload(ByteArray data, int32_t offset); /// Checks if a payload can be loaded at this position. virtual bool isPayloadAvailable(); protected: int32_t readDeltaPosition(); virtual void skippingDoc(); virtual void skipProx(int64_t proxPointer, int32_t payloadLength); virtual void skipPositions(int32_t n); virtual void skipPayload(); /// It is not always necessary to move the prox pointer to a new document after the freq pointer has /// been moved. Consider for example a phrase query with two terms: the freq pointer for term 1 has to /// move to document x to answer the question if the term occurs in that document. But only if term 2 /// also matches document x, the positions have to be read to figure out if term 1 and term 2 appear next /// to each other in document x and thus satisfy the query. So we move the prox pointer lazily to the /// document as soon as positions are requested. virtual void lazySkip(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SegmentTermVector.h000066400000000000000000000031271456444476200241450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTTERMVECTOR_H #define SEGMENTTERMVECTOR_H #include "TermPositionVector.h" namespace Lucene { class LPPAPI SegmentTermVector : public TermPositionVector, public LuceneObject { public: SegmentTermVector(const String& field, Collection terms, Collection termFreqs); virtual ~SegmentTermVector(); LUCENE_CLASS(SegmentTermVector); protected: String field; Collection terms; Collection termFreqs; public: /// @return The number of the field this vector is associated with virtual String getField(); virtual String toString(); /// @return The number of terms in the term vector. virtual int32_t size(); /// @return An Array of term texts in ascending order. virtual Collection getTerms(); /// @return Array of term frequencies. virtual Collection getTermFrequencies(); /// Return an index in the term numbers array returned from getTerms at which the term with the /// specified term appears. virtual int32_t indexOf(const String& term); /// Just like indexOf(int) but searches for a number of terms at the same time. virtual Collection indexesOf(Collection termNumbers, int32_t start, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SegmentWriteState.h000066400000000000000000000022001456444476200241350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SEGMENTWRITESTATE_H #define SEGMENTWRITESTATE_H #include "LuceneObject.h" namespace Lucene { class SegmentWriteState : public LuceneObject { public: SegmentWriteState(const DocumentsWriterPtr& docWriter, const DirectoryPtr& directory, const String& segmentName, const String& docStoreSegmentName, int32_t numDocs, int32_t numDocsInStore, int32_t termIndexInterval); virtual ~SegmentWriteState(); LUCENE_CLASS(SegmentWriteState); public: DocumentsWriterWeakPtr _docWriter; DirectoryPtr directory; String segmentName; String docStoreSegmentName; int32_t numDocs; int32_t termIndexInterval; int32_t numDocsInStore; HashSet flushedFiles; public: String segmentFileName(const String& ext); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SerialMergeScheduler.h000066400000000000000000000017501456444476200245660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SERIALMERGESCHEDULER_H #define SERIALMERGESCHEDULER_H #include "MergeScheduler.h" namespace Lucene { /// A {@link MergeScheduler} that simply does each merge sequentially, using the current thread. class LPPAPI SerialMergeScheduler : public MergeScheduler { public: virtual ~SerialMergeScheduler(); LUCENE_CLASS(SerialMergeScheduler); public: /// Just do the merges in sequence. We do this "synchronized" so that even if the application is using /// multiple threads, only one merge may run at a time. virtual void merge(const IndexWriterPtr& writer); /// Close this MergeScheduler. 
virtual void close(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Set.h000066400000000000000000000062471456444476200212710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SET_H #define SET_H #include #include "LuceneSync.h" namespace Lucene { /// Utility template class to handle set based collections that can be safely copied and shared template < class TYPE, class LESS = std::less > class Set : public LuceneSync { public: typedef Set this_type; typedef std::set set_type; typedef typename set_type::iterator iterator; typedef typename set_type::const_iterator const_iterator; typedef TYPE value_type; virtual ~Set() { } protected: boost::shared_ptr setContainer; public: static this_type newInstance() { this_type instance; instance.setContainer = Lucene::newInstance(); return instance; } template static this_type newInstance(ITER first, ITER last) { this_type instance; instance.setContainer = Lucene::newInstance(first, last); return instance; } void reset() { setContainer.reset(); } int32_t size() const { return (int32_t)setContainer->size(); } bool empty() const { return setContainer->empty(); } void clear() { setContainer->clear(); } iterator begin() { return setContainer->begin(); } iterator end() { return setContainer->end(); } const_iterator begin() const { return setContainer->begin(); } const_iterator end() const { return setContainer->end(); } bool add(const TYPE& type) { return setContainer->insert(type).second; } template void addAll(ITER first, ITER last) { setContainer->insert(first, last); } bool remove(const TYPE& type) { return (setContainer->erase(type) > 0); } iterator find(const TYPE& type) { return 
setContainer->find(type); } bool contains(const TYPE& type) const { return (setContainer->find(type) != setContainer->end()); } bool equals(const this_type& other) const { return equals(other, std::equal_to()); } template bool equals(const this_type& other, PRED comp) const { if (setContainer->size() != other.setContainer->size()) { return false; } return std::equal(setContainer->begin(), setContainer->end(), other.setContainer->begin(), comp); } void swap(this_type& other) { setContainer.swap(other->setContainer); } operator bool() const { return setContainer.get() != NULL; } bool operator! () const { return !setContainer; } bool operator== (const this_type& other) { return (setContainer == other.setContainer); } bool operator!= (const this_type& other) { return (setContainer != other.setContainer); } }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SetBasedFieldSelector.h000066400000000000000000000033071456444476200246670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SETBASEDFIELDSELECTOR_H #define SETBASEDFIELDSELECTOR_H #include "FieldSelector.h" namespace Lucene { /// Declare what fields to load normally and what fields to load lazily class LPPAPI SetBasedFieldSelector : public FieldSelector { public: /// Pass in the Set of {@link Field} names to load and the Set of {@link Field} names to load lazily. /// If both are null, the Document will not have any {@link Field} on it. /// @param fieldsToLoad A Set of {@link String} field names to load. May be empty, but not null /// @param lazyFieldsToLoad A Set of {@link String} field names to load lazily. 
May be empty, but not null SetBasedFieldSelector(HashSet fieldsToLoad, HashSet lazyFieldsToLoad); virtual ~SetBasedFieldSelector(); LUCENE_CLASS(SetBasedFieldSelector); protected: HashSet fieldsToLoad; HashSet lazyFieldsToLoad; public: /// Indicate whether to load the field with the given name or not. If the {@link Field#name()} is not in /// either of the initializing Sets, then {@link FieldSelectorResult#NO_LOAD} is returned. If a Field name /// is in both fieldsToLoad and lazyFieldsToLoad, lazy has precedence. /// @param fieldName The {@link Field} name to check /// @return The {@link FieldSelectorResult} virtual FieldSelectorResult accept(const String& fieldName); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Similarity.h000066400000000000000000000756331456444476200226710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMILARITY_H #define SIMILARITY_H #include "Explanation.h" namespace Lucene { /// Scoring API. /// /// Similarity defines the components of Lucene scoring. Overriding computation of these components is /// a convenient way to alter Lucene scoring. /// /// Suggested reading: /// Introduction To Information Retrieval, Chapter 6. /// /// The following describes how Lucene scoring evolves from underlying information retrieval models to /// (efficient) implementation. We first brief on VSM Score, then derive from it Lucene's Conceptual Scoring /// Formula, from which, finally, evolves Lucene's Practical Scoring Function (the latter is connected directly /// with Lucene classes and methods). 
/// /// Lucene combines Boolean model (BM) of /// Information Retrieval with Vector Space Model /// (VSM) of Information Retrieval - documents "approved" by BM are scored by VSM. /// /// In VSM, documents and queries are represented as weighted vectors in a multi-dimensional space, where each /// distinct index term is a dimension, and weights are Tf-idf /// values. /// /// VSM does not require weights to be Tf-idf values, but Tf-idf values are believed to produce search results /// of high quality, and so Lucene is using Tf-idf. Tf and Idf are described in more detail below, but for now, /// for completion, let's just say that for given term t and document (or query) x, Tf(t,x) varies with the /// number of occurrences of term t in x (when one increases so does the other) and idf(t) similarly varies with /// the inverse of the number of index documents containing term t. /// /// VSM score of document d for query q is the Cosine /// Similarity of the weighted query vectors V(q) and V(d): /// ///
 
/// /// /// ///
/// /// ///
/// /// /// /// /// ///
/// cosine-similarity(q,d)   =   /// /// /// /// /// ///
V(q) · V(d)
–––––––––
|V(q)| |V(d)|
///
///
///
///
VSM Score
///
///
 
/// /// Where V(q) · V(d) is the dot product of the /// weighted vectors, and |V(q)| and |V(d)| are their /// Euclidean norms. /// /// Note: the above equation can be viewed as the dot product of the normalized weighted vectors, in the sense /// that dividing V(q) by its euclidean norm is normalizing it to a unit vector. /// /// Lucene refines VSM score for both search quality and usability: ///
    ///
  • Normalizing V(d) to the unit vector is known to be problematic in that it removes all document length /// information. For some documents removing this info is probably ok, eg. a document made by duplicating a /// certain paragraph 10 times, especially if that paragraph is made of distinct terms. But for a document which /// contains no duplicated paragraphs, this might be wrong. To avoid this problem, a different document length /// normalization factor is used, which normalizes to a vector equal to or larger than the unit vector: /// doc-len-norm(d). ///
  • ///
  • At indexing, users can specify that certain documents are more important than others, by assigning a /// document boost. For this, the score of each document is also multiplied by its boost value doc-boost(d). ///
  • ///
  • Lucene is field based, hence each query term applies to a single field, document length normalization /// is by the length of the certain field, and in addition to document boost there are also document fields /// boosts. ///
  • ///
  • The same field can be added to a document during indexing several times, and so the boost of that field /// is the multiplication of the boosts of the separate additions (or parts) of that field within the document. ///
  • ///
  • At search time users can specify boosts to each query, sub-query, and each query term, hence the /// contribution of a query term to the score of a document is multiplied by the boost of that query term /// query-boost(q). ///
  • ///
  • A document may match a multi term query without containing all the terms of that query (this is correct /// for some of the queries), and users can further reward documents matching more query terms through a /// coordination factor, which is usually larger when more terms are matched: coord-factor(q,d). ///
  • ///
/// /// Under the simplifying assumption of a single field in the index, we get Lucene's Conceptual scoring formula: /// ///
 
/// /// /// ///
/// /// ///
/// /// /// /// /// /// ///
/// score(q,d)   =   /// coord-factor(q,d) ·   /// query-boost(q) ·   /// /// /// /// /// ///
V(q) · V(d)
–––––––––
|V(q)|
///
///   ·   doc-len-norm(d) ///   ·   doc-boost(d) ///
///
///
///
Lucene Conceptual Scoring Formula
///
///
 
/// /// The conceptual formula is a simplification in the sense that (1) terms and documents are fielded and (2) /// boosts are usually per query term rather than per query. /// /// We now describe how Lucene implements this conceptual scoring formula, and derive from it Lucene's Practical /// Scoring Function. /// /// For efficient score computation some scoring components are computed and aggregated in advance: ///
    ///
  • Query-boost for the query (actually for each query term) is known when search starts. ///
  • ///
  • Query Euclidean norm |V(q)| can be computed when search starts, as it is independent of the document /// being scored. From search optimization perspective, it is a valid question why bother to normalize the /// query at all, because all scored documents will be multiplied by the same |V(q)|, and hence documents ranks /// (their order by score) will not be affected by this normalization. There are two good reasons to keep this /// normalization: ///
      ///
    • Recall that Cosine Similarity can be used /// find how similar two documents are. One can use Lucene for eg. clustering, and use a document as a query to /// compute its similarity to other documents. In this use case it is important that the score of document d3 /// for query d1 is comparable to the score of document d3 for query d2. In other words, scores of a document for /// two distinct queries should be comparable. There are other applications that may require this. And this is /// exactly what normalizing the query vector V(q) provides: comparability (to a certain extent) of two or more /// queries. ///
    • ///
    • Applying query normalization on the scores helps to keep the scores around the unit vector, hence preventing /// loss of score data because of floating point precision limitations. ///
    • ///
    ///
  • ///
  • Document length norm doc-len-norm(d) and document boost doc-boost(d) are known at indexing time. They are /// computed in advance and their multiplication is saved as a single value in the index: norm(d). (In the equations /// below, norm(t in d) means norm(field(t) in doc d) where field(t) is the field associated with term t.) ///
  • ///
/// /// Lucene's Practical Scoring Function is derived from the above. The color codes demonstrate how it relates to /// those of the conceptual formula: /// /// /// /// ///
/// /// ///
/// /// /// /// /// /// /// /// /// /// /// ///
/// score(q,d)   =   /// coord(q,d)  ·  /// queryNorm(q)  ·  /// /// /// /// ( /// tf(t in d)  ·  /// idf(t)2  ·  /// t.getBoost() ·  /// norm(t,d) /// ) ///
t in q
///
///
///
Lucene Practical Scoring Function
///
/// /// where ///
    ///
  1. /// /// tf(t in d) /// correlates to the term's frequency, defined as the number of times term t appears in the currently /// scored document d. Documents that have more occurrences of a given term receive a higher score. /// Note that tf(t in q) is assumed to be 1 and therefore it does not appear in this equation, /// However if a query contains twice the same term, there will be two term-queries with that same term /// and hence the computation would still be correct (although not very efficient). /// The default computation for tf(t in d) in {@link DefaultSimilarity#tf(float) DefaultSimilarity} is: /// ///
     
    /// /// /// /// /// ///
    /// {@link DefaultSimilarity#tf(float) tf(t in d)}   =   /// /// frequency½ ///
    ///
     
    ///
  2. /// ///
  3. /// /// idf(t) stands for Inverse Document Frequency. This value correlates to the inverse of docFreq /// (the number of documents in which the term t appears). This means rarer terms give higher contribution /// to the total score. idf(t) appears for t in both the query and the document, hence it is squared in /// the equation. The default computation for idf(t) in {@link DefaultSimilarity#idf(int, int) DefaultSimilarity} is: /// ///
     
    /// /// /// /// /// /// /// ///
    /// {@link DefaultSimilarity#idf(int, int) idf(t)}  =   /// /// 1 + log ( /// /// /// /// /// ///
    numDocs
    –––––––––
    docFreq+1
    ///
    /// ) ///
    ///
     
    ///
  4. /// ///
  5. /// /// coord(q,d) /// is a score factor based on how many of the query terms are found in the specified document. Typically, a /// document that contains more of the query's terms will receive a higher score than another document with /// fewer query terms. This is a search time factor computed in {@link #coord(int, int) coord(q,d)} by the /// Similarity in effect at search time. ///
     
    ///
  6. /// ///
  7. /// /// queryNorm(q) /// /// is a normalizing factor used to make scores between queries comparable. This factor does not affect /// document ranking (since all ranked documents are multiplied by the same factor), but rather just attempts /// to make scores from different queries (or even different indexes) comparable. This is a search time /// factor computed by the Similarity in effect at search time. /// /// The default computation in {@link DefaultSimilarity#queryNorm(float) DefaultSimilarity} /// produces a Euclidean norm: ///
     
    /// /// /// /// /// ///
    /// queryNorm(q)   =   /// {@link DefaultSimilarity#queryNorm(float) queryNorm(sumOfSquaredWeights)} ///   =   /// /// /// /// /// ///
    1
    /// –––––––––––––– ///
    sumOfSquaredWeights½
    ///
    ///
     
    /// /// The sum of squared weights (of the query terms) is computed by the query {@link Weight} object. For example, /// a {@link BooleanQuery boolean query} computes this value as: /// ///
     
    /// /// /// /// /// /// /// /// /// /// /// ///
    /// {@link Weight#sumOfSquaredWeights() sumOfSquaredWeights}   =   /// {@link Query#getBoost() q.getBoost()} 2 ///  ·  /// /// /// /// ( /// idf(t)  ·  /// t.getBoost() /// ) 2 ///
    t in q
    ///
     
    /// ///
  8. /// ///
  9. /// /// t.getBoost() /// is a search time boost of term t in the query q as specified in the query text or as set by application /// calls to {@link Query#setBoost(float) setBoost()}. Notice that there is really no direct API for accessing /// a boost of one term in a multi term query, but rather multi terms are represented in a query as multi /// {@link TermQuery TermQuery} objects, and so the boost of a term in the query is accessible by calling /// the sub-query {@link Query#getBoost() getBoost()}. ///
     
    ///
  10. /// ///
  11. /// /// norm(t,d) encapsulates a few (indexing time) boost and length factors: /// ///
      ///
    • Document boost - set by calling /// {@link Document#setBoost(float) doc.setBoost()} /// before adding the document to the index. ///
    • ///
    • Field boost - set by calling /// {@link Fieldable#setBoost(float) field.setBoost()} /// before adding the field to a document. ///
    • ///
    • {@link #lengthNorm(String, int) lengthNorm(field)} - computed when the document is added to /// the index in accordance with the number of tokens of this field in the document, so that shorter fields /// contribute more to the score. LengthNorm is computed by the Similarity class in effect at indexing. ///
    • ///
    /// /// When a document is added to the index, all the above factors are multiplied. /// If the document has multiple fields with the same name, all their boosts are multiplied together: /// ///
     
    /// /// /// /// /// /// /// /// /// /// /// ///
    /// norm(t,d)   =   /// {@link Document#getBoost() doc.getBoost()} ///  ·  /// {@link #lengthNorm(String, int) lengthNorm(field)} ///  ·  /// /// /// /// {@link Fieldable#getBoost() f.getBoost}() ///
    field f in d named as t
    ///
     
    /// However the resulted norm value is {@link #encodeNorm(float) encoded} as a single byte before being stored. /// At search time, the norm byte value is read from the index {@link Directory directory} and {@link /// #decodeNorm(byte) decoded} back to a float norm value. This encoding/decoding, while reducing index size, /// comes with the price of precision loss - it is not guaranteed that decode(encode(x)) = x. For instance, /// decode(encode(0.89)) = 0.75. ///
     
    /// Compression of norm values to a single byte saves memory at search time, because once a field is referenced /// at search time, its norms - for all documents - are maintained in memory. ///
     
    /// The rationale supporting such lossy compression of norm values is that given the difficulty (and inaccuracy) /// of users to express their true information need by a query, only big differences matter. ///
     
    /// Last, note that search time is too late to modify this norm part of scoring, eg. by using a different /// {@link Similarity} for search. ///
     
    ///
  12. ///
/// /// @see #setDefault(SimilarityPtr) /// @see IndexWriter#setSimilarity(SimilarityPtr) /// @see Searcher#setSimilarity(SimilarityPtr) class LPPAPI Similarity : public LuceneObject { public: Similarity(); virtual ~Similarity(); LUCENE_CLASS(Similarity); protected: static const int32_t NO_DOC_ID_PROVIDED; public: static const Collection NORM_TABLE; public: /// Return the default Similarity implementation used by indexing and search code. /// This is initially an instance of {@link DefaultSimilarity}. /// @see Searcher#setSimilarity(SimilarityPtr) /// @see IndexWriter#setSimilarity(SimilarityPtr) static SimilarityPtr getDefault(); /// Decodes a normalization factor stored in an index. /// @see #encodeNorm(double) static double decodeNorm(uint8_t b); /// Returns a table for decoding normalization bytes. /// @see #encodeNorm(double) static const Collection& getNormDecoder(); /// Compute the normalization value for a field, given the accumulated state of term processing for this /// field (see {@link FieldInvertState}). /// /// Implementations should calculate a float value based on the field state and then return that value. /// /// For backward compatibility this method by default calls {@link #lengthNorm(String, int32_t)} passing /// {@link FieldInvertState#getLength()} as the second argument, and then multiplies this value by {@link /// FieldInvertState#getBoost()}. /// /// @param field Field name /// @param state Current processing state for this field /// @return The calculated float norm virtual double computeNorm(const String& fieldName, const FieldInvertStatePtr& state); /// Computes the normalization value for a field given the total number of terms contained in a field. /// These values, together with field boosts, are stored in an index and multiplied into scores for hits /// on each field by the search code. 
/// /// Matches in longer fields are less precise, so implementations of this method usually return smaller /// values when numTokens is large, and larger values when numTokens is small. /// /// Note that the return values are computed under {@link IndexWriter#addDocument(DocumentPtr)} and then /// stored using {@link #encodeNorm(double)}. Thus they have limited precision, and documents must be /// re-indexed if this method is altered. /// /// @param fieldName The name of the field /// @param numTokens The total number of tokens contained in fields named fieldName of doc. /// @return A normalization factor for hits on this field of this document /// @see Field#setBoost(double) virtual double lengthNorm(const String& fieldName, int32_t numTokens) = 0; /// Computes the normalization value for a query given the sum of the squared weights of each of the query /// terms. This value is multiplied into the weight of each query term. While the classic query /// normalization factor is computed as 1/sqrt(sumOfSquaredWeights), other implementations might completely /// ignore sumOfSquaredWeights (ie return 1). /// /// This does not affect ranking, but the default implementation does make scores from different queries /// more comparable than they would be by eliminating the magnitude of the Query vector as a factor in the /// score. /// /// @param sumOfSquaredWeights The sum of the squares of query term weights /// @return a normalization factor for query weights virtual double queryNorm(double sumOfSquaredWeights) = 0; /// Encodes a normalization factor for storage in an index. /// /// The encoding uses a three-bit mantissa, a five-bit exponent, and the zero-exponent point at 15, thus /// representing values from around 7x10^9 to 2x10^-9 with about one significant decimal digit of accuracy. /// Zero is also represented. Negative numbers are rounded up to zero. Values too large to represent /// are rounded down to the largest representable value. 
Positive values too small to represent are rounded /// up to the smallest positive representable value. /// /// @see Field#setBoost(double) static uint8_t encodeNorm(double f); /// Computes a score factor based on a term or phrase's frequency in a document. This value is multiplied /// by the {@link #idf(int32_t, int32_t)} factor for each term in the query and these products are then /// summed to form the initial score for a document. /// /// Terms and phrases repeated in a document indicate the topic of the document, so implementations of this /// method usually return larger values when freq is large, and smaller values when freq is small. /// /// The default implementation calls {@link #tf(double)}. /// /// @param freq The frequency of a term within a document /// @return A score factor based on a term's within-document frequency virtual double tf(int32_t freq); /// Computes the amount of a sloppy phrase match, based on an edit distance. This value is summed for /// each sloppy phrase match in a document to form the frequency that is passed to {@link #tf(double)}. /// /// A phrase match with a small edit distance to a document passage more closely matches the document, so /// implementations of this method usually return larger values when the edit distance is small and /// smaller values when it is large. /// /// @see PhraseQuery#setSlop(int32_t) /// @param distance The edit distance of this sloppy phrase match /// @return The frequency increment for this match virtual double sloppyFreq(int32_t distance) = 0; /// Computes a score factor based on a term or phrase's frequency in a document. This value is multiplied /// by the {@link #idf(int32_t, int32_t)} factor for each term in the query and these products are then /// summed to form the initial score for a document. 
/// /// Terms and phrases repeated in a document indicate the topic of the document, so implementations of this /// method usually return larger values when freq is large, and smaller values when freq is small. /// /// @param freq The frequency of a term within a document /// @return A score factor based on a term's within-document frequency virtual double tf(double freq) = 0; /// Computes a score factor for a simple term and returns an explanation for that score factor. /// /// The default implementation uses: ///
    /// idf(searcher->docFreq(term), searcher->maxDoc());
    /// 
/// /// Note that {@link Searcher#maxDoc()} is used instead of {@link IndexReader#numDocs() IndexReader#numDocs()} /// because also {@link Searcher#docFreq(TermPtr)} is used, and when the latter is inaccurate, so is {@link /// Searcher#maxDoc()}, and in the same direction. In addition, {@link Searcher#maxDoc()} is more efficient /// to compute. /// /// @param term The term in question /// @param searcher The document collection being searched /// @return An IDFExplain object that includes both an idf score factor and an explanation for the term. virtual IDFExplanationPtr idfExplain(const TermPtr& term, const SearcherPtr& searcher); /// Computes a score factor for a phrase. /// /// The default implementation sums the idf factor for each term in the phrase. /// /// @param terms The terms in the phrase /// @param searcher The document collection being searched /// @return An IDFExplain object that includes both an idf score factor for the phrase and an explanation /// for each term. virtual IDFExplanationPtr idfExplain(Collection terms, const SearcherPtr& searcher); /// Computes a score factor based on a term's document frequency (the number of documents which contain the /// term). This value is multiplied by the {@link #tf(int32_t)} factor for each term in the query and these /// products are then summed to form the initial score for a document. /// /// Terms that occur in fewer documents are better indicators of topic, so implementations of this method /// usually return larger values for rare terms, and smaller values for common terms. /// /// @param docFreq The number of documents which contain the term /// @param numDocs The total number of documents in the collection /// @return A score factor based on the term's document frequency virtual double idf(int32_t docFreq, int32_t numDocs) = 0; /// Computes a score factor based on the fraction of all query terms that a document contains. This value /// is multiplied into scores. 
/// /// The presence of a large portion of the query terms indicates a better match with the query, so /// implementations of this method usually return larger values when the ratio between these parameters is /// large and smaller values when the ratio between them is small. /// /// @param overlap The number of query terms matched in the document /// @param maxOverlap The total number of terms in the query /// @return A score factor based on term overlap with the query virtual double coord(int32_t overlap, int32_t maxOverlap) = 0; /// Calculate a scoring factor based on the data in the payload. Overriding implementations are responsible /// for interpreting what is in the payload. Lucene makes no assumptions about what is in the byte array. /// /// The default implementation returns 1. /// /// @param docId The docId currently being scored. If this value is {@link #NO_DOC_ID_PROVIDED}, then it /// should be assumed that the PayloadQuery implementation does not provide document information /// @param fieldName The fieldName of the term this payload belongs to /// @param start The start position of the payload /// @param end The end position of the payload /// @param payload The payload byte array to be scored /// @param offset The offset into the payload array /// @param length The length in the array /// @return An implementation dependent float to be used as a scoring factor virtual double scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SimilarityDelegator.h000066400000000000000000000026701456444476200245070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SIMILARITYDELEGATOR_H #define SIMILARITYDELEGATOR_H #include "Similarity.h" namespace Lucene { /// Delegating scoring implementation. Useful in {@link Query#getSimilarity(Searcher)} implementations, /// to override only certain methods of a Searcher's Similarity implementation. class LPPAPI SimilarityDelegator : public Similarity { public: SimilarityDelegator(const SimilarityPtr& delegee); virtual ~SimilarityDelegator(); LUCENE_CLASS(SimilarityDelegator); protected: SimilarityPtr delegee; public: virtual double computeNorm(const String& field, const FieldInvertStatePtr& state); virtual double lengthNorm(const String& fieldName, int32_t numTokens); virtual double queryNorm(double sumOfSquaredWeights); virtual double tf(double freq); virtual double sloppyFreq(int32_t distance); virtual double idf(int32_t docFreq, int32_t numDocs); virtual double coord(int32_t overlap, int32_t maxOverlap); virtual double scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SimpleAnalyzer.h000066400000000000000000000015301456444476200234630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLEANALYZER_H #define SIMPLEANALYZER_H #include "Analyzer.h" namespace Lucene { /// An {@link Analyzer} that filters {@link LetterTokenizer} with {@link LowerCaseFilter} class LPPAPI SimpleAnalyzer : public Analyzer { public: virtual ~SimpleAnalyzer(); LUCENE_CLASS(SimpleAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SimpleFSDirectory.h000066400000000000000000000027671456444476200241100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLEFSDIRECTORY_H #define SIMPLEFSDIRECTORY_H #include "FSDirectory.h" namespace Lucene { /// A straightforward implementation of {@link FSDirectory} using std::ofstream and std::ifstream. class LPPAPI SimpleFSDirectory : public FSDirectory { public: /// Create a new SimpleFSDirectory for the named location and {@link NativeFSLockFactory}. /// @param path the path of the directory. /// @param lockFactory the lock factory to use, or null for the default ({@link NativeFSLockFactory}) SimpleFSDirectory(const String& path, const LockFactoryPtr& lockFactory = LockFactoryPtr()); virtual ~SimpleFSDirectory(); LUCENE_CLASS(SimpleFSDirectory); public: /// Creates an IndexOutput for the file with the given name. virtual IndexOutputPtr createOutput(const String& name); /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory implementation may ignore the buffer size. 
virtual IndexInputPtr openInput(const String& name); /// Creates an IndexInput for the file with the given name. virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SimpleFSLockFactory.h000066400000000000000000000026431456444476200243550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLEFSLOCKFACTORY_H #define SIMPLEFSLOCKFACTORY_H #include "FSLockFactory.h" #include "Lock.h" namespace Lucene { /// Implements {@link LockFactory} using {@link File#createNewFile()}. /// @see LockFactory class LPPAPI SimpleFSLockFactory : public FSLockFactory { public: /// Create a SimpleFSLockFactory instance, with null (unset) lock directory. When you pass this factory /// to a {@link FSDirectory} subclass, the lock directory is automatically set to the directory itself. /// Be sure to create one instance for each directory your create! SimpleFSLockFactory(); /// Instantiate using the provided directory name. /// @param lockDir where lock files should be created. SimpleFSLockFactory(const String& lockDir); virtual ~SimpleFSLockFactory(); LUCENE_CLASS(SimpleFSLockFactory); public: /// Return a new Lock instance identified by lockName. virtual LockPtr makeLock(const String& lockName); /// Attempt to clear (forcefully unlock and remove) the specified lock. virtual void clearLock(const String& lockName); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SimpleLRUCache.h000066400000000000000000000044061456444476200232710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLELRUCACHE_H #define SIMPLELRUCACHE_H #include #include "LuceneObject.h" namespace Lucene { /// General purpose LRU cache map. /// Accessing an entry will keep the entry cached. {@link #get(const KEY&)} and /// {@link #put(const KEY&, const VALUE&)} results in an access to the corresponding entry. template class SimpleLRUCache : public LuceneObject { public: typedef std::pair key_value; typedef std::list< key_value > key_list; typedef typename key_list::const_iterator const_iterator; typedef boost::unordered_map map_type; typedef typename map_type::const_iterator map_iterator; SimpleLRUCache(int32_t cacheSize) { this->cacheSize = cacheSize; } virtual ~SimpleLRUCache() { } protected: int32_t cacheSize; key_list cacheList; map_type cacheMap; public: void put(const KEY& key, const VALUE& value) { cacheList.push_front(std::make_pair(key, value)); cacheMap[key] = cacheList.begin(); if ((int32_t)cacheList.size() > cacheSize) { cacheMap.erase(cacheList.back().first); cacheList.pop_back(); } } VALUE get(const KEY& key) { map_iterator find = cacheMap.find(key); if (find == cacheMap.end()) { return VALUE(); } VALUE value(find->second->second); cacheList.erase(find->second); cacheList.push_front(std::make_pair(key, value)); cacheMap[key] = cacheList.begin(); return value; } bool contains(const KEY& key) const { return (cacheMap.find(key) != cacheMap.end()); } int32_t size() const { return (int32_t)cacheList.size(); } const_iterator begin() const { return cacheList.begin(); } const_iterator end() const { return cacheList.end(); } }; }; #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SingleInstanceLockFactory.h000066400000000000000000000030301456444476200255700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // 
Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SINGLEINSTANCELOCKFACTORY_H #define SINGLEINSTANCELOCKFACTORY_H #include "LockFactory.h" namespace Lucene { /// Implements {@link LockFactory} for a single in-process instance, meaning all /// locking will take place through this one instance. Only use this {@link LockFactory} /// when you are certain all IndexReaders and IndexWriters for a given index are running /// against a single shared in-process Directory instance. This is currently the /// default locking for RAMDirectory. /// @see LockFactory class LPPAPI SingleInstanceLockFactory : public LockFactory { public: SingleInstanceLockFactory(); virtual ~SingleInstanceLockFactory(); LUCENE_CLASS(SingleInstanceLockFactory); protected: HashSet locks; public: /// Return a new Lock instance identified by lockName. /// @param lockName name of the lock to be created. virtual LockPtr makeLock(const String& lockName); /// Attempt to clear (forcefully unlock and remove) the /// specified lock. Only call this at a time when you are /// certain this lock is no longer in use. /// @param lockName name of the lock to be cleared. virtual void clearLock(const String& lockName); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SingleTermEnum.h000066400000000000000000000021051456444476200234210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SINGLETERMENUM_H #define SINGLETERMENUM_H #include "FilteredTermEnum.h" namespace Lucene { /// Subclass of FilteredTermEnum for enumerating a single term. /// /// This can be used by {@link MultiTermQuery}s that need only visit one term, but want to preserve /// MultiTermQuery semantics such as {@link MultiTermQuery#rewriteMethod}. class LPPAPI SingleTermEnum : public FilteredTermEnum { public: SingleTermEnum(const IndexReaderPtr& reader, const TermPtr& singleTerm); virtual ~SingleTermEnum(); LUCENE_CLASS(SingleTermEnum); protected: TermPtr singleTerm; bool _endEnum; public: virtual double difference(); protected: virtual bool endEnum(); virtual bool termCompare(const TermPtr& term); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SloppyPhraseScorer.h000066400000000000000000000067261456444476200243470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SLOPPYPHRASESCORER_H #define SLOPPYPHRASESCORER_H #include "PhraseScorer.h" namespace Lucene { class SloppyPhraseScorer : public PhraseScorer { public: SloppyPhraseScorer(const WeightPtr& weight, Collection tps, Collection offsets, const SimilarityPtr& similarity, int32_t slop, ByteArray norms); virtual ~SloppyPhraseScorer(); LUCENE_CLASS(SloppyPhraseScorer); protected: int32_t slop; Collection repeats; Collection tmpPos; // for flipping repeating pps bool checkedRepeats; public: /// Score a candidate doc for all slop-valid position-combinations (matches) encountered while /// traversing/hopping the PhrasePositions. 
The score contribution of a match depends on the distance: /// - highest score for distance=0 (exact match). /// - score gets lower as distance gets higher. /// Example: for query "a b"~2, a document "x a b a y" can be scored twice: once for "a b" (distance=0), /// and once for "b a" (distance=2). /// Possibly not all valid combinations are encountered, because for efficiency we always propagate the /// least PhrasePosition. This allows to base on PriorityQueue and move forward faster. /// As result, for example, document "a b c b a" would score differently for queries "a b c"~4 and /// "c b a"~4, although they really are equivalent. Similarly, for doc "a b c b a f g", query "c b"~2 /// would get same score as "g f"~2, although "c b"~2 could be matched twice. We may want to fix this /// in the future (currently not, for performance reasons). virtual double phraseFreq(); protected: /// Flip pp2 and pp in the queue: pop until finding pp2, insert back all but pp2, insert pp back. /// Assumes: pp!=pp2, pp2 in pq, pp not in pq. Called only when there are repeating pps. PhrasePositions* flip(PhrasePositions* pp, PhrasePositions* pp2); /// Init PhrasePositions in place. /// There is a one time initialization for this scorer: /// - Put in repeats[] each pp that has another pp with same position in the doc. /// - Also mark each such pp by pp.repeats = true. /// Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient. /// In particular, this allows to score queries with no repetitions with no overhead due to this computation. /// - Example 1 - query with no repetitions: "ho my"~2 /// - Example 2 - query with repetitions: "ho my my"~2 /// - Example 3 - query with repetitions: "my ho my"~2 /// Init per doc with repeats in query, includes propagating some repeating pp's to avoid false phrase detection. /// @return end (max position), or -1 if any term ran out (ie. 
done) int32_t initPhrasePositions(); /// We disallow two pp's to have the same TermPosition, thereby verifying multiple occurrences in the query /// of the same word would go elsewhere in the matched doc. /// @return null if differ (i.e. valid) otherwise return the higher offset PhrasePositions out of the first /// two PPs found to not differ. PhrasePositions* termPositionsDiffer(PhrasePositions* pp); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SmallDouble.h000066400000000000000000000020031456444476200227230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SMALLDOUBLE_H #define SMALLDOUBLE_H #include "LuceneObject.h" namespace Lucene { /// Floating point numbers smaller than 32 bits. class SmallDouble : public LuceneObject { public: virtual ~SmallDouble(); LUCENE_CLASS(SmallDouble); public: /// Converts a floating point number to an 8 bit float. /// Values less than zero are all mapped to zero. /// Values are truncated (rounded down) to the nearest 8 bit value. /// Values between zero and the smallest representable value are rounded up. static uint8_t doubleToByte(double f); /// Converts an 8 bit floating point number to a double. static double byteToDouble(uint8_t b); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SnapshotDeletionPolicy.h000066400000000000000000000036171456444476200251770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SNAPSHOTDELETIONPOLICY_H #define SNAPSHOTDELETIONPOLICY_H #include "IndexDeletionPolicy.h" namespace Lucene { class LPPAPI SnapshotDeletionPolicy : public IndexDeletionPolicy { public: SnapshotDeletionPolicy(const IndexDeletionPolicyPtr& primary); virtual ~SnapshotDeletionPolicy(); LUCENE_CLASS(SnapshotDeletionPolicy); protected: IndexCommitPtr lastCommit; IndexDeletionPolicyPtr primary; String _snapshot; public: /// This is called once when a writer is first instantiated to give the policy a chance to remove old /// commit points. virtual void onInit(Collection commits); /// This is called each time the writer completed a commit. This gives the policy a chance to remove /// old commit points with each commit. virtual void onCommit(Collection commits); /// Take a snapshot of the most recent commit to the index. You must call release() to free this snapshot. /// Note that while the snapshot is held, the files it references will not be deleted, which will consume /// additional disk space in your index. If you take a snapshot at a particularly bad time (say just before /// you call optimize()) then in the worst case this could consume an extra 1X of your total index size, /// until you release the snapshot. virtual IndexCommitPtr snapshot(); /// Release the currently held snapshot. virtual void release(); protected: Collection wrapCommits(Collection commits); friend class MyCommitPoint; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Sort.h000066400000000000000000000114711456444476200214600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SORT_H #define SORT_H #include "LuceneObject.h" namespace Lucene { /// Encapsulates sort criteria for returned hits. /// /// The fields used to determine sort order must be carefully chosen. Documents must contain a single term /// in such a field, and the value of the term should indicate the document's relative position in a given /// sort order. The field must be indexed, but should not be tokenized, and does not need to be stored /// (unless you happen to want it back with the rest of your document data). In other words: /// ///
/// document->add(newLucene(L"byNumber", StringUtils::toString(x), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
/// 
/// /// Valid Types of Values /// /// There are four possible kinds of term values which may be put into sorting fields: Integers, Longs, Doubles, /// or Strings. Unless {@link SortField SortField} objects are specified, the type of value in the field is /// determined by parsing the first term in the field. /// /// Integer term values should contain only digits and an optional preceding negative sign. Values must be base /// 10 and in the range INT_MIN and INT_MAX inclusive. Documents which should appear first in the sort should /// have low value integers, later documents high values (ie. the documents should be numbered 1..n where 1 is /// the first and n the last). /// /// Long term values should contain only digits and an optional preceding negative sign. Values must be base 10 /// and in the range LLONG_MIN and LLONG_MAX inclusive. Documents which should appear first in the sort should /// have low value integers, later documents high values. /// /// Double term values should conform to values accepted by Double (except that NaN and Infinity are not /// supported). Documents which should appear first in the sort should have low values, later documents high /// values. /// /// String term values can contain any valid String, but should not be tokenized. The values are sorted according /// to their comparable natural order. Note that using this type of term value has higher memory requirements /// than the other two types. /// /// Object Reuse /// /// One of these objects can be used multiple times and the sort order changed between usages. /// This class is thread safe. /// /// Memory Usage /// /// Sorting uses of caches of term values maintained by the internal HitQueue(s). The cache is static and /// contains an integer or double array of length IndexReader::maxDoc() for each field name for which a sort is /// performed. In other words, the size of the cache in bytes is: /// ///
/// 4 * IndexReader::maxDoc() * (# of different fields actually used to sort)
/// 
/// /// For String fields, the cache is larger: in addition to the above array, the value of every term in the /// field is kept in memory. If there are many unique terms in the field, this could be quite large. /// /// Note that the size of the cache is not affected by how many fields are in the index and might be used to /// sort - only by the ones actually used to sort a result set. class LPPAPI Sort : public LuceneObject { public: /// Sorts by computed relevance. This is the same sort criteria as calling {@link /// Searcher#search(QueryPtr, int32_t) Searcher#search()} without a sort criteria, only with slightly more /// overhead. Sort(); /// Sorts by the criteria in the given SortField. Sort(const SortFieldPtr& field); /// Sorts in succession by the criteria in each SortField. Sort(Collection fields); virtual ~Sort(); LUCENE_CLASS(Sort); public: /// Internal representation of the sort criteria Collection fields; public: /// Represents sorting by computed relevance. Using this sort criteria returns the same results as calling /// {@link Searcher#search(QueryPtr, int32_t) Searcher#search()} without a sort criteria, only with slightly /// more overhead. static SortPtr RELEVANCE(); /// Represents sorting by index order. static SortPtr INDEXORDER(); /// Sets the sort to the given criteria. void setSort(const SortFieldPtr& field); /// Sets the sort to the given criteria in succession. void setSort(Collection fields); /// Representation of the sort criteria. /// @return Array of SortField objects used in this sort criteria Collection getSort(); virtual String toString(); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SortField.h000066400000000000000000000144021456444476200224210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SORTFIELD_H #define SORTFIELD_H #include "LuceneObject.h" namespace Lucene { /// Stores information about how to sort documents by terms in an individual field. Fields must be indexed /// in order to sort by them. class LPPAPI SortField : public LuceneObject { public: /// Creates a sort by terms in the given field with the type of term values explicitly given. /// @param field Name of field to sort by. Can be null if type is SCORE or DOC. /// @param type Type of values in the terms. /// @param reverse True if natural order should be reversed. SortField(const String& field, int32_t type, bool reverse = false); /// Creates a sort, possibly in reverse, by terms in the given field, parsed to numeric values using a /// custom {@link Parser}. /// @param field Name of field to sort by /// @param parser Instance of a {@link Parser}, which must subclass one of the existing numeric parsers from /// {@link FieldCache}. Sort type is inferred by testing which numeric parser the parser subclasses. /// @param reverse True if natural order should be reversed. SortField(const String& field, const ParserPtr& parser, bool reverse = false); /// Creates a sort, possibly in reverse, by terms in the given field sorted according to the given locale. /// @param field Name of field to sort by, cannot be null. /// @param locale Locale of values in the field. /// @param reverse True if natural order should be reversed. SortField(const String& field, const std::locale& locale, bool reverse = false); /// Creates a sort, possibly in reverse, with a custom comparison function. /// @param field Name of field to sort by; cannot be null. /// @param comparator Returns a comparator for sorting hits. /// @param reverse True if natural order should be reversed. 
SortField(const String& field, const FieldComparatorSourcePtr& comparator, bool reverse = false); virtual ~SortField(); LUCENE_CLASS(SortField); public: /// Sort by document score (relevancy). Sort values are Double and higher values are at the front. static const int32_t SCORE; /// Sort by document number (index order). Sort values are Integer and lower values are at the front. static const int32_t DOC; /// Sort using term values as Strings. Sort values are String and lower values are at the front. static const int32_t STRING; /// Sort using term values as Integers. Sort values are Integer and lower values are at the front. static const int32_t INT; /// Sort using term values as Floats. Sort values are Float and lower values are at the front. static const int32_t FLOAT; /// Sort using term values as Longs. Sort values are Long and lower values are at the front. static const int32_t LONG; /// Sort using term values as Doubles. Sort values are Double and lower values are at the front. static const int32_t DOUBLE; /// Sort using term values as Shorts. Sort values are Short and lower values are at the front. static const int32_t SHORT; /// Sort using a custom Comparator. Sort values are any ComparableValue and sorting is done according /// to natural order. static const int32_t CUSTOM; /// Sort using term values as Bytes. Sort values are Byte and lower values are at the front. static const int32_t BYTE; /// Sort using term values as Strings, but comparing by value (using String::compare) for all comparisons. /// This is typically slower than {@link #STRING}, which uses ordinals to do the sorting. static const int32_t STRING_VAL; INTERNAL: bool reverse; // defaults to natural order String field; int32_t type; // defaults to determining type dynamically localePtr locale; // defaults to "natural order" (no Locale) ParserPtr parser; private: /// Used for CUSTOM sort FieldComparatorSourcePtr comparatorSource; public: /// Represents sorting by document score (relevancy). 
static SortFieldPtr FIELD_SCORE(); /// Represents sorting by document number (index order). static SortFieldPtr FIELD_DOC(); /// Returns the name of the field. Could return null if the sort is by SCORE or DOC. /// @return Name of field, possibly null. String getField(); /// Returns the type of contents in the field. /// @return One of the constants SCORE, DOC, STRING, INT or DOUBLE. int32_t getType(); /// Returns the Locale by which term values are interpreted. localePtr getLocale(); /// Returns the instance of a {@link FieldCache} parser that fits to the given sort type. May return null /// if no parser was specified. Sorting is using the default parser then. /// @return An instance of a parser, or null. ParserPtr getParser(); /// Returns whether the sort should be reversed. /// @return True if natural order should be reversed. bool getReverse(); /// Returns the {@link FieldComparatorSource} used for custom sorting FieldComparatorSourcePtr getComparatorSource(); virtual String toString(); /// Returns true if other is equal to this. If a {@link FieldComparatorSource} or {@link Parser} was provided, /// it must properly implement equals (unless a singleton is always used). virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); /// Returns the {@link FieldComparator} to use for sorting. /// @param numHits number of top hits the queue will store /// @param sortPos position of this SortField within {@link Sort}. The comparator is primary if sortPos == 0, /// secondary if sortPos == 1, etc. Some comparators can optimize themselves when they are the primary sort. 
/// @return {@link FieldComparator} to use when sorting FieldComparatorPtr getComparator(int32_t numHits, int32_t sortPos); protected: /// Sets field and type, and ensures field is not NULL unless type is SCORE or DOC void initFieldType(const String& field, int32_t type); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SortedTermVectorMapper.h000066400000000000000000000043251456444476200251510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SORTEDTERMVECTORMAPPER_H #define SORTEDTERMVECTORMAPPER_H #include #include "TermVectorMapper.h" namespace Lucene { /// Store a sorted collection of {@link TermVectorEntry}s. Collects all term information into a single, /// sorted set. /// /// NOTE: This Mapper ignores all Field information for the Document. This means that if you are using offset/ /// positions you will not know what Fields they correlate with. 
/// /// This is not thread-safe class LPPAPI SortedTermVectorMapper : public TermVectorMapper { public: /// @param comparator A Comparator for sorting {@link TermVectorEntry}s SortedTermVectorMapper(TermVectorEntryComparator comparator); SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator); virtual ~SortedTermVectorMapper(); LUCENE_CLASS(SortedTermVectorMapper); protected: Collection currentSet; MapStringTermVectorEntry termToTVE; bool storeOffsets; bool storePositions; TermVectorEntryComparator comparator; public: static const wchar_t* ALL; public: /// Map the Term Vector information into your own structure virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); /// The TermVectorEntrySet. A SortedSet of {@link TermVectorEntry} objects. Sort is by the comparator passed /// into the constructor. /// /// This set will be empty until after the mapping process takes place. /// /// @return The sorted set of {@link TermVectorEntry}. Collection getTermVectorEntrySet(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SortedVIntList.h000066400000000000000000000060761456444476200234330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SORTEDVINTLIST_H #define SORTEDVINTLIST_H #include "DocIdSet.h" namespace Lucene { /// Stores and iterate on sorted integers in compressed form in RAM. 
/// /// The code for compressing the differences between ascending integers was borrowed from {@link IndexInput} /// and {@link IndexOutput}. /// /// NOTE: this class assumes the stored integers are doc Ids (hence why it extends {@link DocIdSet}). Therefore /// its {@link #iterator()} assumes {@link DocIdSetIterator#NO_MORE_DOCS} can be used as sentinel. If you /// intend to use this value, then make sure it's not used during search flow. class LPPAPI SortedVIntList : public DocIdSet { public: /// Create a SortedVIntList from all elements of an array of integers. /// @param sortedInts A sorted array of non negative integers. SortedVIntList(Collection sortedInts); /// Create a SortedVIntList from an array of integers. /// @param sortedInts A sorted array of non negative integers. /// @param inputSize The number of integers to be used from the array. SortedVIntList(Collection sortedInts, int32_t inputSize); /// Create a SortedVIntList from a BitSet. /// @param bits A bit set representing a set of integers. SortedVIntList(const BitSetPtr& bits); /// Create a SortedVIntList from an OpenBitSet. /// @param bits A bit set representing a set of integers. SortedVIntList(const OpenBitSetPtr& bits); /// Create a SortedVIntList. /// @param docIdSetIterator An iterator providing document numbers as a set of integers. /// This DocIdSetIterator is iterated completely when this constructor is called and it must provide the /// integers in non decreasing order. SortedVIntList(const DocIdSetIteratorPtr& docIdSetIterator); virtual ~SortedVIntList(); LUCENE_CLASS(SortedVIntList); public: /// When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set, a SortedVIntList representing the /// index numbers of the set bits will be smaller than that BitSet. 
static const int32_t BITS2VINTLIST_SIZE; protected: static const int32_t VB1; static const int32_t BIT_SHIFT; static const int32_t MAX_BYTES_PER_INT; int32_t _size; ByteArray bytes; int32_t lastBytePos; int32_t lastInt; public: /// @return The total number of sorted integers. int32_t size(); /// @return The size of the byte array storing the compressed sorted integers. int32_t getByteSize(); /// This DocIdSet implementation is cacheable. virtual bool isCacheable(); /// @return An iterator over the sorted integers. virtual DocIdSetIteratorPtr iterator(); protected: void initBytes(); void addInt(int32_t nextInt); friend class SortedDocIdSetIterator; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SpanFilter.h000066400000000000000000000027331456444476200226010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANFILTER_H #define SPANFILTER_H #include "Filter.h" namespace Lucene { /// Abstract base class providing a mechanism to restrict searches to a subset of an index and also maintains /// and returns position information. /// /// This is useful if you want to compare the positions from a SpanQuery with the positions of items in a filter. /// For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents, and /// then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could /// then compare position information for post processing. 
class LPPAPI SpanFilter : public Filter { public: virtual ~SpanFilter(); LUCENE_CLASS(SpanFilter); public: /// Returns a SpanFilterResult with true for documents which should be permitted in search results, and /// false for those that should not and Spans for where the true docs match. /// @param reader The {@link IndexReader} to load position and DocIdSet information from /// @return A {@link SpanFilterResult} virtual SpanFilterResultPtr bitSpans(const IndexReaderPtr& reader) = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SpanFilterResult.h000066400000000000000000000037001456444476200237730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANFILTERRESULT_H #define SPANFILTERRESULT_H #include "LuceneObject.h" namespace Lucene { /// The results of a SpanQueryFilter. Wraps the BitSet and the position information from the SpanQuery class LPPAPI SpanFilterResult : public LuceneObject { public: /// @param docIdSet The DocIdSet for the Filter /// @param positions A List of {@link PositionInfo} objects SpanFilterResult(const DocIdSetPtr& docIdSet, Collection positions); virtual ~SpanFilterResult(); LUCENE_CLASS(SpanFilterResult); protected: DocIdSetPtr docIdSet; Collection positions; // Spans spans public: /// The first entry in the array corresponds to the first "on" bit. Entries are increasing by /// document order. 
/// @return A List of PositionInfo objects Collection getPositions(); /// Returns the docIdSet DocIdSetPtr getDocIdSet(); }; class LPPAPI PositionInfo : public LuceneObject { public: PositionInfo(int32_t doc); virtual ~PositionInfo(); LUCENE_CLASS(PositionInfo); protected: int32_t doc; Collection positions; public: void addPosition(int32_t start, int32_t end); int32_t getDoc(); Collection getPositions(); }; class LPPAPI StartEnd : public LuceneObject { public: StartEnd(int32_t start, int32_t end); virtual ~StartEnd(); LUCENE_CLASS(StartEnd); protected: int32_t start; int32_t end; public: /// @return The end position of this match int32_t getEnd(); /// @return The start position of this match int32_t getStart(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SpanFirstQuery.h000066400000000000000000000027641456444476200234750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANFIRSTQUERY_H #define SPANFIRSTQUERY_H #include "SpanQuery.h" #include "Spans.h" namespace Lucene { /// Matches spans near the beginning of a field. class LPPAPI SpanFirstQuery : public SpanQuery { public: /// Construct a SpanFirstQuery matching spans in match whose end position is less than or equal to end. SpanFirstQuery(const SpanQueryPtr& match, int32_t end); virtual ~SpanFirstQuery(); LUCENE_CLASS(SpanFirstQuery); protected: SpanQueryPtr match; int32_t end; public: using SpanQuery::toString; /// Return the SpanQuery whose matches are filtered. SpanQueryPtr getMatch(); /// Return the maximum end position permitted in a match. 
int32_t getEnd(); virtual String getField(); virtual String toString(const String& field); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual void extractTerms(SetTerm terms); virtual SpansPtr getSpans(const IndexReaderPtr& reader); virtual QueryPtr rewrite(const IndexReaderPtr& reader); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); friend class FirstSpans; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SpanNearQuery.h000066400000000000000000000036661456444476200232750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANNEARQUERY_H #define SPANNEARQUERY_H #include "SpanQuery.h" namespace Lucene { /// Matches spans which are near one another. One can specify slop, the maximum number of intervening /// unmatched positions, as well as whether matches are required to be in-order. class LPPAPI SpanNearQuery : public SpanQuery { public: /// Construct a SpanNearQuery. Matches spans matching a span from each clause, with up to slop total /// unmatched positions between them. * When inOrder is true, the spans from each clause must be /// ordered as in clauses. SpanNearQuery(Collection clauses, int32_t slop, bool inOrder, bool collectPayloads = true); virtual ~SpanNearQuery(); LUCENE_CLASS(SpanNearQuery); protected: Collection clauses; int32_t slop; bool inOrder; String field; bool collectPayloads; public: using SpanQuery::toString; /// Return the clauses whose spans are matched. Collection getClauses(); /// Return the maximum number of intervening unmatched positions permitted. int32_t getSlop(); /// Return true if matches are required to be in-order. 
bool isInOrder(); virtual String getField(); virtual void extractTerms(SetTerm terms); virtual String toString(const String& field); virtual SpansPtr getSpans(const IndexReaderPtr& reader); virtual QueryPtr rewrite(const IndexReaderPtr& reader); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SpanNotQuery.h000066400000000000000000000027661456444476200231500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANNOTQUERY_H #define SPANNOTQUERY_H #include "SpanQuery.h" namespace Lucene { /// Removes matches which overlap with another SpanQuery. class LPPAPI SpanNotQuery : public SpanQuery { public: /// Construct a SpanNotQuery matching spans from include which have no overlap with spans from exclude. SpanNotQuery(const SpanQueryPtr& include, const SpanQueryPtr& exclude); virtual ~SpanNotQuery(); LUCENE_CLASS(SpanNotQuery); protected: SpanQueryPtr include; SpanQueryPtr exclude; public: using SpanQuery::toString; /// Return the SpanQuery whose matches are filtered. SpanQueryPtr getInclude(); /// Return the SpanQuery whose matches must not overlap those returned. 
SpanQueryPtr getExclude(); virtual String getField(); virtual void extractTerms(SetTerm terms); virtual String toString(const String& field); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual SpansPtr getSpans(const IndexReaderPtr& reader); virtual QueryPtr rewrite(const IndexReaderPtr& reader); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SpanOrQuery.h000066400000000000000000000025171456444476200227620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANORQUERY_H #define SPANORQUERY_H #include "SpanQuery.h" namespace Lucene { /// Matches the union of its clauses. class LPPAPI SpanOrQuery : public SpanQuery { public: /// Construct a SpanOrQuery merging the provided clauses. SpanOrQuery(Collection clauses); virtual ~SpanOrQuery(); LUCENE_CLASS(SpanOrQuery); protected: Collection clauses; String field; public: using SpanQuery::toString; /// Return the clauses whose spans are matched. 
Collection getClauses(); virtual String getField(); virtual void extractTerms(SetTerm terms); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual QueryPtr rewrite(const IndexReaderPtr& reader); virtual String toString(const String& field); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual SpansPtr getSpans(const IndexReaderPtr& reader); friend class OrSpans; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SpanQuery.h000066400000000000000000000016131456444476200224550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANQUERY_H #define SPANQUERY_H #include "Query.h" namespace Lucene { /// Base class for span-based queries. class LPPAPI SpanQuery : public Query { public: virtual ~SpanQuery(); LUCENE_CLASS(SpanQuery); public: /// Returns the matches for this query in an index. Used internally to search for spans. virtual SpansPtr getSpans(const IndexReaderPtr& reader) = 0; /// Returns the name of the field matched by this query. virtual String getField() = 0; virtual WeightPtr createWeight(const SearcherPtr& searcher); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SpanQueryFilter.h000066400000000000000000000031201456444476200236160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef SPANQUERYFILTER_H #define SPANQUERYFILTER_H #include "SpanFilter.h" namespace Lucene { /// Constrains search results to only match those which also match a provided query. Also provides position /// information about where each document matches at the cost of extra space compared with the /// QueryWrapperFilter. There is an added cost to this above what is stored in a {@link QueryWrapperFilter}. /// Namely, the position information for each matching document is stored. /// /// This filter does not cache. See the {@link CachingSpanFilter} for a wrapper that caches. class LPPAPI SpanQueryFilter : public SpanFilter { public: /// Constructs a filter which only matches documents matching query. /// @param query The {@link SpanQuery} to use as the basis for the Filter. SpanQueryFilter(const SpanQueryPtr& query = SpanQueryPtr()); virtual ~SpanQueryFilter(); LUCENE_CLASS(SpanQueryFilter); protected: SpanQueryPtr query; public: virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); virtual SpanFilterResultPtr bitSpans(const IndexReaderPtr& reader); SpanQueryPtr getQuery(); virtual String toString(); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SpanScorer.h000066400000000000000000000024701456444476200226070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANSCORER_H #define SPANSCORER_H #include "Scorer.h" namespace Lucene { /// Public for extension only. 
class LPPAPI SpanScorer : public Scorer { public: SpanScorer(const SpansPtr& spans, const WeightPtr& weight, const SimilarityPtr& similarity, ByteArray norms); virtual ~SpanScorer(); LUCENE_CLASS(SpanScorer); protected: SpansPtr spans; WeightPtr weight; ByteArray norms; double value; bool more; int32_t doc; double freq; public: virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); virtual int32_t docID(); virtual double score(); virtual float termFreq(){ return freq; } protected: virtual bool setFreqCurrentDoc(); /// This method is no longer an official member of {@link Scorer}, but it is needed by SpanWeight /// to build an explanation. virtual ExplanationPtr explain(int32_t doc); friend class SpanWeight; friend class PayloadNearSpanWeight; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SpanTermQuery.h000066400000000000000000000022761456444476200233130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANTERMQUERY_H #define SPANTERMQUERY_H #include "SpanQuery.h" namespace Lucene { /// Matches spans containing a term. class LPPAPI SpanTermQuery : public SpanQuery { public: /// Construct a SpanTermQuery matching the named term's spans. SpanTermQuery(const TermPtr& term); virtual ~SpanTermQuery(); LUCENE_CLASS(SpanTermQuery); protected: TermPtr term; public: using SpanQuery::toString; /// Return the term whose spans are matched. 
TermPtr getTerm(); virtual String getField(); virtual void extractTerms(SetTerm terms); virtual String toString(const String& field); virtual int32_t hashCode(); virtual bool equals(const LuceneObjectPtr& other); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual SpansPtr getSpans(const IndexReaderPtr& reader); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/SpanWeight.h000066400000000000000000000023771456444476200226070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANWEIGHT_H #define SPANWEIGHT_H #include "Weight.h" namespace Lucene { /// Public for use by other weight implementations class LPPAPI SpanWeight : public Weight { public: SpanWeight(const SpanQueryPtr& query, const SearcherPtr& searcher); virtual ~SpanWeight(); LUCENE_CLASS(SpanWeight); protected: SimilarityPtr similarity; double value; double idf; double queryNorm; double queryWeight; SetTerm terms; SpanQueryPtr query; IDFExplanationPtr idfExp; public: virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); friend class PayloadNearSpanScorer; friend class PayloadTermSpanScorer; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Spans.h000066400000000000000000000057101456444476200216140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANS_H #define SPANS_H #include "LuceneObject.h" namespace Lucene { /// An enumeration of span matches. Used to implement span searching. Each span represents a range of term /// positions within a document. Matches are enumerated in order, by increasing document number, within that /// by increasing start position and finally by increasing end position. class LPPAPI Spans : public LuceneObject { public: virtual ~Spans(); LUCENE_CLASS(Spans); public: /// Move to the next match, returning true if any such exists. virtual bool next() = 0; /// Skips to the first match beyond the current, whose document number is greater than or equal to target. /// /// Returns true if there is such a match. /// /// Behaves as if written: ///
    /// bool skipTo(int32_t target)
    /// {
    ///     do
    ///     {
    ///         if (!next())
    ///             return false;
    ///     }
    ///     while (target > doc());
    ///     return true;
    /// }
    /// 
/// Most implementations are considerably more efficient than that. virtual bool skipTo(int32_t target) = 0; /// Returns the document number of the current match. Initially invalid. virtual int32_t doc() = 0; /// Returns the start position of the current match. Initially invalid. virtual int32_t start() = 0; /// Returns the end position of the current match. Initially invalid. virtual int32_t end() = 0; /// Returns the payload data for the current span. This is invalid until {@link #next()} is called for the /// first time. This method must not be called more than once after each call of {@link #next()}. However, /// most payloads are loaded lazily, so if the payload data for the current position is not needed, this /// method may not be called at all for performance reasons. An ordered SpanQuery does not lazy load, so /// if you have payloads in your index and you do not want ordered SpanNearQuerys to collect payloads, you /// can disable collection with a constructor option. /// /// Note that the return type is a collection, thus the ordering should not be relied upon. /// /// @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable /// is false virtual Collection getPayload() = 0; /// Checks if a payload can be loaded at this position. /// /// Payloads can only be loaded once per call to {@link #next()}. /// /// @return true if there is a payload available at this position that can be loaded virtual bool isPayloadAvailable() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/StandardAnalyzer.h000066400000000000000000000063141456444476200237770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef STANDARDANALYZER_H #define STANDARDANALYZER_H #include "Analyzer.h" namespace Lucene { /// Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link LowerCaseFilter} and {@link StopFilter}, using /// a list of English stop words. /// /// You must specify the required {@link Version} compatibility when creating StandardAnalyzer: /// ///
    ///
  • As of 2.9, StopFilter preserves position increments ///
  • As of 2.4, Tokens incorrectly identified as acronyms are corrected ///
class LPPAPI StandardAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}). /// @param matchVersion Lucene version to match. StandardAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. /// @param matchVersion Lucene version to match. /// @param stopWords stop words StandardAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords); /// Builds an analyzer with the stop words from the given file. /// @see WordlistLoader#getWordSet(const String&, const String&) /// @param matchVersion Lucene version to match. /// @param stopwords File to read stop words from. StandardAnalyzer(LuceneVersion::Version matchVersion, const String& stopwords); /// Builds an analyzer with the stop words from the given reader. /// @see WordlistLoader#getWordSet(ReaderPtr, const String&) /// @param matchVersion Lucene version to match. /// @param stopwords Reader to read stop words from. StandardAnalyzer(LuceneVersion::Version matchVersion, const ReaderPtr& stopwords); virtual ~StandardAnalyzer(); LUCENE_CLASS(StandardAnalyzer); public: /// Default maximum allowed token length static const int32_t DEFAULT_MAX_TOKEN_LENGTH; protected: HashSet stopSet; /// Specifies whether deprecated acronyms should be replaced with HOST type. bool replaceInvalidAcronym; bool enableStopPositionIncrements; LuceneVersion::Version matchVersion; int32_t maxTokenLength; protected: /// Construct an analyzer with the given stop words. void ConstructAnalyser(LuceneVersion::Version matchVersion, HashSet stopWords); public: /// Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter} /// and a {@link StopFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); /// Set maximum allowed token length. If a token is seen that exceeds this length then it is discarded. 
This setting /// only takes effect the next time tokenStream or reusableTokenStream is called. void setMaxTokenLength(int32_t length); /// @see #setMaxTokenLength int32_t getMaxTokenLength(); virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/StandardFilter.h000066400000000000000000000021051456444476200234310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STANDARDFILTER_H #define STANDARDFILTER_H #include "TokenFilter.h" namespace Lucene { /// Normalizes tokens extracted with {@link StandardTokenizer}. class LPPAPI StandardFilter : public TokenFilter { public: /// Construct filtering input. StandardFilter(const TokenStreamPtr& input); virtual ~StandardFilter(); LUCENE_CLASS(StandardFilter); protected: TypeAttributePtr typeAtt; TermAttributePtr termAtt; protected: static const String& APOSTROPHE_TYPE(); static const String& ACRONYM_TYPE(); public: /// Returns the next token in the stream, or null at EOS. /// /// Removes 's from the end of words. /// Removes dots from acronyms. virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/StandardTokenizer.h000066400000000000000000000075711456444476200241720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef STANDARDTOKENIZER_H #define STANDARDTOKENIZER_H #include "Tokenizer.h" namespace Lucene { /// A grammar-based tokenizer /// /// This should be a good tokenizer for most European-language documents: /// ///
    ///
  • Splits words at punctuation characters, removing punctuation. However, a dot that's not followed by /// whitespace is considered part of a token. ///
  • Splits words at hyphens, unless there's a number in the token, in which case the whole token is interpreted /// as a product number and is not split. ///
  • Recognizes email addresses and internet hostnames as one token. ///
/// /// Many applications have specific tokenizer needs. If this tokenizer does not suit your application, please consider /// copying this source code directory to your project and maintaining your own grammar-based tokenizer. /// /// You must specify the required {@link Version} compatibility when creating StandardAnalyzer: /// ///
    ///
  • As of 2.4, Tokens incorrectly identified as acronyms are corrected ///
class LPPAPI StandardTokenizer : public Tokenizer { public: /// Creates a new instance of the {@link StandardTokenizer}. Attaches the input to the newly created scanner. /// @param input The input reader StandardTokenizer(LuceneVersion::Version matchVersion, const ReaderPtr& input); /// Creates a new StandardTokenizer with a given {@link AttributeSource}. StandardTokenizer(LuceneVersion::Version matchVersion, const AttributeSourcePtr& source, const ReaderPtr& input); /// Creates a new StandardTokenizer with a given {@link AttributeSource.AttributeFactory} StandardTokenizer(LuceneVersion::Version matchVersion, const AttributeFactoryPtr& factory, const ReaderPtr& input); virtual ~StandardTokenizer(); LUCENE_CLASS(StandardTokenizer); protected: /// A private instance of the scanner StandardTokenizerImplPtr scanner; bool replaceInvalidAcronym; int32_t maxTokenLength; // this tokenizer generates three attributes: offset, positionIncrement and type TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; PositionIncrementAttributePtr posIncrAtt; TypeAttributePtr typeAtt; public: static const int32_t ALPHANUM; static const int32_t APOSTROPHE; static const int32_t ACRONYM; static const int32_t COMPANY; static const int32_t EMAIL; static const int32_t HOST; static const int32_t NUM; static const int32_t CJ; /// @deprecated this solves a bug where HOSTs that end with '.' are identified as ACRONYMs. static const int32_t ACRONYM_DEP; /// String token types that correspond to token type int constants static const Collection TOKEN_TYPES(); protected: void init(const ReaderPtr& input, LuceneVersion::Version matchVersion); public: /// Set the max allowed token length. Any token longer than this is skipped. 
void setMaxTokenLength(int32_t length); /// @see #setMaxTokenLength int32_t getMaxTokenLength(); /// @see TokenStream#next() virtual bool incrementToken(); virtual void end(); virtual void reset(const ReaderPtr& input); /// @return true if StandardTokenizer now returns these tokens as Hosts, otherwise false /// @deprecated Remove in 3.X and make true the only valid value bool isReplaceInvalidAcronym(); /// @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms as HOST. /// @deprecated Remove in 3.X and make true the only valid value void setReplaceInvalidAcronym(bool replaceInvalidAcronym); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/StandardTokenizerImpl.h000066400000000000000000000150111456444476200250000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STANDARDTOKENIZERIMPL_H #define STANDARDTOKENIZERIMPL_H #include "LuceneObject.h" namespace Lucene { class StandardTokenizerImpl : public LuceneObject { public: /// Creates a new scanner /// @param in the Reader to read input from. StandardTokenizerImpl(const ReaderPtr& in); virtual ~StandardTokenizerImpl(); LUCENE_CLASS(StandardTokenizerImpl); protected: /// Initial size of the lookahead buffer static const int32_t ZZ_BUFFERSIZE; /// Translates characters to character classes static CharArray _ZZ_CMAP; static const wchar_t ZZ_CMAP_PACKED[]; static const int32_t ZZ_CMAP_LENGTH; static const int32_t ZZ_CMAP_PACKED_LENGTH; /// Translates characters to character classes static void ZZ_CMAP_INIT(); static const wchar_t* ZZ_CMAP(); /// Translates DFA states to action switch labels. 
static IntArray _ZZ_ACTION; static const wchar_t ZZ_ACTION_PACKED_0[]; static const int32_t ZZ_ACTION_LENGTH; static const int32_t ZZ_ACTION_PACKED_LENGTH; /// Translates DFA states to action switch labels. static void ZZ_ACTION_INIT(); static const int32_t* ZZ_ACTION(); /// Translates a state to a row index in the transition table static IntArray _ZZ_ROWMAP; static const wchar_t ZZ_ROWMAP_PACKED_0[]; static const int32_t ZZ_ROWMAP_LENGTH; static const int32_t ZZ_ROWMAP_PACKED_LENGTH; /// Translates a state to a row index in the transition table static void ZZ_ROWMAP_INIT(); static const int32_t* ZZ_ROWMAP(); /// The transition table of the DFA static IntArray _ZZ_TRANS; static const wchar_t ZZ_TRANS_PACKED_0[]; static const int32_t ZZ_TRANS_LENGTH; static const int32_t ZZ_TRANS_PACKED_LENGTH; /// The transition table of the DFA static void ZZ_TRANS_INIT(); static const int32_t* ZZ_TRANS(); // error codes static const int32_t ZZ_UNKNOWN_ERROR; static const int32_t ZZ_NO_MATCH; static const int32_t ZZ_PUSHBACK_2BIG; static const wchar_t* ZZ_ERROR_MSG[]; /// ZZ_ATTRIBUTE[aState] contains the attributes of state aState static IntArray _ZZ_ATTRIBUTE; static const wchar_t ZZ_ATTRIBUTE_PACKED_0[]; static const int32_t ZZ_ATTRIBUTE_LENGTH; static const int32_t ZZ_ATTRIBUTE_PACKED_LENGTH; /// ZZ_ATTRIBUTE[aState] contains the attributes of state aState static void ZZ_ATTRIBUTE_INIT(); static const int32_t* ZZ_ATTRIBUTE(); /// The input device ReaderPtr zzReader; /// The current state of the DFA int32_t zzState; /// The current lexical state int32_t zzLexicalState; /// This buffer contains the current text to be matched and is the source of the yytext() string CharArray zzBuffer; /// The text position at the last accepting state int32_t zzMarkedPos; /// The text position at the last state to be included in yytext int32_t zzPushbackPos; /// The current text position in the buffer int32_t zzCurrentPos; /// StartRead marks the beginning of the yytext() string in the buffer 
int32_t zzStartRead; /// EndRead marks the last character in the buffer, that has been read from input int32_t zzEndRead; /// Number of newlines encountered up to the start of the matched text int32_t yyline; /// The number of characters up to the start of the matched text int32_t _yychar; /// The number of characters from the last newline up to the start of the matched text int32_t yycolumn; /// zzAtBOL == true if the scanner is currently at the beginning of a line bool zzAtBOL; /// zzAtEOF == true if the scanner is at the EOF bool zzAtEOF; public: /// This character denotes the end of file static const int32_t YYEOF; /// Lexical states static const int32_t YYINITIAL; public: int32_t yychar(); /// Resets the Tokenizer to a new Reader. void reset(const ReaderPtr& r); /// Fills Lucene token with the current token text. void getText(const TokenPtr& t); /// Fills TermAttribute with the current token text. void getText(const TermAttributePtr& t); /// Closes the input stream. void yyclose(); /// Resets the scanner to read from a new input stream. Does not close the old reader. /// /// All internal variables are reset, the old input stream cannot be reused (internal buffer is discarded and lost). /// Lexical state is set to ZZ_INITIAL. /// /// @param reader the new input stream. void yyreset(const ReaderPtr& reader); /// Returns the current lexical state. int32_t yystate(); /// Enters a new lexical state /// @param newState the new lexical state. void yybegin(int32_t newState); /// Returns the text matched by the current regular expression. String yytext(); /// Returns the character at position pos from the matched text. /// /// It is equivalent to yytext()[pos], but faster /// @param pos the position of the character to fetch. A value from 0 to yylength() - 1. /// @return the character at position pos. wchar_t yycharat(int32_t pos); /// Returns the length of the matched text region. 
int32_t yylength(); /// Pushes the specified amount of characters back into the input stream. /// /// They will be read again by then next call of the scanning method /// @param number the number of characters to be read again. This number must not be greater than yylength() void yypushback(int32_t number); /// Resumes scanning until the next regular expression is matched, the end of input is encountered or an I/O- /// Error occurs. int32_t getNextToken(); protected: /// Refills the input buffer. bool zzRefill(); /// Reports an error that occurred while scanning. /// /// In a well-formed scanner (no or only correct usage of yypushback(int32_t) and a match-all fallback rule) /// this method will only be called with things that "Can't Possibly Happen". If this method is called, /// something is seriously wrong. /// /// Usual syntax/scanner level error handling should be done in error fallback rules. /// /// @param errorCode The code of the errormessage to display. void zzScanError(int32_t errorCode); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/StopAnalyzer.h000066400000000000000000000035361456444476200231670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STOPANALYZER_H #define STOPANALYZER_H #include "Analyzer.h" namespace Lucene { /// Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}. /// /// You must specify the required {@link Version} compatibility when creating StopAnalyzer: As of 2.9, position /// increments are preserved class LPPAPI StopAnalyzer : public Analyzer { public: /// Builds an analyzer which removes words in {@link #ENGLISH_STOP_WORDS_SET}. 
StopAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the stop words from the given set. StopAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords); /// Builds an analyzer with the stop words from the given file. StopAnalyzer(LuceneVersion::Version matchVersion, const String& stopwordsFile); /// Builds an analyzer with the stop words from the given reader. StopAnalyzer(LuceneVersion::Version matchVersion, const ReaderPtr& stopwords); virtual ~StopAnalyzer(); LUCENE_CLASS(StopAnalyzer); protected: HashSet stopWords; bool enablePositionIncrements; static const wchar_t* _ENGLISH_STOP_WORDS_SET[]; public: /// An unmodifiable set containing some common English words that are usually not useful for searching. static const HashSet ENGLISH_STOP_WORDS_SET(); virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/StopFilter.h000066400000000000000000000060651456444476200226270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STOPFILTER_H #define STOPFILTER_H #include "TokenFilter.h" namespace Lucene { /// Removes stop words from a token stream. class LPPAPI StopFilter : public TokenFilter { public: /// Construct a token stream filtering the given input. If stopWords is an instance of {@link CharArraySet} /// (true if makeStopSet() was used to construct the set) it will be directly used and ignoreCase will be /// ignored since CharArraySet directly controls case sensitivity. 
/// /// If stopWords is not an instance of {@link CharArraySet}, a new CharArraySet will be constructed and /// ignoreCase will be used to specify the case sensitivity of that set. /// /// @param enablePositionIncrements true if token positions should record the removed stop words /// @param input Input TokenStream /// @param stopWords A Set of Strings or char[] or any other toString()-able set representing the stopwords /// @param ignoreCase if true, all words are lower cased first StopFilter(bool enablePositionIncrements, const TokenStreamPtr& input, HashSet stopWords, bool ignoreCase = false); StopFilter(bool enablePositionIncrements, const TokenStreamPtr& input, const CharArraySetPtr& stopWords, bool ignoreCase = false); virtual ~StopFilter(); LUCENE_CLASS(StopFilter); protected: CharArraySetPtr stopWords; bool enablePositionIncrements; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; public: /// Builds a Set from an array of stop words, appropriate for passing into the StopFilter constructor. static HashSet makeStopSet(Collection stopWords); /// Returns the next input Token whose term() is not a stop word. virtual bool incrementToken(); /// Returns version-dependent default for enablePositionIncrements. Analyzers that embed StopFilter use this /// method when creating the StopFilter. Prior to 2.9, this returns false. On 2.9 or later, it returns true. static bool getEnablePositionIncrementsVersionDefault(LuceneVersion::Version matchVersion); /// @see #setEnablePositionIncrements(bool). bool getEnablePositionIncrements(); /// If true, this StopFilter will preserve positions of the incoming tokens (ie, accumulate and set position /// increments of the removed stop tokens). Generally, true is best as it does not lose information (positions /// of the original tokens) during indexing. /// /// When set, when a token is stopped (omitted), the position increment of the following token is incremented. 
/// /// NOTE: be sure to also set {@link QueryParser#setEnablePositionIncrements} if you use QueryParser to create queries. void setEnablePositionIncrements(bool enable); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/StoredFieldsWriter.h000066400000000000000000000037061456444476200243170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STOREDFIELDSWRITER_H #define STOREDFIELDSWRITER_H #include "DocumentsWriter.h" namespace Lucene { /// This is a DocFieldConsumer that writes stored fields. class StoredFieldsWriter : public LuceneObject { public: StoredFieldsWriter(const DocumentsWriterPtr& docWriter, const FieldInfosPtr& fieldInfos); virtual ~StoredFieldsWriter(); LUCENE_CLASS(StoredFieldsWriter); public: FieldsWriterPtr fieldsWriter; DocumentsWriterWeakPtr _docWriter; FieldInfosPtr fieldInfos; int32_t lastDocID; Collection docFreeList; int32_t freeCount; int32_t allocCount; public: StoredFieldsWriterPerThreadPtr addThread(const DocStatePtr& docState); void flush(const SegmentWriteStatePtr& state); void closeDocStore(const SegmentWriteStatePtr& state); StoredFieldsWriterPerDocPtr getPerDoc(); void abort(); /// Fills in any hole in the docIDs void fill(int32_t docID); void finishDocument(const StoredFieldsWriterPerDocPtr& perDoc); bool freeRAM(); void free(const StoredFieldsWriterPerDocPtr& perDoc); protected: void initFieldsWriter(); }; class StoredFieldsWriterPerDoc : public DocWriter { public: StoredFieldsWriterPerDoc(const StoredFieldsWriterPtr& fieldsWriter); virtual ~StoredFieldsWriterPerDoc(); LUCENE_CLASS(StoredFieldsWriterPerDoc); protected: StoredFieldsWriterWeakPtr _fieldsWriter; public: PerDocBufferPtr buffer; RAMOutputStreamPtr 
fdt; int32_t numStoredFields; public: void reset(); virtual void abort(); virtual int64_t sizeInBytes(); virtual void finish(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/StoredFieldsWriterPerThread.h000066400000000000000000000020621456444476200261100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STOREDFIELDSWRITERPERTHREAD_H #define STOREDFIELDSWRITERPERTHREAD_H #include "LuceneObject.h" namespace Lucene { class StoredFieldsWriterPerThread : public LuceneObject { public: StoredFieldsWriterPerThread(const DocStatePtr& docState, const StoredFieldsWriterPtr& storedFieldsWriter); virtual ~StoredFieldsWriterPerThread(); LUCENE_CLASS(StoredFieldsWriterPerThread); public: FieldsWriterPtr localFieldsWriter; StoredFieldsWriterWeakPtr _storedFieldsWriter; DocStatePtr docState; StoredFieldsWriterPerDocPtr doc; public: void startDocument(); void addField(const FieldablePtr& field, const FieldInfoPtr& fieldInfo); DocWriterPtr finishDocument(); void abort(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/StringReader.h000066400000000000000000000023141456444476200231160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STRINGREADER_H #define STRINGREADER_H #include "Reader.h" namespace Lucene { /// Convenience class for reading strings. 
class LPPAPI StringReader : public Reader { public: /// Creates a new StringReader, given the String to read from. StringReader(const String& str); virtual ~StringReader(); LUCENE_CLASS(StringReader); protected: String str; int32_t position; public: /// Read a single character. virtual int32_t read(); /// Read characters into a portion of an array. virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); /// Close the stream. virtual void close(); /// Tell whether this stream supports the mark() operation virtual bool markSupported(); /// Reset the stream. virtual void reset(); /// The number of bytes in the stream. virtual int64_t length(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/StringUtils.h000066400000000000000000000061621456444476200230210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef STRINGUTILS_H #define STRINGUTILS_H #include "Lucene.h" namespace Lucene { class LPPAPI StringUtils { public: /// Maximum length of UTF encoding. static const int32_t MAX_ENCODING_UTF8_SIZE; /// Default character radix. static const int32_t CHARACTER_MAX_RADIX; public: /// Convert uft8 buffer into unicode. static int32_t toUnicode(const uint8_t* utf8, int32_t length, CharArray unicode); /// Convert uft8 buffer into unicode. static int32_t toUnicode(const uint8_t* utf8, int32_t length, const UnicodeResultPtr& unicodeResult); /// Convert uft8 buffer into unicode. static String toUnicode(const uint8_t* utf8, int32_t length); /// Convert uft8 string into unicode. static String toUnicode(const SingleString& s); /// Convert unicode buffer into uft8. 
static int32_t toUTF8(const wchar_t* unicode, int32_t length, ByteArray utf8); /// Convert unicode buffer into uft8. static int32_t toUTF8(const wchar_t* unicode, int32_t length, const UTF8ResultPtr& utf8Result); /// Convert unicode buffer into uft8. static SingleString toUTF8(const wchar_t* unicode, int32_t length); /// Convert unicode string into uft8. static SingleString toUTF8(const String& s); /// Convert given string to lower case using current locale static void toLower(String& str); /// Convert given string to lower case using current locale static String toLower(const String& str); /// Convert given string to upper case using current locale static void toUpper(String& str); /// Convert given string to upper case using current locale static String toUpper(const String& str); /// Compare two strings ignoring case differences static int32_t compareCase(const String& first, const String& second); /// Splits string using given delimiters static Collection split(const String& str, const String& delim); /// Convert the given string to int32_t. static int32_t toInt(const String& value); /// Convert the given string to int64_t. static int64_t toLong(const String& value); /// Return given value as a long integer using base unit. static int64_t toLong(const String& value, int32_t base); /// Convert the given string to double. static double toDouble(const String& value); /// Compute the hash code from string. static int32_t hashCode(const String& value); /// Return given value as a string using base unit. static String toString(int64_t value, int32_t base); /// Convert any given type to a {@link String}. 
template static String toString(const TYPE& value) { StringStream os; os << value; return os.str(); } }; #define UTF8_TO_STRING(utf8) StringUtils::toUnicode(utf8, SIZEOF_ARRAY(utf8)) } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Synchronize.h000066400000000000000000000037761456444476200230550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SYNCHRONIZE_H #define SYNCHRONIZE_H #include #include #include #include "Lucene.h" namespace Lucene { /// Utility class to support locking via a mutex. class LPPAPI Synchronize { public: Synchronize(); virtual ~Synchronize(); protected: boost::recursive_timed_mutex mutexSynchronize; int64_t lockThread; int32_t recursionCount; public: /// create a new Synchronize instance atomically. static void createSync(SynchronizePtr& sync); /// Lock mutex using an optional timeout. void lock(int32_t timeout = 0); /// Unlock mutex. void unlock(); /// Unlock all recursive mutex. int32_t unlockAll(); /// Returns true if mutex is currently locked by current thread. bool holdsLock(); }; /// Utility class to support scope locking. 
class LPPAPI SyncLock { public: SyncLock(const SynchronizePtr& sync, int32_t timeout = 0); template SyncLock(OBJECT object, int32_t timeout = 0) { this->sync = object->getSync(); lock(timeout); } virtual ~SyncLock(); protected: SynchronizePtr sync; protected: void lock(int32_t timeout); }; #define LUCENE_RUN_ONCE(Command) \ do { \ static std::atomic RUN_ONCE_hasRun = {}; \ if (!RUN_ONCE_hasRun) { \ static boost::mutex RUN_ONCE_mutex; \ boost::mutex::scoped_lock RUN_ONCE_lock(RUN_ONCE_mutex); \ if (!RUN_ONCE_hasRun) { \ Command; \ RUN_ONCE_hasRun = true; \ } \ } \ } while(0) } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TeeSinkTokenFilter.h000066400000000000000000000131201456444476200242330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TEESINKTOKENFILTER_H #define TEESINKTOKENFILTER_H #include "TokenFilter.h" #include "TokenStream.h" namespace Lucene { /// This TokenFilter provides the ability to set aside attribute states that have already been analyzed. This is /// useful in situations where multiple fields share many common analysis steps and then go their separate ways. /// /// It is also useful for doing things like entity extraction or proper noun analysis as part of the analysis workflow /// and saving off those tokens for use in another field. /// ///
/// TeeSinkTokenFilterPtr source1 = newLucene(newLucene(reader1));
/// SinkTokenStreamPtr sink1 = source1->newSinkTokenStream();
/// SinkTokenStreamPtr sink2 = source1->newSinkTokenStream();
///
/// TeeSinkTokenFilterPtr source2 = newLucene(newLucene(reader2));
/// source2->addSinkTokenStream(sink1);
/// source2->addSinkTokenStream(sink2);
///
/// TokenStreamPtr final1 = newLucene(source1);
/// TokenStreamPtr final2 = source2;
/// TokenStreamPtr final3 = newLucene(sink1);
/// TokenStreamPtr final4 = newLucene(sink2);
///
/// d->add(newLucene(L"f1", final1));
/// d->add(newLucene(L"f2", final2));
/// d->add(newLucene(L"f3", final3));
/// d->add(newLucene(L"f4", final4));
/// 
/// /// In this example, sink1 and sink2 will both get tokens from both reader1 and reader2 after whitespace tokenizer /// and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired. /// It is important, that tees are consumed before sinks (in the above example, the field names must be less the /// sink's field names). If you are not sure, which stream is consumed first, you can simply add another sink and /// then pass all tokens to the sinks at once using {@link #consumeAllTokens}. /// /// This TokenFilter is exhausted after this. In the above example, change the example above to: /// ///
/// ...
/// TokenStreamPtr final1 = newLucene(source1->newSinkTokenStream());
/// TokenStreamPtr final2 = source2->newSinkTokenStream();
/// sink1->consumeAllTokens();
/// sink2->consumeAllTokens();
/// ...
/// 
/// /// In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are /// ready. /// /// Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene. class LPPAPI TeeSinkTokenFilter : public TokenFilter { public: /// Instantiates a new TeeSinkTokenFilter. TeeSinkTokenFilter(const TokenStreamPtr& input); virtual ~TeeSinkTokenFilter(); LUCENE_CLASS(TeeSinkTokenFilter); protected: Collection sinks; public: /// Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream. SinkTokenStreamPtr newSinkTokenStream(); /// Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream that pass /// the supplied filter. /// @see SinkFilter SinkTokenStreamPtr newSinkTokenStream(const SinkFilterPtr& filter); /// Adds a {@link SinkTokenStream} created by another TeeSinkTokenFilter to this one. The supplied stream will /// also receive all consumed tokens. This method can be used to pass tokens from two different tees to one sink. void addSinkTokenStream(const SinkTokenStreamPtr& sink); /// TeeSinkTokenFilter passes all tokens to the added sinks when itself is consumed. To be sure, that all tokens /// from the input stream are passed to the sinks, you can call this methods. This instance is exhausted after this, /// but all sinks are instant available. void consumeAllTokens(); virtual bool incrementToken(); virtual void end(); }; class LPPAPI SinkFilter : public LuceneObject { public: virtual ~SinkFilter(); LUCENE_CLASS(SinkFilter); public: /// Returns true, if the current state of the passed-in {@link AttributeSource} shall be stored in the sink. virtual bool accept(const AttributeSourcePtr& source) = 0; /// Called by {@link SinkTokenStream#reset()}. This method does nothing by default and can optionally be overridden. 
virtual void reset(); }; class LPPAPI AcceptAllSinkFilter : public SinkFilter { public: virtual ~AcceptAllSinkFilter(); LUCENE_CLASS(AcceptAllSinkFilter); public: virtual bool accept(const AttributeSourcePtr& source); }; /// A filter that decides which {@link AttributeSource} states to store in the sink. class LPPAPI SinkTokenStream : public TokenStream { public: SinkTokenStream(const AttributeSourcePtr& source, const SinkFilterPtr& filter); virtual ~SinkTokenStream(); LUCENE_CLASS(SinkTokenStream); protected: Collection cachedStates; AttributeSourceStatePtr finalState; bool initIterator; Collection::iterator it; SinkFilterPtr filter; protected: bool accept(const AttributeSourcePtr& source); void addState(const AttributeSourceStatePtr& state); void setFinalState(const AttributeSourceStatePtr& finalState); public: virtual bool incrementToken(); virtual void end(); virtual void reset(); friend class TeeSinkTokenFilter; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Term.h000066400000000000000000000042161456444476200214370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERM_H #define TERM_H #include "LuceneObject.h" namespace Lucene { /// A Term represents a word from text. This is the unit of search. It is composed of two elements, /// the text of the word, as a string, and the name of the field that the text occurred in, an interned /// string. /// /// Note that terms may represent more than words from text fields, but also things like dates, email /// addresses, urls, etc. class LPPAPI Term : public LuceneObject { public: /// Constructs a Term with the given field and text. 
Term(const String& fld, const String& txt = EmptyString); virtual ~Term(); LUCENE_CLASS(Term); public: String _field; String _text; public: /// Returns the field of this term, an interned string. The field indicates the part of a document /// which this term came from. String field(); /// Returns the text of this term. In the case of words, this is simply the text of the word. In /// the case of dates and other types, this is an encoding of the object as a string. String text(); /// Optimized construction of new Terms by reusing same field as this Term /// @param text The text of the new term (field is implicitly same as this Term instance) /// @return A new Term TermPtr createTerm(const String& text); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); /// Compares two terms, returning a negative integer if this term belongs before the argument, zero /// if this term is equal to the argument, and a positive integer if this term belongs after the argument. /// /// The ordering of terms is first by field, then by text. virtual int32_t compareTo(const LuceneObjectPtr& other); void set(const String& fld, const String& txt); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermAttribute.h000066400000000000000000000073201456444476200233220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMATTRIBUTE_H #define TERMATTRIBUTE_H #include "Attribute.h" namespace Lucene { /// The term text of a Token. 
class LPPAPI TermAttribute : public Attribute { public: TermAttribute(); virtual ~TermAttribute(); LUCENE_CLASS(TermAttribute); protected: static const int32_t MIN_BUFFER_SIZE; CharArray _termBuffer; int32_t _termLength; public: virtual String toString(); /// Returns the Token's term text. /// /// This method has a performance penalty because the text is stored internally in a char[]. If possible, /// use {@link #termBuffer()} and {@link #termLength()} directly instead. If you really need a String, use /// this method, which is nothing more than a convenience call to new String(token.termBuffer(), 0, /// token.termLength()) virtual String term(); /// Copies the contents of buffer, starting at offset for length characters, into the termBuffer array. /// @param buffer the buffer to copy /// @param offset the index in the buffer of the first character to copy /// @param length the number of characters to copy virtual void setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length); /// Copies the contents of buffer into the termBuffer array. /// @param buffer the buffer to copy virtual void setTermBuffer(const String& buffer); /// Returns the internal termBuffer character array which you can then directly alter. If the array is /// too small for your token, use {@link #resizeTermBuffer(int)} to increase it. After altering the buffer /// be sure to call {@link #setTermLength} to record the number of valid characters that were placed into /// the termBuffer. virtual CharArray termBuffer(); /// Optimized implementation of termBuffer. virtual wchar_t* termBufferArray(); /// Grows the termBuffer to at least size newSize, preserving the existing content. Note: If the next /// operation is to change the contents of the term buffer use {@link #setTermBuffer(char[], int, int)}, /// {@link #setTermBuffer(String)}, or {@link #setTermBuffer(String, int, int)} to optimally combine the /// resize with the setting of the termBuffer. 
/// @param newSize minimum size of the new termBuffer /// @return newly created termBuffer with length >= newSize virtual CharArray resizeTermBuffer(int32_t newSize); /// Return number of valid characters (length of the term) in the termBuffer array. virtual int32_t termLength(); /// Set number of valid characters (length of the term) in the termBuffer array. Use this to truncate the /// termBuffer or to synchronize with external manipulation of the termBuffer. Note: to grow the size of /// the array, use {@link #resizeTermBuffer(int)} first. /// @param length the truncated length virtual void setTermLength(int32_t length); virtual int32_t hashCode(); virtual void clear(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual bool equals(const LuceneObjectPtr& other); virtual void copyTo(const AttributePtr& target); protected: /// Allocates a buffer char[] of at least newSize, without preserving the existing content. Its always /// used in places that set the content. /// @param newSize minimum size of the buffer void growTermBuffer(int32_t newSize); void initTermBuffer(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermBuffer.h000066400000000000000000000025031456444476200225660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMBUFFER_H #define TERMBUFFER_H #include "LuceneObject.h" namespace Lucene { class TermBuffer : public LuceneObject { public: TermBuffer(); virtual ~TermBuffer(); LUCENE_CLASS(TermBuffer); protected: String field; TermPtr term; // cached bool preUTF8Strings; // true if strings are stored in modified UTF8 encoding UnicodeResultPtr text; UTF8ResultPtr bytes; public: virtual int32_t compareTo(const LuceneObjectPtr& other); /// Call this if the IndexInput passed to {@link #read} stores terms in the "modified UTF8" format. void setPreUTF8Strings(); void read(const IndexInputPtr& input, const FieldInfosPtr& fieldInfos); void set(const TermPtr& term); void set(const TermBufferPtr& other); void reset(); TermPtr toTerm(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); protected: int32_t compareChars(wchar_t* chars1, int32_t len1, wchar_t* chars2, int32_t len2); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermDocs.h000066400000000000000000000044611456444476200222520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMDOCS_H #define TERMDOCS_H #include "LuceneObject.h" namespace Lucene { /// TermDocs provides an interface for enumerating ; pairs for a term. The document /// portion names each document containing the term. Documents are indicated by number. The frequency /// portion gives the number of times the term occurred in each document. The pairs are ordered by document /// number. 
/// @see IndexReader#termDocs() class LPPAPI TermDocs { protected: TermDocs(); public: LUCENE_INTERFACE(TermDocs); public: /// Sets this to the data for a term. The enumeration is reset to the start of the data for this term. virtual void seek(const TermPtr& term) = 0; /// Sets this to the data for the current term in a {@link TermEnum}. /// This may be optimized in some implementations. virtual void seek(const TermEnumPtr& termEnum) = 0; /// Returns the current document number. This is invalid until {@link #next()} is called for the first time. virtual int32_t doc() = 0; /// Returns the frequency of the term within the current document. This is invalid until {@link #next()} is /// called for the first time. virtual int32_t freq() = 0; /// Moves to the next pair in the enumeration. Returns true if there is such a next pair in the enumeration. virtual bool next() = 0; /// Attempts to read multiple entries from the enumeration, up to length of docs. Document numbers are stored /// in docs, and term frequencies are stored in freqs. Returns the number of entries read. Zero is only /// returned when the stream has been exhausted. virtual int32_t read(Collection& docs, Collection& freqs) = 0; /// Skips entries to the first beyond the current whose document number is greater than or equal to target. /// Returns true if there is such an entry. virtual bool skipTo(int32_t target) = 0; /// Frees associated resources. virtual void close() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermEnum.h000066400000000000000000000021541456444476200222630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMENUM_H #define TERMENUM_H #include "LuceneObject.h" namespace Lucene { /// Abstract class for enumerating terms. /// /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater /// than all that precede it. class LPPAPI TermEnum : public LuceneObject { public: virtual ~TermEnum(); LUCENE_CLASS(TermEnum); public: /// Increments the enumeration to the next element. True if one exists. virtual bool next() = 0; /// Returns the current Term in the enumeration. virtual TermPtr term() = 0; /// Returns the docFreq of the current Term in the enumeration. virtual int32_t docFreq() = 0; /// Closes the enumeration to further activity, freeing resources. virtual void close() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermFreqVector.h000066400000000000000000000045021456444476200234360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMFREQVECTOR_H #define TERMFREQVECTOR_H #include "LuceneObject.h" namespace Lucene { /// Provides access to stored term vector of a document field. The vector consists of the name of the field, an /// array of the terms that occur in the field of the {@link Document} and a parallel array of frequencies. Thus, /// getTermFrequencies()[5] corresponds with the frequency of getTerms()[5], assuming there are at least 5 terms /// in the Document. class LPPAPI TermFreqVector { protected: TermFreqVector(); public: virtual ~TermFreqVector(); LUCENE_INTERFACE(TermFreqVector); public: /// The {@link Fieldable} name. /// @return The name of the field this vector is associated with. 
virtual String getField(); /// @return The number of terms in the term vector. virtual int32_t size(); /// @return An Array of term texts in ascending order. virtual Collection getTerms(); /// Array of term frequencies. Locations of the array correspond one to one to the terms in the array obtained from /// getTerms method. Each location in the array contains the number of times this term occurs in the document or the /// document field. virtual Collection getTermFrequencies(); /// Return an index in the term numbers array returned from getTerms at which the term with the specified term appears. /// If this term does not appear in the array, return -1. virtual int32_t indexOf(const String& term); /// Just like indexOf(int) but searches for a number of terms at the same time. Returns an array that has the same size /// as the number of terms searched for, each slot containing the result of searching for that term number. /// /// @param terms array containing terms to look for /// @param start index in the array where the list of terms starts /// @param length the number of terms in the list virtual Collection indexesOf(Collection terms, int32_t start, int32_t length); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermInfo.h000066400000000000000000000017511456444476200222540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMINFO_H #define TERMINFO_H #include "LuceneObject.h" namespace Lucene { /// A TermInfo is the record of information stored for a term. 
class TermInfo : public LuceneObject { public: TermInfo(const TermInfoPtr& ti); TermInfo(int32_t df = 0, int64_t fp = 0, int64_t pp = 0); virtual ~TermInfo(); LUCENE_CLASS(TermInfo); public: /// The number of documents which contain the term. int32_t docFreq; int64_t freqPointer; int64_t proxPointer; int32_t skipOffset; public: void set(int32_t docFreq, int64_t freqPointer, int64_t proxPointer, int32_t skipOffset); void set(const TermInfoPtr& ti); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermInfosReader.h000066400000000000000000000051761456444476200235670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMINFOSREADER_H #define TERMINFOSREADER_H #include "CloseableThreadLocal.h" #include "SimpleLRUCache.h" namespace Lucene { /// This stores a monotonically increasing set of pairs in a Directory. Pairs are /// accessed either by Term or by ordinal position the set. class TermInfosReader : public LuceneObject { public: TermInfosReader(const DirectoryPtr& dir, const String& seg, const FieldInfosPtr& fis, int32_t readBufferSize, int32_t indexDivisor); virtual ~TermInfosReader(); LUCENE_CLASS(TermInfosReader); protected: DirectoryPtr directory; String segment; FieldInfosPtr fieldInfos; CloseableThreadLocal threadResources; SegmentTermEnumPtr origEnum; int64_t _size; Collection indexTerms; Collection indexInfos; Collection indexPointers; int32_t totalIndexInterval; static const int32_t DEFAULT_CACHE_SIZE; public: int32_t getSkipInterval(); int32_t getMaxSkipLevels(); void close(); /// Returns the number of term/value pairs in the set. int64_t size(); /// Returns the TermInfo for a Term in the set, or null. 
TermInfoPtr get(const TermPtr& term); /// Returns the position of a Term in the set or -1. int64_t getPosition(const TermPtr& term); /// Returns an enumeration of all the Terms and TermInfos in the set. SegmentTermEnumPtr terms(); /// Returns an enumeration of terms starting at or after the named term. SegmentTermEnumPtr terms(const TermPtr& term); protected: TermInfosReaderThreadResourcesPtr getThreadResources(); /// Returns the offset of the greatest index entry which is less than or equal to term. int32_t getIndexOffset(const TermPtr& term); void seekEnum(const SegmentTermEnumPtr& enumerator, int32_t indexOffset); /// Returns the TermInfo for a Term in the set, or null. TermInfoPtr get(const TermPtr& term, bool useCache); void ensureIndexIsRead(); }; class TermInfosReaderThreadResources : public LuceneObject { public: virtual ~TermInfosReaderThreadResources(); LUCENE_CLASS(TermInfosReaderThreadResources); public: SegmentTermEnumPtr termEnum; // Used for caching the least recently looked-up Terms TermInfoCachePtr termInfoCache; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermInfosWriter.h000066400000000000000000000071201456444476200236300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMINFOSWRITER_H #define TERMINFOSWRITER_H #include "LuceneObject.h" namespace Lucene { /// This stores a monotonically increasing set of pairs in a Directory. A TermInfos /// can be written once, in order. 
class TermInfosWriter : public LuceneObject { public: TermInfosWriter(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fis, int32_t interval); TermInfosWriter(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fis, int32_t interval, bool isIndex); virtual ~TermInfosWriter(); LUCENE_CLASS(TermInfosWriter); public: /// The file format version, a negative number. static const int32_t FORMAT; /// Changed strings to true utf8 with length-in-bytes not length-in-chars. static const int32_t FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; /// NOTE: always change this if you switch to a new format. static const int32_t FORMAT_CURRENT; /// The fraction of terms in the "dictionary" which should be stored in RAM. Smaller values use more memory, but /// make searching slightly faster, while larger values use less memory and make searching slightly slower. /// Searching is typically not dominated by dictionary lookup, so tweaking this is rarely useful. int32_t indexInterval; /// The fraction of {@link TermDocs} entries stored in skip tables, used to accelerate {@link TermDocs#skipTo(int)}. /// Larger values result in smaller indexes, greater acceleration, but fewer accelerable cases, while smaller values /// result in bigger indexes, less acceleration and more accelerable cases. More detailed experiments would be useful /// here. int32_t skipInterval; /// The maximum number of skip levels. Smaller values result in slightly smaller indexes, but slower skipping /// in big posting lists. 
int32_t maxSkipLevels; protected: FieldInfosPtr fieldInfos; IndexOutputPtr output; TermInfoPtr lastTi; int64_t size; int64_t lastIndexPointer; bool isIndex; ByteArray lastTermBytes; int32_t lastTermBytesLength; int32_t lastFieldNumber; TermInfosWriterPtr otherWriter; TermInfosWriterWeakPtr _other; UTF8ResultPtr utf8Result; // Currently used only by assert statements UnicodeResultPtr unicodeResult1; UnicodeResultPtr unicodeResult2; public: virtual void initialize(); void add(const TermPtr& term, const TermInfoPtr& ti); /// Adds a new <, TermInfo> pair to the set. Term must be lexicographically /// greater than all previous Terms added. TermInfo pointers must be positive and greater than all previous. void add(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength, const TermInfoPtr& ti); /// Called to complete TermInfos creation. void close(); protected: void initialize(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fis, int32_t interval, bool isi); /// Currently used only by assert statements bool initUnicodeResults(); /// Currently used only by assert statement int32_t compareToLastTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength); void writeTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermPositionVector.h000066400000000000000000000030351456444476200243450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMPOSITIONVECTOR_H #define TERMPOSITIONVECTOR_H #include "TermFreqVector.h" namespace Lucene { /// Extends TermFreqVector to provide additional information about positions in which each of the terms is found. A TermPositionVector not necessarily /// contains both positions and offsets, but at least one of these arrays exists. class LPPAPI TermPositionVector : public TermFreqVector { protected: TermPositionVector(); public: virtual ~TermPositionVector(); LUCENE_INTERFACE(TermPositionVector); public: /// Returns an array of positions in which the term is found. Terms are identified by the index at which its number appears in the term String /// array obtained from the indexOf method. May return null if positions have not been stored. virtual Collection getTermPositions(int32_t index); /// Returns an array of TermVectorOffsetInfo in which the term is found. May return null if offsets have not been stored. /// @see Token /// @param index The position in the array to get the offsets from /// @return An array of TermVectorOffsetInfo objects or the empty list virtual Collection getOffsets(int32_t index); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermPositions.h000066400000000000000000000046331456444476200233520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMPOSITIONS_H #define TERMPOSITIONS_H #include "TermDocs.h" namespace Lucene { /// TermPositions provides an interface for enumerating the *> /// tuples for a term. The document and frequency are the same as for a TermDocs. 
The positions portion /// lists the ordinal positions of each occurrence of a term in a document. /// @see IndexReader#termPositions() class LPPAPI TermPositions : public TermDocs { protected: TermPositions(); public: virtual ~TermPositions(); LUCENE_INTERFACE(TermPositions); public: /// Returns next position in the current document. It is an error to call this more than {@link #freq()} /// times without calling {@link #next()}. This is invalid until {@link #next()} is called for // the first time. virtual int32_t nextPosition(); /// Returns the length of the payload at the current term position. This is invalid until {@link /// #nextPosition()} is called for the first time. /// @return length of the current payload in number of bytes virtual int32_t getPayloadLength(); /// Returns the payload data at the current term position. This is invalid until {@link #nextPosition()} /// is called for the first time. /// This method must not be called more than once after each call of {@link #nextPosition()}. However, /// payloads are loaded lazily, so if the payload data for the current position is not needed, /// this method may not be called at all for performance reasons. /// @param data the array into which the data of this payload is to be stored /// @param offset the offset in the array into which the data of this payload is to be stored. /// @return a byte array containing the data of this payload virtual ByteArray getPayload(ByteArray data, int32_t offset); /// Checks if a payload can be loaded at this position. /// Payloads can only be loaded once per call to {@link #nextPosition()}. /// @return true if there is a payload available at this position that can be loaded virtual bool isPayloadAvailable(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermQuery.h000066400000000000000000000024141456444476200224630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMQUERY_H #define TERMQUERY_H #include "Query.h" namespace Lucene { /// A Query that matches documents containing a term. This may be combined with other terms with a /// {@link BooleanQuery}. class LPPAPI TermQuery : public Query { public: /// Constructs a query for the term. TermQuery(const TermPtr& term); virtual ~TermQuery(); LUCENE_CLASS(TermQuery); protected: TermPtr term; public: using Query::toString; /// Returns the term of this query. TermPtr getTerm(); virtual WeightPtr createWeight(const SearcherPtr& searcher); virtual void extractTerms(SetTerm terms); /// Prints a user-readable version of this query. virtual String toString(const String& field); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); friend class TermWeight; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermRangeFilter.h000066400000000000000000000055241456444476200235650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMRANGEFILTER_H #define TERMRANGEFILTER_H #include "MultiTermQueryWrapperFilter.h" namespace Lucene { /// A Filter that restricts search results to a range of term values in a given field. /// /// This filter matches the documents looking for terms that fall into the supplied range according to {@link /// String#compare(String)}, unless a Collator is provided. 
It is not intended for numerical ranges; use {@link /// NumericRangeFilter} instead. /// /// If you construct a large number of range filters with different ranges but on the same field, {@link /// FieldCacheRangeFilter} may have significantly better performance. class LPPAPI TermRangeFilter : public MultiTermQueryWrapperFilter { public: /// Warning: Using this constructor and supplying a non-null value in the collator parameter will cause /// every single index Term in the Field referenced by lowerTerm and/or upperTerm to be examined. Depending /// on the number of index Terms in this Field, the operation could be very slow. /// @param lowerTerm The lower bound on this range /// @param upperTerm The upper bound on this range /// @param includeLower Does this range include the lower bound? /// @param includeUpper Does this range include the upper bound? /// @param collator The collator to use when determining range inclusion; set to null to use Unicode code /// point ordering instead of collation. TermRangeFilter(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, bool includeUpper, CollatorPtr collator = CollatorPtr()); virtual ~TermRangeFilter(); LUCENE_CLASS(TermRangeFilter); public: /// Constructs a filter for field fieldName matching less than or equal to upperTerm. static TermRangeFilterPtr Less(const String& fieldName, StringValue upperTerm); /// Constructs a filter for field fieldName matching greater than or equal to lowerTerm. 
static TermRangeFilterPtr More(const String& fieldName, StringValue lowerTerm); /// Returns the field name for this filter String getField(); /// Returns the lower value of this range filter String getLowerTerm(); /// Returns the upper value of this range filter String getUpperTerm(); /// Returns true if the lower endpoint is inclusive bool includesLower(); /// Returns true if the upper endpoint is inclusive bool includesUpper(); /// Returns the collator used to determine range inclusion, if any. CollatorPtr getCollator(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermRangeQuery.h000066400000000000000000000067151456444476200234500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMRANGEQUERY_H #define TERMRANGEQUERY_H #include "MultiTermQuery.h" namespace Lucene { /// A Query that matches documents within an range of terms. /// /// This query matches the documents looking for terms that fall into the supplied range according to {@link /// String#compare(String)}, unless a Collator is provided. It is not intended for numerical ranges; use {@link /// NumericRangeQuery} instead. /// /// This query uses the {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} rewrite method. class LPPAPI TermRangeQuery : public MultiTermQuery { public: /// Constructs a query selecting all terms greater/equal than lowerTerm but less/equal than upperTerm. /// /// If an endpoint is null, it is said to be "open". Either or both endpoints may be open. Open endpoints /// may not be exclusive (you can't select all but the first or last term without explicitly specifying the /// term to exclude.) 
/// /// If collator is not null, it will be used to decide whether index terms are within the given range, rather /// than using the Unicode code point order in which index terms are stored. /// /// Warning: Using this constructor and supplying a non-null value in the collator parameter will cause every /// single index Term in the Field referenced by lowerTerm and/or upperTerm to be examined. Depending on the /// number of index Terms in this Field, the operation could be very slow. /// /// @param lowerTerm The Term text at the lower end of the range /// @param upperTerm The Term text at the upper end of the range /// @param includeLower If true, the lowerTerm is included in the range. /// @param includeUpper If true, the upperTerm is included in the range. /// @param collator The collator to use to collate index Terms, to determine their membership in the range /// bounded by lowerTerm and upperTerm. TermRangeQuery(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, bool includeUpper, CollatorPtr collator = CollatorPtr()); virtual ~TermRangeQuery(); LUCENE_CLASS(TermRangeQuery); protected: StringValue lowerTerm; StringValue upperTerm; CollatorPtr collator; String field; bool includeLower; bool includeUpper; public: using MultiTermQuery::toString; /// Returns the field name for this query String getField(); /// Returns the lower value of this range query String getLowerTerm(); /// Returns the upper value of this range query String getUpperTerm(); /// Returns true if the lower endpoint is inclusive bool includesLower(); /// Returns true if the upper endpoint is inclusive bool includesUpper(); /// Returns the collator used to determine range inclusion, if any. 
CollatorPtr getCollator(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual String toString(const String& field); virtual int32_t hashCode(); virtual bool equals(const LuceneObjectPtr& other); protected: virtual FilteredTermEnumPtr getEnum(const IndexReaderPtr& reader); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermRangeTermEnum.h000066400000000000000000000043521456444476200240720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMRANGETERMENUM_H #define TERMRANGETERMENUM_H #include "FilteredTermEnum.h" namespace Lucene { /// Subclass of FilteredTermEnum for enumerating all terms that match the specified range parameters. /// /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than /// all that precede it. class LPPAPI TermRangeTermEnum : public FilteredTermEnum { public: /// Enumerates all terms greater/equal than lowerTerm but less/equal than upperTerm. /// /// If an endpoint is null, it is said to be "open". Either or both endpoints may be open. Open endpoints /// may not be exclusive (you can't select all but the first or last term without explicitly specifying /// the term to exclude.) /// /// @param reader /// @param field An interned field that holds both lower and upper terms. /// @param lowerTermText The term text at the lower end of the range /// @param upperTermText The term text at the upper end of the range /// @param includeLower If true, the lowerTerm is included in the range. /// @param includeUpper If true, the upperTerm is included in the range. 
/// @param collator The collator to use to collate index Terms, to determine their membership in the range /// bounded by lowerTerm and upperTerm. TermRangeTermEnum(const IndexReaderPtr& reader, const String& field, StringValue lowerTermText, StringValue upperTermText, bool includeLower, bool includeUpper, const CollatorPtr& collator); virtual ~TermRangeTermEnum(); LUCENE_CLASS(TermRangeTermEnum); protected: CollatorPtr collator; bool _endEnum; String field; StringValue upperTermText; StringValue lowerTermText; bool includeLower; bool includeUpper; public: virtual double difference(); protected: virtual bool endEnum(); virtual bool termCompare(const TermPtr& term); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermScorer.h000066400000000000000000000051321456444476200226130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMSCORER_H #define TERMSCORER_H #include "Scorer.h" namespace Lucene { /// A Scorer for documents matching a Term. class LPPAPI TermScorer : public Scorer { public: /// Construct a TermScorer. /// @param weight The weight of the Term in the query. /// @param td An iterator over the documents matching the Term. /// @param similarity The Similarity implementation to be used for score computations. /// @param norms The field norms of the document fields for the Term. 
TermScorer(const WeightPtr& weight, const TermDocsPtr& td, const SimilarityPtr& similarity, ByteArray norms); virtual ~TermScorer(); LUCENE_CLASS(TermScorer); protected: WeightPtr weight; TermDocsPtr termDocs; // for malloc and free TermDocs* __termDocs; // for work, ByteArray norms; double weightValue; int32_t doc; Collection docs; // buffered doc numbers decltype(docs.get()) __docs; // Collection freqs; // buffered term freqs decltype(freqs.get()) __freqs; // int32_t freq; int32_t pointer; int32_t pointerMax; static const int32_t SCORE_CACHE_SIZE; Collection scoreCache; public: virtual void score(const CollectorPtr& collector); virtual int32_t docID(); /// Advances to the next document matching the query. /// The iterator over the matching documents is buffered using {@link /// TermDocs#read(Collection, Collection)}. /// @return the document matching the query or -1 if there are no more documents. virtual int32_t nextDoc(); virtual double score(); /// Advances to the first match beyond the current whose document number is greater than or equal to a /// given target. The implementation uses {@link TermDocs#skipTo(int32_t)}. /// @param target The target document number. /// @return the matching document or -1 if none exist. virtual int32_t advance(int32_t target); /// Returns a string representation of this TermScorer. virtual String toString(); virtual float termFreq(){ return freq; } protected: static const Collection& SIM_NORM_DECODER(); virtual bool score(const CollectorPtr& collector, int32_t max, int32_t firstDocID); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermSpans.h000066400000000000000000000021041456444476200224360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMSPANS_H #define TERMSPANS_H #include "Spans.h" namespace Lucene { /// Public for extension only class LPPAPI TermSpans : public Spans { public: TermSpans(const TermPositionsPtr& positions, const TermPtr& term); virtual ~TermSpans(); LUCENE_CLASS(TermSpans); protected: TermPositionsPtr positions; TermPtr term; int32_t _doc; int32_t freq; int32_t count; int32_t position; public: virtual bool next(); virtual bool skipTo(int32_t target); virtual int32_t doc(); virtual int32_t start(); virtual int32_t end(); virtual Collection getPayload(); virtual bool isPayloadAvailable(); virtual String toString(); TermPositionsPtr getPositions(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermVectorEntry.h000066400000000000000000000030111456444476200236340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORENTRY_H #define TERMVECTORENTRY_H #include "LuceneObject.h" namespace Lucene { /// Convenience class for holding TermVector information. 
class LPPAPI TermVectorEntry : public LuceneObject { public: TermVectorEntry(const String& field = EmptyString, const String& term = EmptyString, int32_t frequency = 0, Collection offsets = Collection(), Collection positions = Collection()); virtual ~TermVectorEntry(); LUCENE_CLASS(TermVectorEntry); protected: String field; String term; int32_t frequency; Collection offsets; Collection positions; public: String getField(); int32_t getFrequency(); Collection getOffsets(); Collection getPositions(); String getTerm(); void setFrequency(int32_t frequency); void setOffsets(Collection offsets); void setPositions(Collection positions); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermVectorEntryFreqSortedComparator.h000066400000000000000000000015561456444476200276770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORENTRYFREQSORTEDCOMPARATOR_H #define TERMVECTORENTRYFREQSORTEDCOMPARATOR_H #include "LuceneObject.h" namespace Lucene { /// Compares {@link TermVectorEntry}s first by frequency and then by the term (case-sensitive) class LPPAPI TermVectorEntryFreqSortedComparator : public LuceneObject { public: virtual ~TermVectorEntryFreqSortedComparator(); LUCENE_CLASS(TermVectorEntryFreqSortedComparator); public: static bool compare(const TermVectorEntryPtr& first, const TermVectorEntryPtr& second); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermVectorMapper.h000066400000000000000000000065751456444476200240010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORMAPPER_H #define TERMVECTORMAPPER_H #include "LuceneObject.h" namespace Lucene { /// The TermVectorMapper can be used to map Term Vectors into your own structure instead of the parallel /// array structure used by {@link IndexReader#getTermFreqVector(int,String)}. /// /// It is up to the implementation to make sure it is thread-safe. class LPPAPI TermVectorMapper : public LuceneObject { public: /// @param ignoringPositions true if this mapper should tell Lucene to ignore positions even if /// they are stored. /// @param ignoringOffsets similar to ignoringPositions TermVectorMapper(bool ignoringPositions = false, bool ignoringOffsets = false); virtual ~TermVectorMapper(); LUCENE_CLASS(TermVectorMapper); protected: bool ignoringPositions; bool ignoringOffsets; public: /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. 
/// This method will be called once before retrieving the vector for a field. /// /// This method will be called before {@link #map(String,int,TermVectorOffsetInfo[],int[])}. /// @param field The field the vector is for /// @param numTerms The number of terms that need to be mapped /// @param storeOffsets true if the mapper should expect offset information /// @param storePositions true if the mapper should expect positions info virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) = 0; /// Map the Term Vector information into your own structure /// @param term The term to add to the vector /// @param frequency The frequency of the term in the document /// @param offsets null if the offset is not specified, otherwise the offset into the field of the term /// @param positions null if the position is not specified, otherwise the position in the field of the term virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions) = 0; /// Indicate to Lucene that even if there are positions stored, this mapper is not interested in them and /// they can be skipped over. Derived classes should set this to true if they want to ignore positions. /// The default is false, meaning positions will be loaded if they are stored. virtual bool isIgnoringPositions(); /// @see #isIgnoringPositions() Same principal as {@link #isIgnoringPositions()}, but applied to offsets. virtual bool isIgnoringOffsets(); /// Passes down the index of the document whose term vector is currently being mapped, once for each top /// level call to a term vector reader. /// /// Default implementation IGNORES the document number. Override if your implementation needs the document /// number. /// /// NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations. 
/// /// @param documentNumber index of document currently being mapped virtual void setDocumentNumber(int32_t documentNumber); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermVectorOffsetInfo.h000066400000000000000000000033631456444476200246070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTOROFFSETINFO_H #define TERMVECTOROFFSETINFO_H #include "LuceneObject.h" namespace Lucene { /// The TermVectorOffsetInfo class holds information pertaining to a Term in a {@link TermPositionVector}'s /// offset information. This offset information is the character offset as set during the Analysis phase /// (and thus may not be the actual offset in the original content). class LPPAPI TermVectorOffsetInfo : public LuceneObject { public: TermVectorOffsetInfo(int32_t startOffset = 0, int32_t endOffset = 0); virtual ~TermVectorOffsetInfo(); LUCENE_CLASS(TermVectorOffsetInfo); protected: int32_t startOffset; int32_t endOffset; public: /// Convenience declaration when creating a {@link TermPositionVector} that stores only position information. static const Collection EMPTY_OFFSET_INFO(); /// The accessor for the ending offset for the term int32_t getEndOffset(); void setEndOffset(int32_t endOffset); /// The accessor for the starting offset of the term. int32_t getStartOffset(); void setStartOffset(int32_t startOffset); /// Two TermVectorOffsetInfos are equals if both the start and end offsets are the same. /// @return true if both {@link #getStartOffset()} and {@link #getEndOffset()} are the same for both objects. 
virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermVectorsReader.h000066400000000000000000000133111456444476200241240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORSREADER_H #define TERMVECTORSREADER_H #include "TermVectorMapper.h" namespace Lucene { class LPPAPI TermVectorsReader : public LuceneObject { public: TermVectorsReader(); TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos); TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos, int32_t readBufferSize, int32_t docStoreOffset = -1, int32_t size = 0); virtual ~TermVectorsReader(); LUCENE_CLASS(TermVectorsReader); public: /// NOTE: if you make a new format, it must be larger than the current format static const int32_t FORMAT_VERSION; /// Changes to speed up bulk merging of term vectors static const int32_t FORMAT_VERSION2; /// Changed strings to UTF8 with length-in-bytes not length-in-chars static const int32_t FORMAT_UTF8_LENGTH_IN_BYTES; /// NOTE: always change this if you switch to a new format. static const int32_t FORMAT_CURRENT; /// The size in bytes that the FORMAT_VERSION will take up at the beginning of each file static const int32_t FORMAT_SIZE; static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR; static const uint8_t STORE_OFFSET_WITH_TERMVECTOR; protected: FieldInfosPtr fieldInfos; IndexInputPtr tvx; IndexInputPtr tvd; IndexInputPtr tvf; int32_t _size; int32_t numTotalDocs; /// The docID offset where our docs begin in the index file. This will be 0 if we have our own private file. 
int32_t docStoreOffset; int32_t format; public: /// Used for bulk copy when merging IndexInputPtr getTvdStream(); /// Used for bulk copy when merging IndexInputPtr getTvfStream(); bool canReadRawDocs(); /// Retrieve the length (in bytes) of the tvd and tvf entries for the next numDocs starting with /// startDocID. This is used for bulk copying when merging segments, if the field numbers are /// congruent. Once this returns, the tvf & tvd streams are seeked to the startDocID. void rawDocs(Collection tvdLengths, Collection tvfLengths, int32_t startDocID, int32_t numDocs); void close(); /// @return The number of documents in the reader int32_t size(); void get(int32_t docNum, const String& field, const TermVectorMapperPtr& mapper); /// Retrieve the term vector for the given document and field /// @param docNum The document number to retrieve the vector for /// @param field The field within the document to retrieve /// @return The TermFreqVector for the document and field or null if there is no termVector for /// this field. TermFreqVectorPtr get(int32_t docNum, const String& field); /// Return all term vectors stored for this document or null if the could not be read in. 
/// /// @param docNum The document number to retrieve the vector for /// @return All term frequency vectors Collection get(int32_t docNum); void get(int32_t docNumber, const TermVectorMapperPtr& mapper); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); protected: void ConstructReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size); void seekTvx(int32_t docNum); int32_t checkValidFormat(const IndexInputPtr& in); /// Reads the String[] fields; you have to pre-seek tvd to the right point Collection readFields(int32_t fieldCount); /// Reads the long[] offsets into TVF; you have to pre-seek tvx/tvd to the right point Collection readTvfPointers(int32_t fieldCount); Collection readTermVectors(int32_t docNum, Collection fields, Collection tvfPointers); void readTermVectors(Collection fields, Collection tvfPointers, const TermVectorMapperPtr& mapper); /// @param field The field to read in /// @param tvfPointer The pointer within the tvf file where we should start reading /// @param mapper The mapper used to map the TermVector void readTermVector(const String& field, int64_t tvfPointer, const TermVectorMapperPtr& mapper); }; /// Models the existing parallel array structure class ParallelArrayTermVectorMapper : public TermVectorMapper { public: ParallelArrayTermVectorMapper(); virtual ~ParallelArrayTermVectorMapper(); LUCENE_CLASS(ParallelArrayTermVectorMapper); protected: Collection terms; Collection termFreqs; Collection< Collection > positions; Collection< Collection > offsets; int32_t currentPosition; bool storingOffsets; bool storingPositions; String field; public: /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. /// This method will be called once before retrieving the vector for a field. 
virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); /// Map the Term Vector information into your own structure virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); /// Construct the vector /// @return The {@link TermFreqVector} based on the mappings. TermFreqVectorPtr materializeVector(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermVectorsTermsWriter.h000066400000000000000000000056351456444476200252230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORSTERMSWRITER_H #define TERMVECTORSTERMSWRITER_H #include "TermsHashConsumer.h" #include "DocumentsWriter.h" #include "RawPostingList.h" namespace Lucene { class TermVectorsTermsWriter : public TermsHashConsumer { public: TermVectorsTermsWriter(const DocumentsWriterPtr& docWriter); virtual ~TermVectorsTermsWriter(); LUCENE_CLASS(TermVectorsTermsWriter); public: DocumentsWriterWeakPtr _docWriter; TermVectorsWriterPtr termVectorsWriter; Collection docFreeList; int32_t freeCount; IndexOutputPtr tvx; IndexOutputPtr tvd; IndexOutputPtr tvf; int32_t lastDocID; int32_t allocCount; public: virtual TermsHashConsumerPerThreadPtr addThread(const TermsHashPerThreadPtr& perThread); virtual void createPostings(Collection postings, int32_t start, int32_t count); virtual void flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state); virtual void closeDocStore(const SegmentWriteStatePtr& state); TermVectorsTermsWriterPerDocPtr getPerDoc(); /// Fills in no-term-vectors for all docs we haven't seen since the last doc that had 
term vectors. void fill(int32_t docID); void initTermVectorsWriter(); void finishDocument(const TermVectorsTermsWriterPerDocPtr& perDoc); bool freeRAM(); void free(const TermVectorsTermsWriterPerDocPtr& doc); virtual void abort(); virtual int32_t bytesPerPosting(); }; class TermVectorsTermsWriterPerDoc : public DocWriter { public: TermVectorsTermsWriterPerDoc(const TermVectorsTermsWriterPtr& termsWriter = TermVectorsTermsWriterPtr()); virtual ~TermVectorsTermsWriterPerDoc(); LUCENE_CLASS(TermVectorsTermsWriterPerDoc); protected: TermVectorsTermsWriterWeakPtr _termsWriter; public: PerDocBufferPtr buffer; RAMOutputStreamPtr perDocTvf; int32_t numVectorFields; Collection fieldNumbers; Collection fieldPointers; public: void reset(); virtual void abort(); void addField(int32_t fieldNumber); virtual int64_t sizeInBytes(); virtual void finish(); }; class TermVectorsTermsWriterPostingList : public RawPostingList { public: TermVectorsTermsWriterPostingList(); virtual ~TermVectorsTermsWriterPostingList(); LUCENE_CLASS(TermVectorsTermsWriterPostingList); public: int32_t freq; // How many times this term occurred in the current doc int32_t lastOffset; // Last offset we saw int32_t lastPosition; // Last position where this term occurred }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermVectorsTermsWriterPerField.h000066400000000000000000000034641456444476200266340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORSTERMSWRITERPERFIELD_H #define TERMVECTORSTERMSWRITERPERFIELD_H #include "TermsHashConsumerPerField.h" namespace Lucene { class TermVectorsTermsWriterPerField : public TermsHashConsumerPerField { public: TermVectorsTermsWriterPerField(const TermsHashPerFieldPtr& termsHashPerField, const TermVectorsTermsWriterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo); virtual ~TermVectorsTermsWriterPerField(); LUCENE_CLASS(TermVectorsTermsWriterPerField); public: TermVectorsTermsWriterPerThreadWeakPtr _perThread; TermsHashPerFieldWeakPtr _termsHashPerField; TermVectorsTermsWriterWeakPtr _termsWriter; FieldInfoPtr fieldInfo; DocStateWeakPtr _docState; FieldInvertStateWeakPtr _fieldState; bool doVectors; bool doVectorPositions; bool doVectorOffsets; int32_t maxNumPostings; OffsetAttributePtr offsetAttribute; public: virtual int32_t getStreamCount(); virtual bool start(Collection fields, int32_t count); virtual void abort(); /// Called once per field per document if term vectors are enabled, to write the vectors to RAMOutputStream, /// which is then quickly flushed to the real term vectors files in the Directory. virtual void finish(); void shrinkHash(); virtual void start(const FieldablePtr& field); virtual void newTerm(const RawPostingListPtr& p0); virtual void addTerm(const RawPostingListPtr& p0); virtual void skippingLongTerm(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermVectorsTermsWriterPerThread.h000066400000000000000000000027041456444476200270140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORSTERMSWRITERPERTHREAD_H #define TERMVECTORSTERMSWRITERPERTHREAD_H #include "TermsHashConsumerPerThread.h" namespace Lucene { class TermVectorsTermsWriterPerThread : public TermsHashConsumerPerThread { public: TermVectorsTermsWriterPerThread(const TermsHashPerThreadPtr& termsHashPerThread, const TermVectorsTermsWriterPtr& termsWriter); virtual ~TermVectorsTermsWriterPerThread(); LUCENE_CLASS(TermVectorsTermsWriterPerThread); public: TermVectorsTermsWriterWeakPtr _termsWriter; TermsHashPerThreadWeakPtr _termsHashPerThread; DocStateWeakPtr _docState; TermVectorsTermsWriterPerDocPtr doc; ByteSliceReaderPtr vectorSliceReader; Collection utf8Results; String lastVectorFieldName; public: virtual void startDocument(); virtual DocWriterPtr finishDocument(); virtual TermsHashConsumerPerFieldPtr addField(const TermsHashPerFieldPtr& termsHashPerField, const FieldInfoPtr& fieldInfo); virtual void abort(); /// Called only by assert bool clearLastVectorFieldName(); bool vectorFieldsInOrder(const FieldInfoPtr& fi); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermVectorsWriter.h000066400000000000000000000025631456444476200242050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMVECTORSWRITER_H #define TERMVECTORSWRITER_H #include "LuceneObject.h" namespace Lucene { class TermVectorsWriter : public LuceneObject { public: TermVectorsWriter(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fieldInfos); virtual ~TermVectorsWriter(); LUCENE_CLASS(TermVectorsWriter); protected: IndexOutputPtr tvx; IndexOutputPtr tvd; IndexOutputPtr tvf; FieldInfosPtr fieldInfos; Collection utf8Results; public: /// Add a complete document specified by all its term vectors. If document has no term vectors, /// add value for tvx. void addAllDocVectors(Collection vectors); /// Do a bulk copy of numDocs documents from reader to our streams. This is used to expedite merging, /// if the field numbers are congruent. void addRawDocuments(const TermVectorsReaderPtr& reader, Collection tvdLengths, Collection tvfLengths, int32_t numDocs); /// Close all streams. void close(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermsHash.h000066400000000000000000000051021456444476200224210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMSHASH_H #define TERMSHASH_H #include "InvertedDocConsumer.h" namespace Lucene { /// This class implements {@link InvertedDocConsumer}, which is passed each token produced by the analyzer on /// each field. It stores these tokens in a hash table, and allocates separate byte streams per token. Consumers /// of this class, eg {@link FreqProxTermsWriter} and {@link TermVectorsTermsWriter}, write their own byte streams /// under each term. 
class TermsHash : public InvertedDocConsumer { public: TermsHash(const DocumentsWriterPtr& docWriter, bool trackAllocations, const TermsHashConsumerPtr& consumer, const TermsHashPtr& nextTermsHash); virtual ~TermsHash(); LUCENE_CLASS(TermsHash); public: TermsHashConsumerPtr consumer; TermsHashPtr nextTermsHash; int32_t bytesPerPosting; int32_t postingsFreeChunk; DocumentsWriterWeakPtr _docWriter; bool trackAllocations; protected: Collection postingsFreeList; int32_t postingsFreeCount; int32_t postingsAllocCount; public: /// Add a new thread virtual InvertedDocConsumerPerThreadPtr addThread(const DocInverterPerThreadPtr& docInverterPerThread); virtual TermsHashPerThreadPtr addThread(const DocInverterPerThreadPtr& docInverterPerThread, const TermsHashPerThreadPtr& primaryPerThread); virtual void setFieldInfos(const FieldInfosPtr& fieldInfos); /// Abort (called after hitting AbortException) /// NOTE: do not make this sync'd; it's not necessary (DW ensures all other threads are idle), and it /// leads to deadlock virtual void abort(); void shrinkFreePostings(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state); /// Close doc stores virtual void closeDocStore(const SegmentWriteStatePtr& state); /// Flush a new segment virtual void flush(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state); /// Attempt to free RAM, returning true if any RAM was freed virtual bool freeRAM(); void recyclePostings(Collection postings, int32_t numPostings); void getPostings(Collection postings); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermsHashConsumer.h000066400000000000000000000022511456444476200241370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMSHASHCONSUMER_H #define TERMSHASHCONSUMER_H #include "LuceneObject.h" namespace Lucene { class TermsHashConsumer : public LuceneObject { public: virtual ~TermsHashConsumer(); LUCENE_CLASS(TermsHashConsumer); public: FieldInfosPtr fieldInfos; public: virtual int32_t bytesPerPosting() = 0; virtual void createPostings(Collection postings, int32_t start, int32_t count) = 0; virtual TermsHashConsumerPerThreadPtr addThread(const TermsHashPerThreadPtr& perThread) = 0; virtual void flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) = 0; virtual void abort() = 0; virtual void closeDocStore(const SegmentWriteStatePtr& state) = 0; virtual void setFieldInfos(const FieldInfosPtr& fieldInfos); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermsHashConsumerPerField.h000066400000000000000000000022511456444476200255520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMSHASHCONSUMERPERFIELD_H #define TERMSHASHCONSUMERPERFIELD_H #include "LuceneObject.h" namespace Lucene { /// Implement this class to plug into the TermsHash processor, which inverts & stores Tokens into a hash /// table and provides an API for writing bytes into multiple streams for each unique Token. 
class TermsHashConsumerPerField : public LuceneObject { public: virtual ~TermsHashConsumerPerField(); LUCENE_CLASS(TermsHashConsumerPerField); public: virtual bool start(Collection fields, int32_t count) = 0; virtual void finish() = 0; virtual void skippingLongTerm() = 0; virtual void start(const FieldablePtr& field) = 0; virtual void newTerm(const RawPostingListPtr& p) = 0; virtual void addTerm(const RawPostingListPtr& p) = 0; virtual int32_t getStreamCount() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermsHashConsumerPerThread.h000066400000000000000000000015671456444476200257470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TERMSHASHCONSUMERPERTHREAD_H #define TERMSHASHCONSUMERPERTHREAD_H #include "LuceneObject.h" namespace Lucene { class TermsHashConsumerPerThread : public LuceneObject { public: virtual ~TermsHashConsumerPerThread(); LUCENE_CLASS(TermsHashConsumerPerThread); public: virtual void startDocument() = 0; virtual DocWriterPtr finishDocument() = 0; virtual TermsHashConsumerPerFieldPtr addField(const TermsHashPerFieldPtr& termsHashPerField, const FieldInfoPtr& fieldInfo) = 0; virtual void abort() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermsHashPerField.h000066400000000000000000000057571456444476200240540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMSHASHPERFIELD_H #define TERMSHASHPERFIELD_H #include "InvertedDocConsumerPerField.h" namespace Lucene { class TermsHashPerField : public InvertedDocConsumerPerField { public: TermsHashPerField(const DocInverterPerFieldPtr& docInverterPerField, const TermsHashPerThreadPtr& perThread, const TermsHashPerThreadPtr& nextPerThread, const FieldInfoPtr& fieldInfo); virtual ~TermsHashPerField(); LUCENE_CLASS(TermsHashPerField); public: TermsHashConsumerPerFieldPtr consumer; TermsHashPerFieldPtr nextPerField; DocInverterPerFieldWeakPtr _docInverterPerField; TermsHashPerThreadPtr nextPerThread; TermsHashPerThreadWeakPtr _perThread; DocStatePtr docState; FieldInvertStatePtr fieldState; TermAttributePtr termAtt; // Copied from our perThread CharBlockPoolPtr charPool; IntBlockPoolPtr intPool; ByteBlockPoolPtr bytePool; int32_t streamCount; int32_t numPostingInt; FieldInfoPtr fieldInfo; bool postingsCompacted; int32_t numPostings; IntArray intUptos; int32_t intUptoStart; protected: int32_t postingsHashSize; int32_t postingsHashHalfSize; int32_t postingsHashMask; Collection postingsHash; RawPostingListPtr p; bool doCall; bool doNextCall; public: virtual void initialize(); void shrinkHash(int32_t targetSize); void reset(); /// Called on hitting an aborting exception virtual void abort(); void initReader(const ByteSliceReaderPtr& reader, const RawPostingListPtr& p, int32_t stream); /// Collapse the hash table and sort in-place. Collection sortPostings(); /// Called before a field instance is being processed virtual void start(const FieldablePtr& field); /// Called once per field, and is given all Fieldable occurrences for this field in the document. 
virtual bool start(Collection fields, int32_t count); void add(int32_t textStart); /// Primary entry point (for first TermsHash) virtual void add(); void writeByte(int32_t stream, int8_t b); void writeBytes(int32_t stream, const uint8_t* b, int32_t offset, int32_t length); void writeVInt(int32_t stream, int32_t i); /// Called once per field per document, after all Fieldable occurrences are inverted virtual void finish(); /// Called when postings hash is too small (> 50% occupied) or too large (< 20% occupied). void rehashPostings(int32_t newSize); protected: void compactPostings(); /// Test whether the text for current RawPostingList p equals current tokenText. bool postingEquals(const wchar_t* tokenText, int32_t tokenTextLen); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TermsHashPerThread.h000066400000000000000000000034561456444476200242320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TERMSHASHPERTHREAD_H #define TERMSHASHPERTHREAD_H #include "InvertedDocConsumerPerThread.h" namespace Lucene { class TermsHashPerThread : public InvertedDocConsumerPerThread { public: TermsHashPerThread(const DocInverterPerThreadPtr& docInverterPerThread, const TermsHashPtr& termsHash, const TermsHashPtr& nextTermsHash, const TermsHashPerThreadPtr& primaryPerThread); virtual ~TermsHashPerThread(); LUCENE_CLASS(TermsHashPerThread); public: DocInverterPerThreadWeakPtr _docInverterPerThread; TermsHashWeakPtr _termsHash; TermsHashPtr nextTermsHash; TermsHashPerThreadWeakPtr _primaryPerThread; TermsHashConsumerPerThreadPtr consumer; TermsHashPerThreadPtr nextPerThread; CharBlockPoolPtr charPool; IntBlockPoolPtr intPool; ByteBlockPoolPtr bytePool; bool primary; DocStatePtr docState; Collection freePostings; int32_t freePostingsCount; public: virtual void initialize(); virtual InvertedDocConsumerPerFieldPtr addField(const DocInverterPerFieldPtr& docInverterPerField, const FieldInfoPtr& fieldInfo); virtual void abort(); /// perField calls this when it needs more postings void morePostings(); virtual void startDocument(); virtual DocWriterPtr finishDocument(); /// Clear all state void reset(bool recyclePostings); protected: static bool noNullPostings(Collection postings, int32_t count, const String& details); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TestPoint.h000066400000000000000000000020671456444476200224630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TESTPOINT_H #define TESTPOINT_H #include "Lucene.h" namespace Lucene { /// Used for unit testing as a substitute for stack trace class LPPAPI TestPoint { public: virtual ~TestPoint(); protected: static MapStringInt testMethods; static bool enable; public: static void enableTestPoints(); static void clear(); static void setTestPoint(const String& object, const String& method, bool point); static bool getTestPoint(const String& object, const String& method); static bool getTestPoint(const String& method); }; class LPPAPI TestScope { public: TestScope(const String& object, const String& method); virtual ~TestScope(); protected: String object; String method; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ThreadPool.h000066400000000000000000000043151456444476200225710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef THREADPOOL_H #define THREADPOOL_H #include #include #include #include "LuceneObject.h" namespace Lucene { typedef boost::shared_ptr workPtr; /// A Future represents the result of an asynchronous computation. Methods are provided to check if the computation /// is complete, to wait for its completion, and to retrieve the result of the computation. The result can only be /// retrieved using method get when the computation has completed, blocking if necessary until it is ready. 
class Future : public LuceneObject { public: virtual ~Future(); protected: boost::any value; public: void set(const boost::any& value) { SyncLock syncLock(this); this->value = value; } template TYPE get() { SyncLock syncLock(this); while (value.empty()) { wait(10); } return value.empty() ? TYPE() : boost::any_cast(value); } }; /// Utility class to handle a pool of threads. class ThreadPool : public LuceneObject { public: ThreadPool(); virtual ~ThreadPool(); LUCENE_CLASS(ThreadPool); protected: boost::asio::io_service io_service; workPtr work; boost::thread_group threadGroup; static const int32_t THREADPOOL_SIZE; public: /// Get singleton thread pool instance. static ThreadPoolPtr getInstance(); template FuturePtr scheduleTask(FUNC func) { FuturePtr future(newInstance()); io_service.post(boost::bind(&ThreadPool::execute, this, func, future)); return future; } protected: // this will be executed when one of the threads is available template void execute(FUNC func, const FuturePtr& future) { future->set(func()); future->notifyAll(); } }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TimeLimitingCollector.h000066400000000000000000000071341456444476200247740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TIMELIMITINGCOLLECTOR_H #define TIMELIMITINGCOLLECTOR_H #include "Collector.h" namespace Lucene { /// The {@link TimeLimitingCollector} is used to timeout search requests that take longer than the maximum /// allowed search time limit. After this time is exceeded, the search thread is stopped by throwing a /// {@link TimeExceededException}. 
class LPPAPI TimeLimitingCollector : public Collector { public: /// Create a TimeLimitedCollector wrapper over another {@link Collector} with a specified timeout. /// @param collector the wrapped {@link Collector} /// @param timeAllowed max time allowed for collecting hits after which TimeExceeded exception is thrown TimeLimitingCollector(const CollectorPtr& collector, int64_t timeAllowed); virtual ~TimeLimitingCollector(); LUCENE_CLASS(TimeLimitingCollector); public: /// Default timer resolution. /// @see #setResolution(int64_t) static const int32_t DEFAULT_RESOLUTION; /// Default for {@link #isGreedy()}. /// @see #isGreedy() bool DEFAULT_GREEDY; protected: static int64_t resolution; bool greedy; int64_t t0; int64_t timeout; CollectorPtr collector; int32_t docBase; public: /// Return the timer resolution. /// @see #setResolution(int64_t) static int64_t getResolution(); /// Set the timer resolution. /// The default timer resolution is 20 milliseconds. /// This means that a search required to take no longer than 800 milliseconds may be stopped after /// 780 to 820 milliseconds. Note that: ///
    ///
  • Finer (smaller) resolution is more accurate but less efficient. ///
  • Setting resolution to less than 5 milliseconds will be silently modified to 5 milliseconds. ///
  • Setting resolution smaller than current resolution might take effect only after current resolution. /// (Assume current resolution of 20 milliseconds is modified to 5 milliseconds, then it can take up to 20 /// milliseconds for the change to have effect. ///
static void setResolution(int64_t newResolution); /// Stop timer thread. static void stopTimer(); /// Checks if this time limited collector is greedy in collecting the last hit. A non greedy collector, /// upon a timeout, would throw a TimeExceeded without allowing the wrapped collector to collect current /// doc. A greedy one would first allow the wrapped hit collector to collect current doc and only then /// throw a TimeExceeded exception. /// @see #setGreedy(boolean) bool isGreedy(); /// Sets whether this time limited collector is greedy. /// @param greedy true to make this time limited greedy /// @see #isGreedy() void setGreedy(bool greedy); /// Calls {@link Collector#collect(int)} on the decorated {@link Collector} unless the allowed time has /// passed, in which case it throws an exception. virtual void collect(int32_t doc); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); virtual void setScorer(const ScorerPtr& scorer); virtual bool acceptsDocsOutOfOrder(); protected: /// Initialize a single static timer thread to be used by all TimeLimitedCollector instances. static TimerThreadPtr TIMER_THREAD(); friend class TimerThread; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Token.h000066400000000000000000000422601456444476200216110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOKEN_H #define TOKEN_H #include "Attribute.h" #include "AttributeSource.h" namespace Lucene { /// A Token is an occurrence of a term from the text of a field. It consists of a term's text, the start and end /// offset of the term in the text of the field and a type string. 
/// /// The start and end offsets permit applications to re-associate a token with its source text, eg., to display /// highlighted query terms in a document browser, or to show matching text fragments in a /// KWIC display, etc. /// /// The type is a string, assigned by a lexical analyzer (a.k.a. tokenizer), naming the lexical or syntactic class /// that the token belongs to. For example an end of sentence marker token might be implemented with type "eos". /// The default token type is "word". /// /// A Token can optionally have metadata (a.k.a. Payload) in the form of a variable length byte array. Use {@link /// TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)} to retrieve the payloads /// from the index. /// /// Tokenizers and TokenFilters should try to re-use a Token instance when possible for best performance, by implementing /// the {@link TokenStream#incrementToken()} API. Failing that, to create a new Token you should first use one of /// the constructors that starts with null text. To load the token from a char[] use /// {@link #setTermBuffer(char[], int, int)}. To load from a String use {@link #setTermBuffer(String)} or {@link /// #setTermBuffer(String, int, int)}. Alternatively you can get the Token's termBuffer by calling either {@link /// #termBuffer()}, if you know that your text is shorter than the capacity of the termBuffer or {@link /// #resizeTermBuffer(int)}, if there is any possibility that you may need to grow the buffer. Fill in the characters /// of your term into this buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string, /// or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setTermLength(int)} to /// set the length of the term text. /// /// Typical Token reuse patterns: /// /// Copying text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified): ///
/// return reusableToken->reinit(string, startOffset, endOffset[, type]);
/// 
/// /// Copying some text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified): ///
/// return reusableToken->reinit(string, 0, string.length(), startOffset, endOffset[, type]);
/// 
/// /// Copying text from char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified): ///
/// return reusableToken->reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
/// 
/// /// Copying some text from a char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified): ///
/// return reusableToken->reinit(buffer, start, end - start, startOffset, endOffset[, type]);
/// 
/// /// Copying from one one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified): ///
/// return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
/// 
/// /// A few things to note: /// clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but /// should affect no one. /// Because TokenStreams can be chained, one cannot assume that the Token's current type is correct. The startOffset /// and endOffset represent the start and offset in the source text, so be careful in adjusting them. When caching a /// reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again. /// /// @see Payload class LPPAPI Token : public Attribute { public: /// Constructs a Token will null text. Token(); /// Constructs a Token with null text and start and end offsets. /// @param start start offset in the source text /// @param end end offset in the source text Token(int32_t start, int32_t end); /// Constructs a Token with null text and start and end offsets plus the Token type. /// @param start start offset in the source text /// @param end end offset in the source text /// @param type the lexical type of this Token Token(int32_t start, int32_t end, const String& type); /// Constructs a Token with null text and start and end offsets plus flags. /// @param start start offset in the source text /// @param end end offset in the source text /// @param flags The bits to set for this token Token(int32_t start, int32_t end, int32_t flags); /// Constructs a Token with the given term text, start and end offsets. The type defaults to "word." /// NOTE: for better indexing speed you should instead use the char[] termBuffer methods to set the term text. /// @param text term text /// @param start start offset in the source text /// @param end end offset in the source text Token(const String& text, int32_t start, int32_t end); /// Constructs a Token with the given term text, start and end offsets and type. /// NOTE: for better indexing speed you should instead use the char[] termBuffer methods to set the term text. 
/// @param text term text /// @param start start offset in the source text /// @param end end offset in the source text /// @param type the lexical type of this Token Token(const String& text, int32_t start, int32_t end, const String& type); /// Constructs a Token with the given term text, start and end offsets and flags. /// NOTE: for better indexing speed you should instead use the char[] termBuffer methods to set the term text. /// @param text term text /// @param start start offset in the source text /// @param end end offset in the source text /// @param flags The bits to set for this token Token(const String& text, int32_t start, int32_t end, int32_t flags); /// Constructs a Token with the given term buffer (offset and length), start and end offsets Token(CharArray startTermBuffer, int32_t termBufferOffset, int32_t termBufferLength, int32_t start, int32_t end); virtual ~Token(); LUCENE_CLASS(Token); public: static const String& DEFAULT_TYPE(); protected: static const int32_t MIN_BUFFER_SIZE; CharArray _termBuffer; int32_t _termLength; int32_t _startOffset; int32_t _endOffset; String _type; int32_t flags; PayloadPtr payload; int32_t positionIncrement; public: /// Set the position increment. This determines the position of this token relative to the previous Token /// in a {@link TokenStream}, used in phrase searching. /// /// The default value is one. /// /// Some common uses for this are: /// /// Set it to zero to put multiple terms in the same position. This is useful if, eg., a word has multiple /// stems. Searches for phrases including either stem will match. In this case, all but the first stem's /// increment should be set to zero: the increment of the first instance should be one. Repeating a token /// with an increment of zero can also be used to boost the scores of matches on that token. /// /// Set it to values greater than one to inhibit exact phrase matches. 
If, for example, one does not want /// phrases to match across removed stop words, then one could build a stop word filter that removes stop /// words and also sets the increment to the number of stop words removed before each non-stop word. Then /// exact phrase queries will only match when the terms occur with no intervening stop words. /// /// @param positionIncrement the distance from the prior term /// @see TermPositions virtual void setPositionIncrement(int32_t positionIncrement); /// Returns the position increment of this Token. /// @see #setPositionIncrement virtual int32_t getPositionIncrement(); /// Returns the Token's term text. /// /// This method has a performance penalty because the text is stored internally in a char[]. If possible, /// use {@link #termBuffer()} and {@link #termLength()} directly instead. If you really need a String, use /// this method, which is nothing more than a convenience call to String(token->termBuffer(), token->termLength()) virtual String term(); /// Copies the contents of buffer, starting at offset for length characters, into the termBuffer array. /// @param buffer the buffer to copy /// @param offset the index in the buffer of the first character to copy /// @param length the number of characters to copy virtual void setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length); /// Copies the contents of buffer into the termBuffer array. /// @param buffer the buffer to copy virtual void setTermBuffer(const String& buffer); /// Copies the contents of buffer, starting at offset and continuing for length characters, into the termBuffer array. /// @param buffer the buffer to copy /// @param offset the index in the buffer of the first character to copy /// @param length the number of characters to copy virtual void setTermBuffer(const String& buffer, int32_t offset, int32_t length); /// Returns the internal termBuffer character array which you can then directly alter. 
If the array is too /// small for your token, use {@link #resizeTermBuffer(int)} to increase it. After altering the buffer be sure /// to call {@link #setTermLength} to record the number of valid characters that were placed into the termBuffer. virtual CharArray termBuffer(); /// Optimized implementation of termBuffer. virtual wchar_t* termBufferArray(); /// Grows the termBuffer to at least size newSize, preserving the existing content. Note: If the next operation is /// to change the contents of the term buffer use {@link #setTermBuffer(char[], int, int)}, {@link /// #setTermBuffer(String)}, or {@link #setTermBuffer(String, int, int)} to optimally combine the resize with the /// setting of the termBuffer. /// @param newSize minimum size of the new termBuffer /// @return newly created termBuffer with length >= newSize virtual CharArray resizeTermBuffer(int32_t newSize); /// Return number of valid characters (length of the term) in the termBuffer array. virtual int32_t termLength(); /// Set number of valid characters (length of the term) in the termBuffer array. Use this to truncate the termBuffer /// or to synchronize with external manipulation of the termBuffer. Note: to grow the size of the array, use {@link /// #resizeTermBuffer(int)} first. /// @param length the truncated length virtual void setTermLength(int32_t length); /// Returns this Token's starting offset, the position of the first character corresponding to this token in the /// source text. /// /// Note that the difference between endOffset() and startOffset() may not be equal to {@link #termLength}, as the /// term text may have been altered by a stemmer or some other filter. virtual int32_t startOffset(); /// Set the starting offset. /// @see #startOffset() virtual void setStartOffset(int32_t offset); /// Returns this Token's ending offset, one greater than the position of the last character corresponding to this /// token in the source text. 
The length of the token in the source text is (endOffset - startOffset). virtual int32_t endOffset(); /// Set the ending offset. /// @see #endOffset() virtual void setEndOffset(int32_t offset); /// Set the starting and ending offset. /// @see #startOffset() and #endOffset() virtual void setOffset(int32_t startOffset, int32_t endOffset); /// Returns this Token's lexical type. Defaults to "word". virtual String type(); /// Set the lexical type. /// @see #type() virtual void setType(const String& type); /// Get the bitset for any bits that have been set. This is completely distinct from {@link #type()}, although /// they do share similar purposes. The flags can be used to encode information about the token for use by other /// {@link TokenFilter}s. /// /// @return The bits virtual int32_t getFlags(); /// @see #getFlags() virtual void setFlags(int32_t flags); /// Returns this Token's payload. virtual PayloadPtr getPayload(); /// Sets this Token's payload. virtual void setPayload(const PayloadPtr& payload); virtual String toString(); /// Resets the term text, payload, flags, and positionIncrement, startOffset, endOffset and token type to default. virtual void clear(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); /// Makes a clone, but replaces the term buffer and start/end offset in the process. This is more efficient than /// doing a full clone (and then calling setTermBuffer) because it saves a wasted copy of the old termBuffer. 
TokenPtr clone(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(char[], int, int)}, {@link #setStartOffset}, /// {@link #setEndOffset}, {@link #setType} /// @return this Token instance TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType); /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(char[], int, int)}, {@link #setStartOffset}, /// {@link #setEndOffset}, {@link #setType} on Token::DEFAULT_TYPE /// @return this Token instance TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset); /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(String)}, {@link #setStartOffset}, /// {@link #setEndOffset}, {@link #setType} /// @return this Token instance TokenPtr reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset, const String& newType); /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(String)}, {@link #setStartOffset}, /// {@link #setEndOffset}, {@link #setType} /// @return this Token instance TokenPtr reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType); /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(String)}, {@link #setStartOffset}, /// {@link #setEndOffset}, {@link #setType} on Token::DEFAULT_TYPE /// @return this Token instance TokenPtr reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset); /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(String, int, int)}, {@link #setStartOffset}, /// {@link #setEndOffset}, {@link #setType} on Token::DEFAULT_TYPE /// @return this Token 
instance TokenPtr reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset); /// Copy the prototype token's fields into this one. Note: Payloads are shared. void reinit(const TokenPtr& prototype); /// Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. void reinit(const TokenPtr& prototype, const String& newTerm); /// Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. void reinit(const TokenPtr& prototype, CharArray newTermBuffer, int32_t offset, int32_t length); virtual void copyTo(const AttributePtr& target); /// Convenience factory that returns Token as implementation for the basic attributes static AttributeFactoryPtr TOKEN_ATTRIBUTE_FACTORY(); protected: /// Construct Token and initialize values void ConstructToken(int32_t start, int32_t end, const String& type, int32_t flags); /// Allocates a buffer char[] of at least newSize, without preserving the existing content. Its always used in /// places that set the content. /// @param newSize minimum size of the buffer void growTermBuffer(int32_t newSize); void initTermBuffer(); /// Like clear() but doesn't clear termBuffer/text void clearNoTermBuffer(); }; /// Creates a TokenAttributeFactory returning {@link Token} as instance for the basic attributes and for all other /// attributes calls the given delegate factory. 
class LPPAPI TokenAttributeFactory : public AttributeFactory { public: TokenAttributeFactory(const AttributeFactoryPtr& delegate); virtual ~TokenAttributeFactory(); LUCENE_CLASS(TokenAttributeFactory); protected: AttributeFactoryPtr delegate; public: virtual AttributePtr createAttributeInstance(const String& className); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TokenFilter.h000066400000000000000000000024321456444476200227540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOKENFILTER_H #define TOKENFILTER_H #include "TokenStream.h" namespace Lucene { /// A TokenFilter is a TokenStream whose input is another TokenStream. /// /// This is an abstract class; subclasses must override {@link #incrementToken()}. /// @see TokenStream class LPPAPI TokenFilter : public TokenStream { protected: /// Construct a token stream filtering the given input. TokenFilter(const TokenStreamPtr& input); public: virtual ~TokenFilter(); LUCENE_CLASS(TokenFilter); protected: /// The source of tokens for this filter. TokenStreamPtr input; public: /// Performs end-of-stream operations, if any, and calls then end() on the input TokenStream. /// NOTE: Be sure to call TokenFilter::end() first when overriding this method. virtual void end(); /// Close the input TokenStream. virtual void close(); /// Reset the filter as well as the input TokenStream. 
virtual void reset(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TokenStream.h000066400000000000000000000132301456444476200227600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOKENSTREAM_H #define TOKENSTREAM_H #include "AttributeSource.h" namespace Lucene { /// A TokenStream enumerates the sequence of tokens, either from {@link Field}s of a {@link Document} or from /// query text. /// /// This is an abstract class; concrete subclasses are: {@link Tokenizer}, a TokenStream whose input is a Reader; /// and {@link TokenFilter}, a TokenStream whose input is another TokenStream. /// /// A new TokenStream API has been introduced with Lucene 2.9. This API has moved from being {@link Token}-based /// to {@link Attribute}-based. While {@link Token} still exists in 2.9 as a convenience class, the preferred way /// to store the information of a {@link Token} is to use {@link Attribute}s. /// /// TokenStream now extends {@link AttributeSource}, which provides access to all of the token {@link Attribute}s /// for the TokenStream. Note that only one instance per {@link Attribute} is created and reused for every /// token. This approach reduces object creation and allows local caching of references to the {@link Attribute}s. /// See {@link #incrementToken()} for further details. /// /// The workflow of the new TokenStream API is as follows: /// - Instantiation of TokenStream/{@link TokenFilter}s which add/get attributes to/from the {@link AttributeSource}. /// - The consumer calls {@link TokenStream#reset()}. /// - The consumer retrieves attributes from the stream and stores local references to all attributes it wants to access. 
/// - The consumer calls {@link #incrementToken()} until it returns false consuming the attributes after each call. /// - The consumer calls {@link #end()} so that any end-of-stream operations can be performed. /// - The consumer calls {@link #close()} to release any resource when finished using the TokenStream. /// /// To make sure that filters and consumers know which attributes are available, the attributes must be added during /// instantiation. Filters and consumers are not required to check for availability of attributes in {@link /// #incrementToken()}. /// /// Sometimes it is desirable to capture a current state of a TokenStream, eg., for buffering purposes (see {@link /// CachingTokenFilter}, {@link TeeSinkTokenFilter}). For this use case {@link AttributeSource#captureState} and {@link /// AttributeSource#restoreState} can be used. class LPPAPI TokenStream : public AttributeSource { protected: /// A TokenStream using the default attribute factory. TokenStream(); /// A TokenStream that uses the same attributes as the supplied one. TokenStream(const AttributeSourcePtr& input); /// A TokenStream using the supplied AttributeFactory for creating new {@link Attribute} instances. TokenStream(const AttributeFactoryPtr& factory); public: virtual ~TokenStream(); LUCENE_CLASS(TokenStream); public: /// Consumers (ie., {@link IndexWriter}) use this method to advance the stream to the next token. Implementing /// classes must implement this method and update the appropriate {@link Attribute}s with the attributes of /// the next token. /// /// The producer must make no assumptions about the attributes after the method has been returned: the caller may /// arbitrarily change it. If the producer needs to preserve the state for subsequent calls, it can use {@link /// #captureState} to create a copy of the current attribute state. /// /// This method is called for every token of a document, so an efficient implementation is crucial for good /// performance. 
To avoid calls to {@link #addAttribute(Class)} and {@link #getAttribute(Class)}, references to /// all {@link Attribute}s that this stream uses should be retrieved during instantiation. /// /// To ensure that filters and consumers know which attributes are available, the attributes must be added during /// instantiation. Filters and consumers are not required to check for availability of attributes in {@link /// #incrementToken()}. /// /// @return false for end of stream; true otherwise virtual bool incrementToken() = 0; /// This method is called by the consumer after the last token has been consumed, after {@link #incrementToken()} /// returned false (using the new TokenStream API). Streams implementing the old API should upgrade to use this /// feature. /// /// This method can be used to perform any end-of-stream operations, such as setting the final offset of a stream. /// The final offset of a stream might differ from the offset of the last token eg in case one or more whitespaces /// followed after the last token, but a {@link WhitespaceTokenizer} was used. virtual void end(); /// Resets this stream to the beginning. This is an optional operation, so subclasses may or may not implement /// this method. {@link #reset()} is not needed for the standard indexing process. However, if the tokens of a /// TokenStream are intended to be consumed more than once, it is necessary to implement {@link #reset()}. Note that /// if your TokenStream caches tokens and feeds them back again after a reset, it is imperative that you clone the /// tokens when you store them away (on the first pass) as well as when you return them (on future passes after /// {@link #reset()}). virtual void reset(); /// Releases resources associated with this stream. 
virtual void close(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Tokenizer.h000066400000000000000000000046061456444476200225050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOKENIZER_H #define TOKENIZER_H #include "TokenStream.h" namespace Lucene { /// A Tokenizer is a TokenStream whose input is a Reader. /// /// This is an abstract class; subclasses must override {@link #incrementToken()} /// /// Note: Subclasses overriding {@link #incrementToken()} must call {@link AttributeSource#clearAttributes()} /// before setting attributes. class LPPAPI Tokenizer : public TokenStream { protected: /// Construct a tokenizer with null input. Tokenizer(); /// Construct a token stream processing the given input. Tokenizer(const ReaderPtr& input); /// Construct a tokenizer with null input using the given AttributeFactory. Tokenizer(const AttributeFactoryPtr& factory); /// Construct a token stream processing the given input using the given AttributeFactory. Tokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); /// Construct a token stream processing the given input using the given AttributeSource. Tokenizer(const AttributeSourcePtr& source); /// Construct a token stream processing the given input using the given AttributeSource. Tokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); public: virtual ~Tokenizer(); LUCENE_CLASS(Tokenizer); protected: /// The text source for this Tokenizer. ReaderPtr input; CharStreamPtr charStream; public: /// By default, closes the input Reader. virtual void close(); /// Return the corrected offset. 
If {@link #input} is a {@link CharStream} subclass this method calls /// {@link CharStream#correctOffset}, else returns currentOff. /// @param currentOff offset as seen in the output /// @return corrected offset based on the input /// @see CharStream#correctOffset virtual int32_t correctOffset(int32_t currentOff); using TokenStream::reset; /// Reset the tokenizer to a new reader. Typically, an analyzer (in its reusableTokenStream method) will /// use this to re-use a previously created tokenizer. virtual void reset(const ReaderPtr& input); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TopDocs.h000066400000000000000000000026261456444476200221060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOPDOCS_H #define TOPDOCS_H #include "LuceneObject.h" namespace Lucene { /// Represents hits returned by {@link Searcher#search(QueryPtr, FilterPtr, int32_t)} and {@link /// Searcher#search(QueryPtr, int32_t)}. class LPPAPI TopDocs : public LuceneObject { public: /// Constructs a TopDocs with a default maxScore = double.NaN. TopDocs(int32_t totalHits, Collection scoreDocs); /// Constructs a TopDocs. TopDocs(int32_t totalHits, Collection scoreDocs, double maxScore); virtual ~TopDocs(); LUCENE_CLASS(TopDocs); public: /// The total number of hits for the query. int32_t totalHits; /// The top hits for the query. Collection scoreDocs; /// Stores the maximum score value encountered, needed for normalizing. double maxScore; public: /// Returns the maximum score value encountered. Note that in case scores are not tracked, /// this returns NaN. double getMaxScore(); /// Sets the maximum score value encountered. 
void setMaxScore(double maxScore); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TopDocsCollector.h000066400000000000000000000100161456444476200237450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOPDOCSCOLLECTOR_H #define TOPDOCSCOLLECTOR_H #include "Collector.h" #include "PriorityQueue.h" namespace Lucene { /// A base class for all collectors that return a {@link TopDocs} output. This collector allows easy extension /// by providing a single constructor which accepts a {@link PriorityQueue} as well as protected members for /// that priority queue and a counter of the number of total hits. /// /// Extending classes can override {@link #topDocs(int32_t, int32_t)} and {@link #getTotalHits()} in order to /// provide their own implementation. class LPPAPI TopDocsCollector : public Collector { public: TopDocsCollector(const HitQueueBasePtr& pq); virtual ~TopDocsCollector(); LUCENE_CLASS(TopDocsCollector); protected: /// The priority queue which holds the top documents. Note that different implementations of PriorityQueue /// give different meaning to 'top documents'. HitQueue for example aggregates the top scoring documents, /// while other PQ implementations may hold documents sorted by other criteria. HitQueueBasePtr pq; /// The total number of documents that the collector encountered. int32_t totalHits; public: /// The total number of documents that matched this query. virtual int32_t getTotalHits(); /// Returns the top docs that were collected by this collector. virtual TopDocsPtr topDocs(); /// Returns the documents in the range [start .. pq.size()) that were collected by this collector. 
Note that /// if start >= pq.size(), an empty TopDocs is returned. /// /// This method is convenient to call if the application always asks for the last results, starting from the /// last 'page'. /// /// NOTE: you cannot call this method more than once for each search execution. If you need to call it more /// than once, passing each time a different start, you should call {@link #topDocs()} and work with the /// returned {@link TopDocs} object, which will contain all the results this search execution collected. virtual TopDocsPtr topDocs(int32_t start); /// Returns the documents in the rage [start .. start + howMany) that were collected by this collector. Note /// that if start >= pq.size(), an empty TopDocs is returned, and if pq.size() - start < howMany, then only /// the available documents in [start .. pq.size()) are returned. /// /// This method is useful to call in case pagination of search results is allowed by the search application, /// as well as it attempts to optimize the memory used by allocating only as much as requested by howMany. /// /// NOTE: you cannot call this method more than once for each search execution. If you need to call it more /// than once, passing each time a different range, you should call {@link #topDocs()} and work with the /// returned {@link TopDocs} object, which will contain all the results this search execution collected. virtual TopDocsPtr topDocs(int32_t start, int32_t howMany); protected: /// This is used in case topDocs() is called with illegal parameters, or there simply aren't (enough) results. static TopDocsPtr EMPTY_TOPDOCS(); /// Populates the results array with the ScoreDoc instances. This can be overridden in case a different /// ScoreDoc type should be returned. virtual void populateResults(Collection results, int32_t howMany); /// Returns a {@link TopDocs} instance containing the given results. 
If results is null it means there are /// no results to return, either because there were 0 calls to collect() or because the arguments to topDocs /// were invalid. virtual TopDocsPtr newTopDocs(Collection results, int32_t start); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TopFieldCollector.h000066400000000000000000000064161456444476200241110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOPFIELDCOLLECTOR_H #define TOPFIELDCOLLECTOR_H #include "TopDocsCollector.h" namespace Lucene { /// A {@link Collector} that sorts by {@link SortField} using {@link FieldComparator}s. /// /// See the {@link #create(SortPtr, int32_t, bool, bool, bool, bool)} method for instantiating a TopFieldCollector. class LPPAPI TopFieldCollector : public TopDocsCollector { public: TopFieldCollector(const HitQueueBasePtr& pq, int32_t numHits, bool fillFields); virtual ~TopFieldCollector(); LUCENE_CLASS(TopFieldCollector); protected: bool fillFields; /// Stores the maximum score value encountered, needed for normalizing. If document scores are not tracked, /// this value is initialized to NaN. double maxScore; int32_t numHits; FieldValueHitQueueEntryPtr bottom; bool queueFull; int32_t docBase; public: /// Creates a new {@link TopFieldCollector} from the given arguments. /// /// NOTE: The instances returned by this method pre-allocate a full array of length numHits. /// /// @param sort The sort criteria (SortFields). /// @param numHits The number of results to collect. /// @param fillFields Specifies whether the actual field values should be returned on the results (FieldDoc). 
/// @param trackDocScores Specifies whether document scores should be tracked and set on the results. Note /// that if set to false, then the results' scores will be set to NaN. Setting this to true affects /// performance, as it incurs the score computation on each competitive result. Therefore if document scores /// are not required by the application, it is recommended to set it to false. /// @param trackMaxScore Specifies whether the query's maxScore should be tracked and set on the resulting /// {@link TopDocs}. Note that if set to false, {@link TopDocs#getMaxScore()} returns NaN. Setting this to /// true affects performance as it incurs the score computation on each result. Also, setting this true /// automatically sets trackDocScores to true as well. /// @param docsScoredInOrder Specifies whether documents are scored in doc Id order or not by the given /// {@link Scorer} in {@link #setScorer(ScorerPtr)}. /// @return a {@link TopFieldCollector} instance which will sort the results by the sort criteria. static TopFieldCollectorPtr create(const SortPtr& sort, int32_t numHits, bool fillFields, bool trackDocScores, bool trackMaxScore, bool docsScoredInOrder); virtual void add(int32_t slot, int32_t doc, double score); virtual bool acceptsDocsOutOfOrder(); protected: static const Collection EMPTY_SCOREDOCS(); /// Only the following callback methods need to be overridden since topDocs(int32_t, int32_t) calls them to /// return the results. virtual void populateResults(Collection results, int32_t howMany); virtual TopDocsPtr newTopDocs(Collection results, int32_t start); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TopFieldDocs.h000066400000000000000000000021311456444476200230410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOPFIELDDOCS_H #define TOPFIELDDOCS_H #include "TopDocs.h" namespace Lucene { /// Represents hits returned by {@link Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr)}. class LPPAPI TopFieldDocs : public TopDocs { public: /// @param totalHits Total number of hits for the query. /// @param scoreDocs The top hits for the query. /// @param fields The sort criteria used to find the top hits. /// @param maxScore The maximum score encountered. TopFieldDocs(int32_t totalHits, Collection scoreDocs, Collection fields, double maxScore); virtual ~TopFieldDocs(); LUCENE_CLASS(TopFieldDocs); public: /// The fields which were used to sort results by. Collection fields; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TopScoreDocCollector.h000066400000000000000000000037001456444476200245600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TOPSCOREDOCCOLLECTOR_H #define TOPSCOREDOCCOLLECTOR_H #include "TopDocsCollector.h" namespace Lucene { /// A {@link Collector} implementation that collects the top-scoring hits, returning them as a {@link TopDocs}. /// This is used by {@link IndexSearcher} to implement {@link TopDocs}-based search. Hits are sorted by score /// descending and then (when the scores are tied) docID ascending. When you create an instance of this /// collector you should know in advance whether documents are going to be collected in doc Id order or not. 
/// /// NOTE: The values Nan, NEGATIVE_INFINITY and POSITIVE_INFINITY are not valid scores. This collector will /// not properly collect hits with such scores. class LPPAPI TopScoreDocCollector : public TopDocsCollector { public: TopScoreDocCollector(int32_t numHits); virtual ~TopScoreDocCollector(); LUCENE_CLASS(TopScoreDocCollector); INTERNAL: ScoreDocPtr pqTop; int32_t docBase; ScorerWeakPtr _scorer; Scorer* __scorer; public: /// Creates a new {@link TopScoreDocCollector} given the number of hits to collect and whether documents /// are scored in order by the input {@link Scorer} to {@link #setScorer(ScorerPtr)}. /// /// NOTE: The instances returned by this method pre-allocate a full array of length numHits. static TopScoreDocCollectorPtr create(int32_t numHits, bool docsScoredInOrder); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); virtual void setScorer(const ScorerPtr& scorer); protected: virtual TopDocsPtr newTopDocs(Collection results, int32_t start); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/TypeAttribute.h000066400000000000000000000023041456444476200233310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TYPEATTRIBUTE_H #define TYPEATTRIBUTE_H #include "Attribute.h" namespace Lucene { /// A Token's lexical type. The Default value is "word". class LPPAPI TypeAttribute : public Attribute { public: TypeAttribute(); TypeAttribute(const String& type); virtual ~TypeAttribute(); LUCENE_CLASS(TypeAttribute); protected: String _type; static const String& DEFAULT_TYPE(); public: virtual String toString(); /// Returns this Token's lexical type. Defaults to "word". String type(); /// Set the lexical type. 
/// @see #type() void setType(const String& type); virtual void clear(); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual void copyTo(const AttributePtr& target); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/UTF8Stream.h000066400000000000000000000065641456444476200224420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef UTF8STREAM_H #define UTF8STREAM_H #include "LuceneObject.h" namespace Lucene { class LPPAPI UTF8Base : public LuceneObject { public: virtual ~UTF8Base(); LUCENE_CLASS(UTF8Base); public: static const uint16_t LEAD_SURROGATE_MIN; static const uint16_t LEAD_SURROGATE_MAX; static const uint16_t TRAIL_SURROGATE_MIN; static const uint16_t TRAIL_SURROGATE_MAX; static const uint16_t LEAD_OFFSET; static const uint32_t SURROGATE_OFFSET; static const uint32_t CODE_POINT_MAX; static const wchar_t UNICODE_REPLACEMENT_CHAR; static const wchar_t UNICODE_TERMINATOR; protected: virtual uint32_t readNext() = 0; uint8_t mask8(uint32_t b); uint16_t mask16(uint32_t c); bool isTrail(uint32_t b); bool isSurrogate(uint32_t cp); bool isLeadSurrogate(uint32_t cp); bool isTrailSurrogate(uint32_t cp); bool isValidCodePoint(uint32_t cp); bool isOverlongSequence(uint32_t cp, int32_t length); }; class UTF8Encoder : public UTF8Base { public: UTF8Encoder(const wchar_t* unicodeBegin, const wchar_t* unicodeEnd); virtual ~UTF8Encoder(); LUCENE_CLASS(UTF8Encoder); protected: const wchar_t* unicodeBegin; const wchar_t* unicodeEnd; public: int32_t encode(uint8_t* utf8, int32_t length); int32_t utf16to8(uint8_t* utf8, int32_t length); int32_t 
utf32to8(uint8_t* utf8, int32_t length); protected: virtual uint32_t readNext(); uint8_t* appendChar(uint8_t* utf8, uint32_t cp); }; class UTF8EncoderStream : public UTF8Encoder { public: UTF8EncoderStream(const ReaderPtr& reader); virtual ~UTF8EncoderStream(); LUCENE_CLASS(UTF8EncoderStream); protected: ReaderPtr reader; protected: virtual uint32_t readNext(); }; class UTF8Decoder : public UTF8Base { public: UTF8Decoder(const uint8_t* utf8Begin, const uint8_t* utf8End); virtual ~UTF8Decoder(); LUCENE_CLASS(UTF8Decoder); protected: const uint8_t* utf8Begin; const uint8_t* utf8End; public: int32_t decode(wchar_t* unicode, int32_t length); int32_t utf8to16(wchar_t* unicode, int32_t length); int32_t utf8to32(wchar_t* unicode, int32_t length); protected: virtual uint32_t readNext(); int32_t sequenceLength(uint32_t cp); bool getSequence(uint32_t& cp, int32_t length); bool isValidNext(uint32_t& cp); }; class UTF8DecoderStream : public UTF8Decoder { public: UTF8DecoderStream(const ReaderPtr& reader); virtual ~UTF8DecoderStream(); LUCENE_CLASS(UTF8DecoderStream); protected: ReaderPtr reader; protected: virtual uint32_t readNext(); }; class UTF16Decoder : public UTF8Base { public: UTF16Decoder(const uint16_t* utf16Begin, const uint16_t* utf16End); virtual ~UTF16Decoder(); LUCENE_CLASS(UTF16Decoder); protected: const uint16_t* utf16Begin; const uint16_t* utf16End; public: int32_t decode(wchar_t* unicode, int32_t length); int32_t utf16to16(wchar_t* unicode, int32_t length); int32_t utf16to32(wchar_t* unicode, int32_t length); protected: virtual uint32_t readNext(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/UnicodeUtils.h000066400000000000000000000050221456444476200231330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef UNICODEUTILS_H #define UNICODEUTILS_H #include "LuceneObject.h" namespace Lucene { class LPPAPI UnicodeUtil { public: virtual ~UnicodeUtil(); public: /// Return true if supplied character is alpha-numeric. static bool isAlnum(wchar_t c); /// Return true if supplied character is alphabetic. static bool isAlpha(wchar_t c); /// Return true if supplied character is numeric. static bool isDigit(wchar_t c); /// Return true if supplied character is a space. static bool isSpace(wchar_t c); /// Return true if supplied character is uppercase. static bool isUpper(wchar_t c); /// Return true if supplied character is lowercase. static bool isLower(wchar_t c); /// Return true if supplied character is other type of letter. static bool isOther(wchar_t c); /// Return true if supplied character is non-spacing. static bool isNonSpacing(wchar_t c); /// Return uppercase representation of a given character. static wchar_t toUpper(wchar_t c); /// Return lowercase representation of a given character. static wchar_t toLower(wchar_t c); }; /// Utility class that contains utf8 and unicode translations. 
template class TranslationResult : public LuceneObject { public: TranslationResult() { result = Array::newInstance(10); length = 0; } public: Array result; int32_t length; public: void setLength(int32_t length) { if (!result) { result = Array::newInstance((int32_t)(1.5 * (double)length)); } if (result.size() < length) { result.resize((int32_t)(1.5 * (double)length)); } this->length = length; } void copyText(const TranslationResult& other) { setLength(other.length); MiscUtils::arrayCopy(other.result.get(), 0, result.get(), 0, other.length); } void copyText(boost::shared_ptr< TranslationResult > other) { copyText(*other); } }; class LPPAPI UTF8Result : public TranslationResult { public: virtual ~UTF8Result(); }; class LPPAPI UnicodeResult : public TranslationResult { public: virtual ~UnicodeResult(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ValueSource.h000066400000000000000000000032211456444476200227600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef VALUESOURCE_H #define VALUESOURCE_H #include "LuceneObject.h" namespace Lucene { /// Source of values for basic function queries. /// /// At its default/simplest form, values - one per doc - are used as the score of that doc. /// /// Values are instantiated as {@link DocValues} for a particular reader. /// ValueSource implementations differ in RAM requirements: it would always be a factor of the number of /// documents, but for each document the number of bytes can be 1, 2, 4, or 8. class LPPAPI ValueSource : public LuceneObject { public: virtual ~ValueSource(); LUCENE_CLASS(ValueSource); public: /// Return the DocValues used by the function query. 
/// @param reader The IndexReader used to read these values. If any caching is involved, that caching /// would also be IndexReader based. virtual DocValuesPtr getValues(const IndexReaderPtr& reader) = 0; /// Description of field, used in explain() virtual String description() = 0; virtual String toString(); /// Needed for possible caching of query results - used by {@link ValueSourceQuery#equals(LuceneObjectPtr)}. virtual bool equals(const LuceneObjectPtr& other) = 0; /// Needed for possible caching of query results - used by {@link ValueSourceQuery#hashCode()}. virtual int32_t hashCode() = 0; }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/ValueSourceQuery.h000066400000000000000000000032121456444476200240060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef VALUESOURCEQUERY_H #define VALUESOURCEQUERY_H #include "Query.h" namespace Lucene { /// A Query that sets the scores of document to the values obtained from a {@link ValueSource}. /// /// This query provides a score for each and every undeleted document in the index. /// /// The value source can be based on a (cached) value of an indexed field, but it can also be based on an /// external source, eg. values read from an external database. /// /// Score is set as: Score(doc,query) = (query.getBoost() * query.getBoost()) * valueSource(doc). 
class LPPAPI ValueSourceQuery : public Query { public: /// Create a value source query /// @param valSrc provides the values defines the function to be used for scoring ValueSourceQuery(const ValueSourcePtr& valSrc); virtual ~ValueSourceQuery(); LUCENE_CLASS(ValueSourceQuery); public: ValueSourcePtr valSrc; public: using Query::toString; virtual QueryPtr rewrite(const IndexReaderPtr& reader); virtual void extractTerms(SetTerm terms); virtual WeightPtr createWeight(const SearcherPtr& searcher); virtual String toString(const String& field); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/VariantUtils.h000066400000000000000000000061271456444476200231600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef VARIANTUTILS_H #define VARIANTUTILS_H #include #include #include "Lucene.h" #include "MiscUtils.h" namespace Lucene { class LPPAPI VariantUtils { public: template static TYPE get(const boost::any& var) { return var.type() == typeid(TYPE) ? boost::any_cast(var) : TYPE(); } template static TYPE get(VAR var) { #if BOOST_VERSION < 105800 return var.type() == typeid(TYPE) ? boost::get(var) : TYPE(); #else return var.type() == typeid(TYPE) ? 
boost::relaxed_get(var) : TYPE(); #endif } template static bool typeOf(VAR var) { return (var.type() == typeid(TYPE)); } static VariantNull null() { return VariantNull(); } static bool isNull(const boost::any& var) { return var.empty(); } template static bool isNull(VAR var) { return typeOf(var); } template static int32_t hashCode(VAR var) { if (typeOf(var)) { return StringUtils::hashCode(get(var)); } if (typeOf(var)) { return get(var); } if (typeOf(var)) { return (int32_t)get(var); } if (typeOf(var)) { int64_t longBits = MiscUtils::doubleToLongBits(get(var)); return (int32_t)(longBits ^ (longBits >> 32)); } if (typeOf< Collection >(var)) { return get< Collection >(var).hashCode(); } if (typeOf< Collection >(var)) { return get< Collection >(var).hashCode(); } if (typeOf< Collection >(var)) { return get< Collection >(var).hashCode(); } if (typeOf< Collection >(var)) { return get< Collection >(var).hashCode(); } if (typeOf< Collection >(var)) { return get< Collection >(var).hashCode(); } if (typeOf(var)) { return get(var)->hashCode(); } return 0; } template static bool equalsType(FIRST first, SECOND second) { return (first.type() == second.type()); } template static bool equals(FIRST first, SECOND second) { return first.type() == second.type() ? (first == second) : false; } template static int32_t compareTo(VAR first, VAR second) { return first < second ? -1 : (first == second ? 0 : 1); } }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/Weight.h000066400000000000000000000077431456444476200217670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef WEIGHT_H #define WEIGHT_H #include "LuceneObject.h" namespace Lucene { /// Calculate query weights and build query scorers. /// /// The purpose of {@link Weight} is to ensure searching does not modify a {@link Query}, so that a /// {@link Query} instance can be reused. /// {@link Searcher} dependent state of the query should reside in the {@link Weight}. /// {@link IndexReader} dependent state should reside in the {@link Scorer}. /// /// Weight is used in the following way: ///
    ///
  1. A Weight is constructed by a top-level query, given a Searcher ({@link Query#createWeight(Searcher)}). ///
  2. The {@link #sumOfSquaredWeights()} method is called on the Weight to compute the query normalization /// factor {@link Similarity#queryNorm(float)} of the query clauses contained in the query. ///
  3. The query normalization factor is passed to {@link #normalize(float)}. At this point the weighting is /// complete. ///
  4. A Scorer is constructed by {@link #scorer(IndexReaderPtr, bool, bool)}. ///
class LPPAPI Weight : public LuceneObject { public: virtual ~Weight(); LUCENE_CLASS(Weight); public: /// An explanation of the score computation for the named document. /// @param reader sub-reader containing the give doc /// @param doc /// @return an Explanation for the score virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc) = 0; /// The query that this concerns. virtual QueryPtr getQuery() = 0; /// The weight for this query. virtual double getValue() = 0; /// Assigns the query normalization factor to this. virtual void normalize(double norm) = 0; /// Returns a {@link Scorer} which scores documents in/out-of order according to scoreDocsInOrder. /// /// NOTE: even if scoreDocsInOrder is false, it is recommended to check whether the returned Scorer /// indeed scores documents out of order (ie., call {@link #scoresDocsOutOfOrder()}), as some Scorer /// implementations will always return documents in-order. /// /// NOTE: null can be returned if no documents will be scored by this query. /// /// @param reader The {@link IndexReader} for which to return the {@link Scorer}. /// @param scoreDocsInOrder Specifies whether in-order scoring of documents is required. Note that if /// set to false (i.e., out-of-order scoring is required), this method can return whatever scoring mode /// it supports, as every in-order scorer is also an out-of-order one. However, an out-of-order scorer /// may not support {@link Scorer#nextDoc()} and/or {@link Scorer#advance(int)}, therefore it is /// recommended to request an in-order scorer if use of these methods is required. /// @param topScorer If true, {@link Scorer#score(CollectorPtr)} will be called; if false, {@link /// Scorer#nextDoc()} and/or {@link Scorer#advance(int)} will be called. /// @return a {@link Scorer} which scores documents in/out-of order. virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) = 0; /// The sum of squared weights of contained query clauses. 
virtual double sumOfSquaredWeights() = 0; /// Returns true if this implementation scores docs only out of order. This method is used in conjunction /// with {@link Collector}'s {@link Collector#acceptsDocsOutOfOrder() acceptsDocsOutOfOrder} and /// {@link #scorer(IndexReaderPtr, bool, bool)} to create a matching {@link Scorer} instance for a given /// {@link Collector}, or vice versa. /// /// NOTE: the default implementation returns false, ie. the Scorer scores documents in-order. virtual bool scoresDocsOutOfOrder(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/WhitespaceAnalyzer.h000066400000000000000000000015111456444476200243250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WHITESPACEANALYZER_H #define WHITESPACEANALYZER_H #include "Analyzer.h" namespace Lucene { /// An Analyzer that uses {@link WhitespaceTokenizer}. class LPPAPI WhitespaceAnalyzer : public Analyzer { public: virtual ~WhitespaceAnalyzer(); LUCENE_CLASS(WhitespaceAnalyzer); public: virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/WhitespaceTokenizer.h000066400000000000000000000024661456444476200245240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef WHITESPACETOKENIZER_H #define WHITESPACETOKENIZER_H #include "CharTokenizer.h" namespace Lucene { /// A WhitespaceTokenizer is a tokenizer that divides text at whitespace. Adjacent sequences of non-Whitespace /// characters form tokens. class LPPAPI WhitespaceTokenizer : public CharTokenizer { public: /// Construct a new WhitespaceTokenizer. WhitespaceTokenizer(const ReaderPtr& input); /// Construct a new WhitespaceTokenizer using a given {@link AttributeSource}. WhitespaceTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); /// Construct a new WhitespaceTokenizer using a given {@link AttributeSource.AttributeFactory}. WhitespaceTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); virtual ~WhitespaceTokenizer(); LUCENE_CLASS(WhitespaceTokenizer); public: /// Collects only characters which do not satisfy {@link Character#isWhitespace(char)}. virtual bool isTokenChar(wchar_t c); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/WildcardQuery.h000066400000000000000000000033041456444476200233040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WILDCARDQUERY_H #define WILDCARDQUERY_H #include "MultiTermQuery.h" namespace Lucene { /// Implements the wildcard search query. Supported wildcards are *, which matches any character sequence /// (including the empty one), and ?, which matches any single character. Note this query can be slow, as /// it needs to iterate over many terms. In order to prevent extremely slow WildcardQueries, a Wildcard /// term should not start with one of the wildcards * or ?. 
/// /// This query uses the {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} rewrite method. /// @see WildcardTermEnum class LPPAPI WildcardQuery : public MultiTermQuery { public: WildcardQuery(const TermPtr& term); virtual ~WildcardQuery(); LUCENE_CLASS(WildcardQuery); protected: bool termContainsWildcard; bool termIsPrefix; TermPtr term; public: using MultiTermQuery::toString; /// Returns the pattern term. TermPtr getTerm(); virtual QueryPtr rewrite(const IndexReaderPtr& reader); /// Prints a user-readable version of this query. virtual String toString(const String& field); virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); virtual int32_t hashCode(); virtual bool equals(const LuceneObjectPtr& other); protected: virtual FilteredTermEnumPtr getEnum(const IndexReaderPtr& reader); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/WildcardTermEnum.h000066400000000000000000000031021456444476200237270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WILDCARDTERMENUM_H #define WILDCARDTERMENUM_H #include "FilteredTermEnum.h" namespace Lucene { /// Subclass of FilteredTermEnum for enumerating all terms that match the specified wildcard filter term. /// /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than /// all that precede it. class LPPAPI WildcardTermEnum : public FilteredTermEnum { public: /// Creates a new WildcardTermEnum. /// /// After calling the constructor the enumeration is already pointing to the first valid term if such /// a term exists. 
WildcardTermEnum(const IndexReaderPtr& reader, const TermPtr& term); virtual ~WildcardTermEnum(); LUCENE_CLASS(WildcardTermEnum); public: static const wchar_t WILDCARD_STRING; static const wchar_t WILDCARD_CHAR; TermPtr searchTerm; String field; String text; String pre; int32_t preLen; bool _endEnum; public: virtual double difference(); /// Determines if a word matches a wildcard pattern. static bool wildcardEquals(const String& pattern, int32_t patternIdx, const String& string, int32_t stringIdx); protected: virtual bool termCompare(const TermPtr& term); virtual bool endEnum(); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/WordlistLoader.h000066400000000000000000000037571456444476200234770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WORDLISTLOADER_H #define WORDLISTLOADER_H #include "LuceneObject.h" namespace Lucene { /// Loader for text files that represent a list of stopwords. class LPPAPI WordlistLoader : public LuceneObject { public: virtual ~WordlistLoader(); LUCENE_CLASS(WordlistLoader); public: /// Loads a text file and adds every line as an entry to a HashSet (omitting leading and trailing whitespace). /// Every line of the file should contain only one word. The words need to be in lowercase if you make use of an /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). /// /// @param wordfile File name containing the wordlist /// @param comment The comment string to ignore /// @return A set with the file's words static HashSet getWordSet(const String& wordfile, const String& comment = EmptyString); /// Loads a text file and adds every line as an entry to a HashSet (omitting leading and trailing whitespace). 
/// Every line of the file should contain only one word. The words need to be in lowercase if you make use of an /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). /// /// @param reader Reader containing the wordlist /// @param comment The comment string to ignore /// @return A set with the file's words static HashSet getWordSet(const ReaderPtr& reader, const String& comment = EmptyString); /// Reads a stem dictionary. Each line contains: ///
word\tstem
/// (ie. two tab separated words) /// @return stem dictionary that overrules the stemming algorithm static MapStringString getStemDict(const String& wordstemfile); }; } #endif LucenePlusPlus-rel_3.0.9/include/lucene++/targetver.h000066400000000000000000000015011456444476200225250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TARGETVER_H #define TARGETVER_H #ifdef _WIN32 #ifndef WINVER // Specifies that the minimum required platform is Windows XP. #define WINVER 0x0501 // Change this to the appropriate value to target other versions of Windows. #endif #ifndef _WIN32_WINNT // Specifies that the minimum required platform is Windows XP. #define _WIN32_WINNT 0x0501 // Change this to the appropriate value to target other versions of Windows. #endif #endif #endif LucenePlusPlus-rel_3.0.9/options.cmake000066400000000000000000000033531456444476200200310ustar00rootroot00000000000000# lucene++ project build options #======================================================================== # linker args #======================================================================== option(LUCENE_BUILD_SHARED "Build shared library" ON ) option(ENABLE_PACKAGING "Create build scripts for creating lucene++ packages" OFF ) option(LUCENE_USE_STATIC_BOOST_LIBS "Use static boost libraries" OFF ) option(ENABLE_BOOST_INTEGER "Enable boost integer types" OFF ) option(ENABLE_CYCLIC_CHECK "Enable cyclic checking" OFF ) # build options #======================================================================== option( ENABLE_TEST "Enable the tests" ON) option( ENABLE_DEMO "Enable building demo applications" ON) OPTION( ENABLE_DOCS "Build the Lucene++ documentation." 
OFF) OPTION( ENABLE_CONTRIB "Enable building contrib library" ON) # documentation options #======================================================================== mark_as_advanced(DOCS_HTML_HELP) OPTION( DOCS_HTML_HELP "Doxygen should compile HTML into a Help file (CHM)." NO ) mark_as_advanced(DOCS_HTML) OPTION( DOCS_HTML "Doxygen should build HTML documentation." YES ) mark_as_advanced(DOCS_XML) OPTION( DOCS_XML "Doxygen should build XML documentation." NO ) mark_as_advanced(DOCS_RTF) OPTION( DOCS_RTF "Doxygen should build RTF documentation." NO ) mark_as_advanced(DOCS_MAN) OPTION( DOCS_MAN "Doxygen should build man documentation." NO ) mark_as_advanced(DOCS_TAGFILE) OPTION( DOCS_TAGFILE "Doxygen should build a tagfile." NO ) mark_as_advanced(DOCS_LATEX) OPTION( DOCS_LATEX "Doxygen should build Latex documentation." NO ) LucenePlusPlus-rel_3.0.9/src/000077500000000000000000000000001456444476200161175ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/CMakeLists.txt000066400000000000000000000003371456444476200206620ustar00rootroot00000000000000 add_subdirectory(core) add_subdirectory(config) if(ENABLE_CONTRIB) add_subdirectory(contrib) endif() if(ENABLE_DEMO) add_subdirectory(demo) endif() if(ENABLE_TEST) enable_testing() add_subdirectory(test) endif()LucenePlusPlus-rel_3.0.9/src/config/000077500000000000000000000000001456444476200173645ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/config/CMakeLists.txt000066400000000000000000000011371456444476200221260ustar00rootroot00000000000000#################################### # Set config vars #################################### set(core_libname "lucene++") set(contrib_libname "lucene++-contrib") set( PACKAGE_CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}/lucene++/") set( PACKAGE_CMAKE_INSTALL_LIBDIR "${LIB_DESTINATION}") #################################### # include CMakePackageConfigHelpers #################################### include(CMakePackageConfigHelpers) 
#################################### # include directories #################################### add_subdirectory(core) add_subdirectory(contrib)LucenePlusPlus-rel_3.0.9/src/config/contrib/000077500000000000000000000000001456444476200210245ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/config/contrib/CMakeLists.txt000066400000000000000000000022531456444476200235660ustar00rootroot00000000000000################################# # install pkconfig ################################# if(NOT WIN32) configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/liblucene++-contrib.pc.in" "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contrib.pc" @ONLY) install( FILES "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contrib.pc" DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) endif() ################################# # install config.cmake ################################# configure_package_config_file( "${CMAKE_CURRENT_SOURCE_DIR}/liblucene++-contribConfig.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfig.cmake" INSTALL_DESTINATION "${LIB_DESTINATION}/cmake/liblucene++-contrib" PATH_VARS contrib_libname PACKAGE_CMAKE_INSTALL_INCLUDEDIR PACKAGE_CMAKE_INSTALL_LIBDIR) write_basic_package_version_file( "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfigVersion.cmake" VERSION ${lucene++_VERSION} COMPATIBILITY SameMajorVersion ) install( FILES "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfig.cmake" "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfigVersion.cmake" DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/liblucene++-contrib") LucenePlusPlus-rel_3.0.9/src/config/contrib/liblucene++-contrib.pc.in000066400000000000000000000006261456444476200255070ustar00rootroot00000000000000prefix=@CMAKE_INSTALL_PREFIX@ exec_prefix=${prefix}/bin libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ includedir=${prefix}/include/lucene++ lib=lucene++-contrib Name: liblucene++-contrib Description: Contributions for Lucene++ - a C++ search engine, ported from the popular Apache Lucene Version: 
@lucene++_VERSION@ Libs: -L${libdir} -l${lib} Cflags: -I${includedir} Requires: liblucene++ = @lucene++_VERSION@ LucenePlusPlus-rel_3.0.9/src/config/contrib/liblucene++-contribConfig.cmake.in000066400000000000000000000015601456444476200273110ustar00rootroot00000000000000# Config module for Lucene++-contrib # # Provides the following variables # liblucene++-contrib_INCLUDE_DIRS - Directories to include # liblucene++-contrib_LIBRARIES - Libraries to link # liblucene++-contrib_LIBRARY_DIRS - Library directories to search for link libraries @PACKAGE_INIT@ # This should only be used for meson if (NOT DEFINED set_and_check) macro(set_and_check _var _file) set(${_var} "${_file}") if(NOT EXISTS "${_file}") message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !") endif() endmacro() endif() set_and_check(liblucene++-contrib_INCLUDE_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@") set_and_check(liblucene++-contrib_LIBRARY_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_LIBDIR@") set(liblucene++-contrib_LIBRARIES "@contrib_libname@") LucenePlusPlus-rel_3.0.9/src/config/core/000077500000000000000000000000001456444476200203145ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/config/core/CMakeLists.txt000066400000000000000000000021211456444476200230500ustar00rootroot00000000000000################################# # install pkconfig ################################# if(NOT WIN32) configure_file( "${CMAKE_CURRENT_SOURCE_DIR}/liblucene++.pc.in" "${CMAKE_CURRENT_BINARY_DIR}/liblucene++.pc" @ONLY) install( FILES "${CMAKE_CURRENT_BINARY_DIR}/liblucene++.pc" DESTINATION "${LIB_DESTINATION}/pkgconfig") endif() ################################# # install config.cmake ################################# configure_package_config_file( "${CMAKE_CURRENT_SOURCE_DIR}/liblucene++Config.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/liblucene++Config.cmake" INSTALL_DESTINATION "${LIB_DESTINATION}/cmake/liblucene++" PATH_VARS core_libname 
PACKAGE_CMAKE_INSTALL_INCLUDEDIR PACKAGE_CMAKE_INSTALL_LIBDIR) write_basic_package_version_file( ${CMAKE_CURRENT_BINARY_DIR}/liblucene++ConfigVersion.cmake VERSION ${lucene++_VERSION} COMPATIBILITY SameMajorVersion ) install( FILES "${CMAKE_CURRENT_BINARY_DIR}/liblucene++Config.cmake" "${CMAKE_CURRENT_BINARY_DIR}/liblucene++ConfigVersion.cmake" DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/liblucene++) LucenePlusPlus-rel_3.0.9/src/config/core/liblucene++.pc.in000066400000000000000000000005111456444476200233320ustar00rootroot00000000000000prefix=@CMAKE_INSTALL_PREFIX@ exec_prefix=${prefix}/bin libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ includedir=${prefix}/include/lucene++ lib=lucene++ Name: liblucene++ Description: Lucene++ - a C++ search engine, ported from the popular Apache Lucene Version: @lucene++_VERSION@ Libs: -L${libdir} -l${lib} Cflags: -I${includedir} LucenePlusPlus-rel_3.0.9/src/config/core/liblucene++Config.cmake.in000066400000000000000000000014671456444476200251510ustar00rootroot00000000000000# Config module for Lucene++ # # Provides the following variables # liblucene++_INCLUDE_DIRS - Directories to include # liblucene++_LIBRARIES - Libraries to link # liblucene++_LIBRARY_DIRS - Library directories to search for link libraries @PACKAGE_INIT@ # This should only be used for meson if (NOT DEFINED set_and_check) macro(set_and_check _var _file) set(${_var} "${_file}") if(NOT EXISTS "${_file}") message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !") endif() endmacro() endif() set_and_check(liblucene++_INCLUDE_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@") set_and_check(liblucene++_LIBRARY_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_LIBDIR@") set(liblucene++_LIBRARIES "@core_libname@") 
LucenePlusPlus-rel_3.0.9/src/contrib/000077500000000000000000000000001456444476200175575ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/CMakeLists.txt000066400000000000000000000045631456444476200223270ustar00rootroot00000000000000project(contrib) #################################### # create library target #################################### add_subdirectory(include) if (LUCENE_BUILD_SHARED) add_library(lucene++-contrib SHARED) else() add_library(lucene++-contrib STATIC) endif() set(lucene++-contrib_SOVERSION "0") set(lucene++-contrib_VERSION "${lucene++_VERSION_MAJOR}.${lucene++_VERSION_MINOR}.${lucene++_VERSION_PATCH}") add_library(lucene++::lucene++-contrib ALIAS lucene++-contrib) #################################### # src #################################### file(GLOB_RECURSE contrib_sources "analyzers/*.cpp" "highlighter/*.cpp" "memory/*.cpp" "index/*.cpp" "mevc/*.cpp" "snowball/*.cpp" "snowball/libstemmer_c/libstemmer/libstemmer_utf8.c" "snowball/libstemmer_c/src_c/*.c" "snowball/libstemmer_c/runtime/*.c" ) file(GLOB_RECURSE contrib_headers "${lucene++-lib_SOURCE_DIR}/include/*.h") target_sources(lucene++-contrib PRIVATE ${contrib_sources}) #################################### # include directories #################################### target_include_directories(lucene++-contrib PUBLIC $ $ $ $ ${Boost_INCLUDE_DIRS} ) #################################### # dependencies #################################### target_link_libraries(lucene++-contrib Boost::boost Boost::date_time Boost::filesystem Boost::iostreams Boost::regex Boost::system Boost::thread ZLIB::ZLIB lucene++::lucene++) if(WIN32) target_link_libraries(lucene++-contrib ws2_32) endif() #################################### # link args #################################### target_compile_options(lucene++-contrib PRIVATE -DLPP_BUILDING_LIB) set_target_properties(lucene++-contrib PROPERTIES COTIRE_CXX_PREFIX_HEADER_INIT "include/ContribInc.h" CXX_VISIBILITY_PRESET hidden 
VISIBILITY_INLINES_HIDDEN 1 VERSION ${lucene++_VERSION} SOVERSION ${lucene++_SOVERSION}) cotire(lucene++-contrib) install(TARGETS lucene++-contrib DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT runtime) install( FILES ${contrib_headers} DESTINATION "include/lucene++" COMPONENT development-contrib) LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/000077500000000000000000000000001456444476200215675ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/000077500000000000000000000000001456444476200230575ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/000077500000000000000000000000001456444476200247025ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/ar/000077500000000000000000000000001456444476200253045ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/ar/ArabicAnalyzer.cpp000066400000000000000000000200701456444476200306760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ArabicAnalyzer.h" #include "ArabicLetterTokenizer.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "ArabicNormalizationFilter.h" #include "ArabicStemFilter.h" #include "StringUtils.h" namespace Lucene { /// Default Arabic stopwords in UTF-8 format. /// /// Generated from http://members.unine.ch/jacques.savoy/clef/index.html /// The stopword list is BSD-Licensed. 
const uint8_t ArabicAnalyzer::DEFAULT_STOPWORD_FILE[] = { 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd9, 0x85, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd9, 0x85, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd9, 0x81, 0xd9, 0x8a, 0x0a, 0xd9, 0x88, 0xd9, 0x81, 0xd9, 0x8a, 0x0a, 0xd9, 0x81, 0xd9, 0x8a, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd9, 0x81, 0xd9, 0x8a, 0xd9, 0x87, 0x0a, 0xd9, 0x88, 0x0a, 0xd9, 0x81, 0x0a, 0xd8, 0xab, 0xd9, 0x85, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0x0a, 0xd8, 0xa3, 0xd9, 0x88, 0x0a, 0xd8, 0xa8, 0x0a, 0xd8, 0xa8, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xa8, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0x0a, 0xd8, 0xa7, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xa3, 0xd9, 0x8a, 0x0a, 0xd8, 0xa3, 0xd9, 0x89, 0x0a, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd8, 0xa5, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd9, 0x84, 0xd9, 0x83, 0xd9, 0x86, 0x0a, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x88, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x81, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, 0x0a, 0xd9, 0x85, 0xd8, 0xb9, 0x0a, 0xd8, 0xa7, 0xd8, 0xb0, 0xd8, 0xa7, 0x0a, 0xd8, 0xa5, 0xd8, 0xb0, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd8, 0xa5, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xa5, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xa5, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd9, 0x81, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x81, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd8, 0xa5, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 
0x84, 0xd8, 0xaa, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xaa, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xb0, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xb0, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xb0, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x8a, 0x0a, 0xd8, 0xa5, 0xd9, 0x84, 0xd9, 0x89, 0x0a, 0xd8, 0xa5, 0xd9, 0x84, 0xd9, 0x8a, 0x0a, 0xd8, 0xb9, 0xd9, 0x84, 0xd9, 0x89, 0x0a, 0xd8, 0xb9, 0xd9, 0x84, 0xd9, 0x8a, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xb9, 0xd9, 0x84, 0xd9, 0x8a, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa5, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xb6, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, 0x8a, 0xd8, 0xb6, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x84, 0x0a, 0xd9, 0x88, 0xd9, 0x83, 0xd9, 0x84, 0x0a, 0xd9, 0x84, 0xd9, 0x85, 0x0a, 0xd9, 0x88, 0xd9, 0x84, 0xd9, 0x85, 0x0a, 0xd9, 0x84, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x84, 0xd9, 0x86, 0x0a, 0xd9, 0x87, 0xd9, 0x89, 0x0a, 0xd9, 0x87, 0xd9, 0x8a, 0x0a, 0xd9, 0x87, 0xd9, 0x88, 0x0a, 0xd9, 0x88, 0xd9, 0x87, 0xd9, 0x89, 0x0a, 0xd9, 0x88, 0xd9, 0x87, 0xd9, 0x8a, 0x0a, 0xd9, 0x88, 0xd9, 0x87, 0xd9, 0x88, 0x0a, 0xd9, 0x81, 0xd9, 0x87, 0xd9, 0x89, 0x0a, 0xd9, 0x81, 0xd9, 0x87, 0xd9, 0x8a, 0x0a, 0xd9, 0x81, 0xd9, 0x87, 0xd9, 0x88, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaa, 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0xd8, 0xaa, 0x0a, 0xd9, 0x84, 0xd9, 0x83, 0x0a, 0xd9, 0x84, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd9, 0x84, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, 0xb0, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, 0xb0, 0xd8, 0xa7, 0x0a, 0xd8, 0xaa, 0xd9, 0x84, 0xd9, 0x83, 0x0a, 0xd8, 0xb0, 0xd9, 0x84, 0xd9, 0x83, 0x0a, 0xd9, 0x87, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x83, 0x0a, 0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaa, 0x0a, 0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xaa, 0xd9, 0x83, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 
0xd9, 0x88, 0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaa, 0x0a, 0xd9, 0x88, 0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xba, 0xd9, 0x8a, 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xb6, 0x0a, 0xd9, 0x82, 0xd8, 0xaf, 0x0a, 0xd9, 0x86, 0xd8, 0xad, 0xd9, 0x88, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd9, 0x86, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x85, 0xd9, 0x86, 0xd8, 0xb0, 0x0a, 0xd8, 0xb6, 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd8, 0xad, 0xd9, 0x8a, 0xd8, 0xab, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa2, 0xd9, 0x86, 0x0a, 0xd8, 0xae, 0xd9, 0x84, 0xd8, 0xa7, 0xd9, 0x84, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xaf, 0x0a, 0xd9, 0x82, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xd8, 0xad, 0xd8, 0xaa, 0xd9, 0x89, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, 0xd8, 0xaf, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x84, 0xd8, 0xaf, 0xd9, 0x89, 0x0a, 0xd8, 0xac, 0xd9, 0x85, 0xd9, 0x8a, 0xd8, 0xb9, 0x0a }; ArabicAnalyzer::ArabicAnalyzer(LuceneVersion::Version matchVersion) { this->stoptable = getDefaultStopSet(); this->matchVersion = matchVersion; } ArabicAnalyzer::ArabicAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stoptable = stopwords; this->matchVersion = matchVersion; } ArabicAnalyzer::~ArabicAnalyzer() { } const HashSet ArabicAnalyzer::getDefaultStopSet() { static HashSet stopSet; LUCENE_RUN_ONCE( String stopWords(UTF8_TO_STRING(DEFAULT_STOPWORD_FILE)); Collection words(StringUtils::split(stopWords, L"\n")); stopSet = HashSet::newInstance(words.begin(), words.end()); ); return stopSet; } TokenStreamPtr ArabicAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { TokenStreamPtr result = newLucene(reader); result = newLucene(result); // the order here is important: the stopword list is not normalized result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); result = newLucene(result); 
result = newLucene(result); return result; } TokenStreamPtr ArabicAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { ArabicAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(reader); streams->result = newLucene(streams->source); // the order here is important: the stopword list is not normalized streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); streams->result = newLucene(streams->result); streams->result = newLucene(streams->result); setPreviousTokenStream(streams); } else { streams->source->reset(reader); } return streams->result; } ArabicAnalyzerSavedStreams::~ArabicAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/ar/ArabicLetterTokenizer.cpp000066400000000000000000000020171456444476200322440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ArabicLetterTokenizer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { ArabicLetterTokenizer::ArabicLetterTokenizer(const ReaderPtr& input) : LetterTokenizer(input) { } ArabicLetterTokenizer::ArabicLetterTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : LetterTokenizer(source, input) { } ArabicLetterTokenizer::ArabicLetterTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : LetterTokenizer(factory, input) { } ArabicLetterTokenizer::~ArabicLetterTokenizer() { } bool ArabicLetterTokenizer::isTokenChar(wchar_t c) { return LetterTokenizer::isTokenChar(c) || UnicodeUtil::isNonSpacing(c); } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/ar/ArabicNormalizationFilter.cpp000066400000000000000000000020131456444476200331020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ArabicNormalizationFilter.h" #include "ArabicNormalizer.h" #include "TermAttribute.h" namespace Lucene { ArabicNormalizationFilter::ArabicNormalizationFilter(const TokenStreamPtr& input) : TokenFilter(input) { normalizer = newLucene(); termAtt = addAttribute(); } ArabicNormalizationFilter::~ArabicNormalizationFilter() { } bool ArabicNormalizationFilter::incrementToken() { if (input->incrementToken()) { int32_t newlen = normalizer->normalize(termAtt->termBuffer().get(), termAtt->termLength()); termAtt->setTermLength(newlen); return true; } else { return false; } } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/ar/ArabicNormalizer.cpp000066400000000000000000000045571456444476200312470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ArabicNormalizer.h" #include "MiscUtils.h" namespace Lucene { const wchar_t ArabicNormalizer::ALEF = (wchar_t)0x0627; const wchar_t ArabicNormalizer::ALEF_MADDA = (wchar_t)0x0622; const wchar_t ArabicNormalizer::ALEF_HAMZA_ABOVE = (wchar_t)0x0623; const wchar_t ArabicNormalizer::ALEF_HAMZA_BELOW = (wchar_t)0x0625; const wchar_t ArabicNormalizer::YEH = (wchar_t)0x064a; const wchar_t ArabicNormalizer::DOTLESS_YEH = (wchar_t)0x0649; const wchar_t ArabicNormalizer::TEH_MARBUTA = (wchar_t)0x0629; const wchar_t ArabicNormalizer::HEH = (wchar_t)0x0647; const wchar_t ArabicNormalizer::TATWEEL = (wchar_t)0x0640; const wchar_t ArabicNormalizer::FATHATAN = (wchar_t)0x064b; const wchar_t ArabicNormalizer::DAMMATAN = (wchar_t)0x064c; const wchar_t ArabicNormalizer::KASRATAN = (wchar_t)0x064d; const wchar_t ArabicNormalizer::FATHA = (wchar_t)0x064e; const wchar_t ArabicNormalizer::DAMMA = (wchar_t)0x064f; const wchar_t ArabicNormalizer::KASRA = (wchar_t)0x0650; const wchar_t ArabicNormalizer::SHADDA = (wchar_t)0x0651; const wchar_t ArabicNormalizer::SUKUN = (wchar_t)0x0652; ArabicNormalizer::~ArabicNormalizer() { } int32_t ArabicNormalizer::normalize(wchar_t* s, int32_t len) { for (int32_t i = 0; i < len; ++i) { switch (s[i]) { case ALEF_MADDA: case ALEF_HAMZA_ABOVE: case ALEF_HAMZA_BELOW: s[i] = ALEF; break; case DOTLESS_YEH: s[i] = YEH; break; case TEH_MARBUTA: s[i] = HEH; break; case TATWEEL: case KASRATAN: case DAMMATAN: case FATHATAN: case FATHA: case DAMMA: case KASRA: case SHADDA: case SUKUN: len = deleteChar(s, i--, len); break; default: break; } } return len; } int32_t ArabicNormalizer::deleteChar(wchar_t* s, int32_t pos, int32_t len) { if (pos < len) { MiscUtils::arrayCopy(s, pos + 1, s, pos, len - pos - 1); } return len - 1; } } 
LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/ar/ArabicStemFilter.cpp000066400000000000000000000017041456444476200311720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ArabicStemFilter.h" #include "ArabicStemmer.h" #include "TermAttribute.h" namespace Lucene { ArabicStemFilter::ArabicStemFilter(const TokenStreamPtr& input) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); } ArabicStemFilter::~ArabicStemFilter() { } bool ArabicStemFilter::incrementToken() { if (input->incrementToken()) { int32_t newlen = stemmer->stem(termAtt->termBuffer().get(), termAtt->termLength()); termAtt->setTermLength(newlen); return true; } else { return false; } } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/ar/ArabicStemmer.cpp000066400000000000000000000105471456444476200305350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ArabicStemmer.h" #include "MiscUtils.h" namespace Lucene { const wchar_t ArabicStemmer::ALEF = (wchar_t)0x0627; const wchar_t ArabicStemmer::BEH = (wchar_t)0x0628; const wchar_t ArabicStemmer::TEH_MARBUTA = (wchar_t)0x0629; const wchar_t ArabicStemmer::TEH = (wchar_t)0x062a; const wchar_t ArabicStemmer::FEH = (wchar_t)0x0641; const wchar_t ArabicStemmer::KAF = (wchar_t)0x0643; const wchar_t ArabicStemmer::LAM = (wchar_t)0x0644; const wchar_t ArabicStemmer::NOON = (wchar_t)0x0646; const wchar_t ArabicStemmer::HEH = (wchar_t)0x0647; const wchar_t ArabicStemmer::WAW = (wchar_t)0x0648; const wchar_t ArabicStemmer::YEH = (wchar_t)0x064a; ArabicStemmer::~ArabicStemmer() { } const Collection ArabicStemmer::prefixes() { static Collection _prefixes; LUCENE_RUN_ONCE( _prefixes = Collection::newInstance(); _prefixes.add(String(L"") + ALEF + LAM); _prefixes.add(String(L"") + WAW + ALEF + LAM); _prefixes.add(String(L"") + BEH + ALEF + LAM); _prefixes.add(String(L"") + KAF + ALEF + LAM); _prefixes.add(String(L"") + FEH + ALEF + LAM); _prefixes.add(String(L"") + LAM + LAM); _prefixes.add(String(L"") + WAW); ); return _prefixes; } const Collection ArabicStemmer::suffixes() { static Collection _suffixes; LUCENE_RUN_ONCE( _suffixes = Collection::newInstance(); _suffixes.add(String(L"") + HEH + ALEF); _suffixes.add(String(L"") + ALEF + NOON); _suffixes.add(String(L"") + ALEF + TEH); _suffixes.add(String(L"") + WAW + NOON); _suffixes.add(String(L"") + YEH + NOON); _suffixes.add(String(L"") + YEH + HEH); _suffixes.add(String(L"") + YEH + TEH_MARBUTA); _suffixes.add(String(L"") + HEH); _suffixes.add(String(L"") + TEH_MARBUTA); _suffixes.add(String(L"") + YEH); ); return _suffixes; } int32_t ArabicStemmer::stem(wchar_t* s, int32_t len) { len = stemPrefix(s, len); len = stemSuffix(s, len); return len; } int32_t ArabicStemmer::stemPrefix(wchar_t* s, int32_t len) { Collection 
stemPrefixes(prefixes()); for (int32_t i = 0; i < stemPrefixes.size(); ++i) { if (startsWith(s, len, stemPrefixes[i])) { return deleteChars(s, 0, len, (int32_t)stemPrefixes[i].length()); } } return len; } int32_t ArabicStemmer::stemSuffix(wchar_t* s, int32_t len) { Collection stemSuffixes(suffixes()); for (int32_t i = 0; i < stemSuffixes.size(); ++i) { if (endsWith(s, len, stemSuffixes[i])) { len = (int32_t)deleteChars(s, (int32_t)(len - stemSuffixes[i].length()), len, (int32_t)stemSuffixes[i].length()); } } return len; } bool ArabicStemmer::startsWith(wchar_t* s, int32_t len, const String& prefix) { if (prefix.length() == 1 && len < 4) { // wa- prefix requires at least 3 characters return false; } else if (len < (int32_t)prefix.length() + 2) { // other prefixes require only 2 return false; } else { for (int32_t i = 0; i < (int32_t)prefix.length(); ++i) { if (s[i] != prefix[i]) { return false; } } return true; } } bool ArabicStemmer::endsWith(wchar_t* s, int32_t len, const String& suffix) { if (len < (int32_t)suffix.length() + 2) { // all suffixes require at least 2 characters after stemming return false; } else { for (int32_t i = 0; i < (int32_t)suffix.length(); ++i) { if (s[len - suffix.length() + i] != suffix[i]) { return false; } } return true; } } int32_t ArabicStemmer::deleteChars(wchar_t* s, int32_t pos, int32_t len, int32_t chars) { for (int32_t i = 0; i < chars; ++i) { len = deleteChar(s, pos, len); } return len; } int32_t ArabicStemmer::deleteChar(wchar_t* s, int32_t pos, int32_t len) { if (pos < len) { MiscUtils::arrayCopy(s, pos + 1, s, pos, len - pos - 1); } return len - 1; } } 
LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/br/000077500000000000000000000000001456444476200253055ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/br/BrazilianAnalyzer.cpp000066400000000000000000000105571456444476200314420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "BrazilianAnalyzer.h" #include "StandardTokenizer.h" #include "StandardFilter.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "BrazilianStemFilter.h" namespace Lucene { const wchar_t* BrazilianAnalyzer::_BRAZILIAN_STOP_WORDS[] = { L"a", L"ainda", L"alem", L"ambas", L"ambos", L"antes", L"ao", L"aonde", L"aos", L"apos", L"aquele", L"aqueles", L"as", L"assim", L"com", L"como", L"contra", L"contudo", L"cuja", L"cujas", L"cujo", L"cujos", L"da", L"das", L"de", L"dela", L"dele", L"deles", L"demais", L"depois", L"desde", L"desta", L"deste", L"dispoe", L"dispoem", L"diversa", L"diversas", L"diversos", L"do", L"dos", L"durante", L"e", L"ela", L"elas", L"ele", L"eles", L"em", L"entao", L"entre", L"essa", L"essas", L"esse", L"esses", L"esta", L"estas", L"este", L"estes", L"ha", L"isso", L"isto", L"logo", L"mais", L"mas", L"mediante", L"menos", L"mesma", L"mesmas", L"mesmo", L"mesmos", L"na", L"nas", L"nao", L"nas", L"nem", L"nesse", L"neste", L"nos", L"o", L"os", L"ou", L"outra", L"outras", L"outro", L"outros", L"pelas", L"pelas", L"pelo", L"pelos", L"perante", L"pois", L"por", L"porque", L"portanto", L"proprio", L"propios", L"quais", L"qual", L"qualquer", L"quando", L"quanto", L"que", L"quem", L"quer", L"se", L"seja", L"sem", L"sendo", L"seu", L"seus", L"sob", L"sobre", L"sua", L"suas", 
L"tal", L"tambem", L"teu", L"teus", L"toda", L"todas", L"todo", L"todos", L"tua", L"tuas", L"tudo", L"um", L"uma", L"umas", L"uns" }; BrazilianAnalyzer::BrazilianAnalyzer(LuceneVersion::Version matchVersion) { this->stoptable = getDefaultStopSet(); this->matchVersion = matchVersion; } BrazilianAnalyzer::BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stoptable = stopwords; this->matchVersion = matchVersion; } BrazilianAnalyzer::BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) { this->stoptable = stopwords; this->excltable = exclusions; this->matchVersion = matchVersion; } BrazilianAnalyzer::~BrazilianAnalyzer() { } const HashSet BrazilianAnalyzer::getDefaultStopSet() { static HashSet stopSet; LUCENE_RUN_ONCE( stopSet = HashSet::newInstance(_BRAZILIAN_STOP_WORDS, _BRAZILIAN_STOP_WORDS + SIZEOF_ARRAY(_BRAZILIAN_STOP_WORDS)); ); return stopSet; } void BrazilianAnalyzer::setStemExclusionTable(HashSet exclusions) { excltable = exclusions; setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created } TokenStreamPtr BrazilianAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { TokenStreamPtr result = newLucene(matchVersion, reader); result = newLucene(result); result = newLucene(result); result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); result = newLucene(result, excltable); return result; } TokenStreamPtr BrazilianAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { BrazilianAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(matchVersion, reader); streams->result = newLucene(streams->source); streams->result = newLucene(streams->result); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); 
streams->result = newLucene(streams->result, excltable); setPreviousTokenStream(streams); } else { streams->source->reset(reader); } return streams->result; } BrazilianAnalyzerSavedStreams::~BrazilianAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/br/BrazilianStemFilter.cpp000066400000000000000000000026721456444476200317320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "BrazilianStemFilter.h" #include "BrazilianStemmer.h" #include "TermAttribute.h" namespace Lucene { BrazilianStemFilter::BrazilianStemFilter(const TokenStreamPtr& input) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); } BrazilianStemFilter::BrazilianStemFilter(const TokenStreamPtr& input, HashSet exclusiontable) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); exclusions = exclusiontable; } BrazilianStemFilter::~BrazilianStemFilter() { } bool BrazilianStemFilter::incrementToken() { if (input->incrementToken()) { String term(termAtt->term()); // Check the exclusion table. if (!exclusions || !exclusions.contains(term)) { String s(stemmer->stem(term)); // If not stemmed, don't waste the time adjusting the token. if (!s.empty() && s != term) { termAtt->setTermBuffer(s); } } return true; } else { return false; } } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/br/BrazilianStemmer.cpp000066400000000000000000000672051456444476200312730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "BrazilianStemmer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { BrazilianStemmer::~BrazilianStemmer() { } String BrazilianStemmer::stem(const String& term) { // creates CT createCT(term); if (!isIndexable(CT)) { return L""; } if (!isStemmable(CT)) { return CT; } R1 = getR1(CT); R2 = getR1(R1); RV = getRV(CT); TERM = term + L";" + CT; bool altered = step1(); if (!altered) { altered = step2(); } if (altered) { step3(); } else { step4(); } step5(); return CT; } bool BrazilianStemmer::isStemmable(const String& term) { for (int32_t c = 0; c < (int32_t)term.length(); ++c) { // Discard terms that contain non-letter characters. if (!UnicodeUtil::isAlpha(term[c])) { return false; } } return true; } bool BrazilianStemmer::isIndexable(const String& term) { return (term.length() < 30) && (term.length() > 2); } bool BrazilianStemmer::isVowel(wchar_t value) { return (value == L'a' || value == L'e' || value == L'i' || value == L'o' || value == L'u'); } String BrazilianStemmer::getR1(const String& value) { if (value.empty()) { return L""; } // find 1st vowel int32_t i = (int32_t)(value.length() - 1); int32_t j = 0; for (; j < i; ++j) { if (isVowel(value[j])) { break; } } if (j >= i) { return L""; } // find 1st non-vowel for (; j < i; ++j) { if (!isVowel(value[j])) { break; } } if (j >= i) { return L""; } return value.substr(j + 1); } String BrazilianStemmer::getRV(const String& value) { if (value.empty()) { return L""; } int32_t i = (int32_t)(value.length() - 1); // RV - IF the second letter is a consonant, RV is the region after the next following vowel if (i > 0 && !isVowel(value[1])) { int32_t j = 2; // find 1st vowel for (; j < i; ++j) { if (isVowel(value[j])) { break; } } if (j < i) { return 
value.substr(j + 1); } } // RV - OR if the first two letters are vowels, RV is the region after the next consonant, if (i > 1 && isVowel(value[0]) && isVowel(value[1])) { int32_t j = 2; // find 1st consonant for (; j < i; ++j) { if (!isVowel(value[j])) { break; } } if (j < i) { return value.substr(j + 1); } } // RV - AND otherwise (consonant-vowel case) RV is the region after the third letter. if (i > 2) { return value.substr(3); } return L""; } String BrazilianStemmer::changeTerm(const String& value) { if (value.empty()) { return L""; } String lowerValue(StringUtils::toLower(value)); String r; for (int32_t j = 0; j < (int32_t)value.length(); ++j) { if (value[j] == 0x00e1 || value[j] == 0x00e2 || value[j] == 0x00e3) { r += L"a"; continue; } if (value[j] == 0x00e9 || value[j] == 0x00ea) { r += L"e"; continue; } if (value[j] == 0x00ed) { r += L"i"; continue; } if (value[j] == 0x00f3 || value[j] == 0x00f4 || value[j] == 0x00f5) { r += L"o"; continue; } if (value[j] == 0x00fa || value[j] == 0x00fc) { r += L"u"; continue; } if (value[j] == 0x00e7) { r += L"c"; continue; } if (value[j] == 0x00f1) { r += L"n"; continue; } r += value[j]; } return r ; } bool BrazilianStemmer::checkSuffix(const String& value, const String& suffix) { if (value.empty() || suffix.empty()) { return false; } if (suffix.length() > value.length()) { return false; } return (value.substr(value.length() - suffix.length()) == suffix); } String BrazilianStemmer::replaceSuffix(const String& value, const String& toReplace, const String& changeTo) { if (value.empty() || toReplace.empty() || changeTo.empty()) { return value; } String vvalue = removeSuffix(value, toReplace); if (value == vvalue) { return value; } else { return vvalue + changeTo; } } String BrazilianStemmer::removeSuffix(const String& value, const String& toRemove) { if (value.empty() || toRemove.empty() || !checkSuffix(value, toRemove)) { return value; } return value.substr(0, value.length() - toRemove.length()); } bool 
BrazilianStemmer::suffixPreceded(const String& value, const String& suffix, const String& preceded) { if (value.empty() || suffix.empty() || preceded.empty() || !checkSuffix(value, suffix)) { return false; } return checkSuffix(removeSuffix(value, suffix), preceded); } void BrazilianStemmer::createCT(const String& term) { CT = changeTerm(term); if (CT.length() < 2) { return; } // if the first character is ... , remove it if (CT[0] == L'"' || CT[0] == L'\'' || CT[0] == L'-' || CT[0] == L',' || CT[0] == L';' || CT[0] == L'.' || CT[0] == L'?' || CT[0] == L'!') { CT = CT.substr(1); } if (CT.length() < 2) { return; } // if the last character is ... , remove it if (CT[CT.length() - 1] == L'-' || CT[CT.length() - 1] == L',' || CT[CT.length() - 1] == L';' || CT[CT.length() - 1] == L'.' || CT[CT.length() - 1] == L'?' || CT[CT.length() - 1] == L'!' || CT[CT.length() - 1] == L'\'' || CT[CT.length() - 1] == L'"') { CT = CT.substr(0, CT.length() - 1); } } bool BrazilianStemmer::step1() { if (CT.empty()) { return false; } // suffix length = 7 if (checkSuffix(CT, L"uciones") && checkSuffix(R2, L"uciones")) { CT = replaceSuffix(CT, L"uciones", L"u"); return true; } // suffix length = 6 if (CT.length() >= 6) { if (checkSuffix(CT, L"imentos") && checkSuffix(R2, L"imentos")) { CT = removeSuffix(CT, L"imentos"); return true; } if (checkSuffix(CT, L"amentos") && checkSuffix(R2, L"amentos")) { CT = removeSuffix(CT, L"amentos"); return true; } if (checkSuffix(CT, L"adores") && checkSuffix(R2, L"adores")) { CT = removeSuffix(CT, L"adores"); return true; } if (checkSuffix(CT, L"adoras") && checkSuffix(R2, L"adoras")) { CT = removeSuffix(CT, L"adoras"); return true; } if (checkSuffix(CT, L"logias") && checkSuffix(R2, L"logias")) { replaceSuffix(CT, L"logias", L"log"); return true; } if (checkSuffix(CT, L"encias") && checkSuffix(R2, L"encias")) { CT = replaceSuffix(CT, L"encias", L"ente"); return true; } if (checkSuffix(CT, L"amente") && checkSuffix(R1, L"amente")) { CT = removeSuffix(CT, 
L"amente"); return true; } if (checkSuffix(CT, L"idades") && checkSuffix(R2, L"idades")) { CT = removeSuffix(CT, L"idades"); return true; } } // suffix length = 5 if (CT.length() >= 5) { if (checkSuffix(CT, L"acoes") && checkSuffix(R2, L"acoes")) { CT = removeSuffix(CT, L"acoes"); return true; } if (checkSuffix(CT, L"imento") && checkSuffix(R2, L"imento")) { CT = removeSuffix(CT, L"imento"); return true; } if (checkSuffix(CT, L"amento") && checkSuffix(R2, L"amento")) { CT = removeSuffix(CT, L"amento"); return true; } if (checkSuffix(CT, L"adora") && checkSuffix(R2, L"adora")) { CT = removeSuffix(CT, L"adora"); return true; } if (checkSuffix(CT, L"ismos") && checkSuffix(R2, L"ismos")) { CT = removeSuffix(CT, L"ismos"); return true; } if (checkSuffix(CT, L"istas") && checkSuffix(R2, L"istas")) { CT = removeSuffix(CT, L"istas"); return true; } if (checkSuffix(CT, L"logia") && checkSuffix(R2, L"logia")) { CT = replaceSuffix(CT, L"logia", L"log"); return true; } if (checkSuffix(CT, L"ucion") && checkSuffix(R2, L"ucion")) { CT = replaceSuffix(CT, L"ucion", L"u"); return true; } if (checkSuffix(CT, L"encia") && checkSuffix(R2, L"encia")) { CT = replaceSuffix(CT, L"encia", L"ente"); return true; } if (checkSuffix(CT, L"mente") && checkSuffix(R2, L"mente")) { CT = removeSuffix(CT, L"mente"); return true; } if (checkSuffix(CT, L"idade") && checkSuffix(R2, L"idade")) { CT = removeSuffix(CT, L"idade"); return true; } } // suffix length = 4 if (CT.length() >= 4) { if (checkSuffix(CT, L"acao") && checkSuffix(R2, L"acao")) { CT = removeSuffix(CT, L"acao"); return true; } if (checkSuffix(CT, L"ezas") && checkSuffix(R2, L"ezas")) { CT = removeSuffix(CT, L"ezas"); return true; } if (checkSuffix(CT, L"icos") && checkSuffix(R2, L"icos")) { CT = removeSuffix(CT, L"icos"); return true; } if (checkSuffix(CT, L"icas") && checkSuffix(R2, L"icas")) { CT = removeSuffix(CT, L"icas"); return true; } if (checkSuffix(CT, L"ismo") && checkSuffix(R2, L"ismo")) { CT = removeSuffix(CT, L"ismo"); 
return true; } if (checkSuffix(CT, L"avel") && checkSuffix(R2, L"avel")) { CT = removeSuffix(CT, L"avel"); return true; } if (checkSuffix(CT, L"ivel") && checkSuffix(R2, L"ivel")) { CT = removeSuffix(CT, L"ivel"); return true; } if (checkSuffix(CT, L"ista") && checkSuffix(R2, L"ista")) { CT = removeSuffix(CT, L"ista"); return true; } if (checkSuffix(CT, L"osos") && checkSuffix(R2, L"osos")) { CT = removeSuffix(CT, L"osos"); return true; } if (checkSuffix(CT, L"osas") && checkSuffix(R2, L"osas")) { CT = removeSuffix(CT, L"osas"); return true; } if (checkSuffix(CT, L"ador") && checkSuffix(R2, L"ador")) { CT = removeSuffix(CT, L"ador"); return true; } if (checkSuffix(CT, L"ivas") && checkSuffix(R2, L"ivas")) { CT = removeSuffix(CT, L"ivas"); return true; } if (checkSuffix(CT, L"ivos") && checkSuffix(R2, L"ivos")) { CT = removeSuffix(CT, L"ivos"); return true; } if (checkSuffix(CT, L"iras") && checkSuffix(RV, L"iras") && suffixPreceded(CT, L"iras", L"e")) { CT = replaceSuffix(CT, L"iras", L"ir"); return true; } } // suffix length = 3 if (CT.length() >= 3) { if (checkSuffix(CT, L"eza") && checkSuffix(R2, L"eza")) { CT = removeSuffix(CT, L"eza"); return true; } if (checkSuffix(CT, L"ico") && checkSuffix(R2, L"ico")) { CT = removeSuffix(CT, L"ico"); return true; } if (checkSuffix(CT, L"ica") && checkSuffix(R2, L"ica")) { CT = removeSuffix(CT, L"ica"); return true; } if (checkSuffix(CT, L"oso") && checkSuffix(R2, L"oso")) { CT = removeSuffix(CT, L"oso"); return true; } if (checkSuffix(CT, L"osa") && checkSuffix(R2, L"osa")) { CT = removeSuffix(CT, L"osa"); return true; } if (checkSuffix(CT, L"iva") && checkSuffix(R2, L"iva")) { CT = removeSuffix(CT, L"iva"); return true; } if (checkSuffix(CT, L"ivo") && checkSuffix(R2, L"ivo")) { CT = removeSuffix(CT, L"ivo"); return true; } if (checkSuffix(CT, L"ira") && checkSuffix(RV, L"ira") && suffixPreceded(CT, L"ira", L"e")) { CT = replaceSuffix(CT, L"ira", L"ir"); return true; } } // no ending was removed by step1 return false; } 
bool BrazilianStemmer::step2() { if (RV.empty()) { return false; } // suffix lenght = 7 if (RV.length() >= 7) { if (checkSuffix(RV, L"issemos")) { CT = removeSuffix(CT, L"issemos"); return true; } if (checkSuffix(RV, L"essemos")) { CT = removeSuffix(CT, L"essemos"); return true; } if (checkSuffix(RV, L"assemos")) { CT = removeSuffix(CT, L"assemos"); return true; } if (checkSuffix(RV, L"ariamos")) { CT = removeSuffix(CT, L"ariamos"); return true; } if (checkSuffix(RV, L"eriamos")) { CT = removeSuffix(CT, L"eriamos"); return true; } if (checkSuffix(RV, L"iriamos")) { CT = removeSuffix(CT, L"iriamos"); return true; } } // suffix length = 6 if (RV.length() >= 6) { if (checkSuffix(RV, L"iremos")) { CT = removeSuffix(CT, L"iremos"); return true; } if (checkSuffix(RV, L"eremos")) { CT = removeSuffix(CT, L"eremos"); return true; } if (checkSuffix(RV, L"aremos")) { CT = removeSuffix(CT, L"aremos"); return true; } if (checkSuffix(RV, L"avamos")) { CT = removeSuffix(CT, L"avamos"); return true; } if (checkSuffix(RV, L"iramos")) { CT = removeSuffix(CT, L"iramos"); return true; } if (checkSuffix(RV, L"eramos")) { CT = removeSuffix(CT, L"eramos"); return true; } if (checkSuffix(RV, L"aramos")) { CT = removeSuffix(CT, L"aramos"); return true; } if (checkSuffix(RV, L"asseis")) { CT = removeSuffix(CT, L"asseis"); return true; } if (checkSuffix(RV, L"esseis")) { CT = removeSuffix(CT, L"esseis"); return true; } if (checkSuffix(RV, L"isseis")) { CT = removeSuffix(CT, L"isseis"); return true; } if (checkSuffix(RV, L"arieis")) { CT = removeSuffix(CT, L"arieis"); return true; } if (checkSuffix(RV, L"erieis")) { CT = removeSuffix(CT, L"erieis"); return true; } if (checkSuffix(RV, L"irieis")) { CT = removeSuffix(CT, L"irieis"); return true; } } // suffix length = 5 if (RV.length() >= 5) { if (checkSuffix(RV, L"irmos")) { CT = removeSuffix(CT, L"irmos"); return true; } if (checkSuffix(RV, L"iamos")) { CT = removeSuffix(CT, L"iamos"); return true; } if (checkSuffix(RV, L"armos")) { CT = 
removeSuffix(CT, L"armos"); return true; } if (checkSuffix(RV, L"ermos")) { CT = removeSuffix(CT, L"ermos"); return true; } if (checkSuffix(RV, L"areis")) { CT = removeSuffix(CT, L"areis"); return true; } if (checkSuffix(RV, L"ereis")) { CT = removeSuffix(CT, L"ereis"); return true; } if (checkSuffix(RV, L"ireis")) { CT = removeSuffix(CT, L"ireis"); return true; } if (checkSuffix(RV, L"asses")) { CT = removeSuffix(CT, L"asses"); return true; } if (checkSuffix(RV, L"esses")) { CT = removeSuffix(CT, L"esses"); return true; } if (checkSuffix(RV, L"isses")) { CT = removeSuffix(CT, L"isses"); return true; } if (checkSuffix(RV, L"astes")) { CT = removeSuffix(CT, L"astes"); return true; } if (checkSuffix(RV, L"assem")) { CT = removeSuffix(CT, L"assem"); return true; } if (checkSuffix(RV, L"essem")) { CT = removeSuffix(CT, L"essem"); return true; } if (checkSuffix(RV, L"issem")) { CT = removeSuffix(CT, L"issem"); return true; } if (checkSuffix(RV, L"ardes")) { CT = removeSuffix(CT, L"ardes"); return true; } if (checkSuffix(RV, L"erdes")) { CT = removeSuffix(CT, L"erdes"); return true; } if (checkSuffix(RV, L"irdes")) { CT = removeSuffix(CT, L"irdes"); return true; } if (checkSuffix(RV, L"ariam")) { CT = removeSuffix(CT, L"ariam"); return true; } if (checkSuffix(RV, L"eriam")) { CT = removeSuffix(CT, L"eriam"); return true; } if (checkSuffix(RV, L"iriam")) { CT = removeSuffix(CT, L"iriam"); return true; } if (checkSuffix(RV, L"arias")) { CT = removeSuffix(CT, L"arias"); return true; } if (checkSuffix(RV, L"erias")) { CT = removeSuffix(CT, L"erias"); return true; } if (checkSuffix(RV, L"irias")) { CT = removeSuffix(CT, L"irias"); return true; } if (checkSuffix(RV, L"estes")) { CT = removeSuffix(CT, L"estes"); return true; } if (checkSuffix(RV, L"istes")) { CT = removeSuffix(CT, L"istes"); return true; } if (checkSuffix(RV, L"areis")) { CT = removeSuffix(CT, L"areis"); return true; } if (checkSuffix(RV, L"aveis")) { CT = removeSuffix(CT, L"aveis"); return true; } } // suffix 
length = 4 if (RV.length() >= 4) { if (checkSuffix(RV, L"aria")) { CT = removeSuffix(CT, L"aria"); return true; } if (checkSuffix(RV, L"eria")) { CT = removeSuffix(CT, L"eria"); return true; } if (checkSuffix(RV, L"iria")) { CT = removeSuffix(CT, L"iria"); return true; } if (checkSuffix(RV, L"asse")) { CT = removeSuffix(CT, L"asse"); return true; } if (checkSuffix(RV, L"esse")) { CT = removeSuffix(CT, L"esse"); return true; } if (checkSuffix(RV, L"isse")) { CT = removeSuffix(CT, L"isse"); return true; } if (checkSuffix(RV, L"aste")) { CT = removeSuffix(CT, L"aste"); return true; } if (checkSuffix(RV, L"este")) { CT = removeSuffix(CT, L"este"); return true; } if (checkSuffix(RV, L"iste")) { CT = removeSuffix(CT, L"iste"); return true; } if (checkSuffix(RV, L"arei")) { CT = removeSuffix(CT, L"arei"); return true; } if (checkSuffix(RV, L"erei")) { CT = removeSuffix(CT, L"erei"); return true; } if (checkSuffix(RV, L"irei")) { CT = removeSuffix(CT, L"irei"); return true; } if (checkSuffix(RV, L"aram")) { CT = removeSuffix(CT, L"aram"); return true; } if (checkSuffix(RV, L"eram")) { CT = removeSuffix(CT, L"eram"); return true; } if (checkSuffix(RV, L"iram")) { CT = removeSuffix(CT, L"iram"); return true; } if (checkSuffix(RV, L"avam")) { CT = removeSuffix(CT, L"avam"); return true; } if (checkSuffix(RV, L"arem")) { CT = removeSuffix(CT, L"arem"); return true; } if (checkSuffix(RV, L"erem")) { CT = removeSuffix(CT, L"erem"); return true; } if (checkSuffix(RV, L"irem")) { CT = removeSuffix(CT, L"irem"); return true; } if (checkSuffix(RV, L"ando")) { CT = removeSuffix(CT, L"ando"); return true; } if (checkSuffix(RV, L"endo")) { CT = removeSuffix(CT, L"endo"); return true; } if (checkSuffix(RV, L"indo")) { CT = removeSuffix(CT, L"indo"); return true; } if (checkSuffix(RV, L"arao")) { CT = removeSuffix(CT, L"arao"); return true; } if (checkSuffix(RV, L"erao")) { CT = removeSuffix(CT, L"erao"); return true; } if (checkSuffix(RV, L"irao")) { CT = removeSuffix(CT, L"irao"); 
return true; } if (checkSuffix(RV, L"adas")) { CT = removeSuffix(CT, L"adas"); return true; } if (checkSuffix(RV, L"idas")) { CT = removeSuffix(CT, L"idas"); return true; } if (checkSuffix(RV, L"aras")) { CT = removeSuffix(CT, L"aras"); return true; } if (checkSuffix(RV, L"eras")) { CT = removeSuffix(CT, L"eras"); return true; } if (checkSuffix(RV, L"iras")) { CT = removeSuffix(CT, L"iras"); return true; } if (checkSuffix(RV, L"avas")) { CT = removeSuffix(CT, L"avas"); return true; } if (checkSuffix(RV, L"ares")) { CT = removeSuffix(CT, L"ares"); return true; } if (checkSuffix(RV, L"eres")) { CT = removeSuffix(CT, L"eres"); return true; } if (checkSuffix(RV, L"ires")) { CT = removeSuffix(CT, L"ires"); return true; } if (checkSuffix(RV, L"ados")) { CT = removeSuffix(CT, L"ados"); return true; } if (checkSuffix(RV, L"idos")) { CT = removeSuffix(CT, L"idos"); return true; } if (checkSuffix(RV, L"amos")) { CT = removeSuffix(CT, L"amos"); return true; } if (checkSuffix(RV, L"emos")) { CT = removeSuffix(CT, L"emos"); return true; } if (checkSuffix(RV, L"imos")) { CT = removeSuffix(CT, L"imos"); return true; } if (checkSuffix(RV, L"iras")) { CT = removeSuffix(CT, L"iras"); return true; } if (checkSuffix(RV, L"ieis")) { CT = removeSuffix(CT, L"ieis"); return true; } } // suffix length = 3 if (RV.length() >= 3) { if (checkSuffix(RV, L"ada")) { CT = removeSuffix(CT, L"ada"); return true; } if (checkSuffix(RV, L"ida")) { CT = removeSuffix(CT, L"ida"); return true; } if (checkSuffix(RV, L"ara")) { CT = removeSuffix(CT, L"ara"); return true; } if (checkSuffix(RV, L"era")) { CT = removeSuffix(CT, L"era"); return true; } if (checkSuffix(RV, L"ira")) { CT = removeSuffix(CT, L"ava"); return true; } if (checkSuffix(RV, L"iam")) { CT = removeSuffix(CT, L"iam"); return true; } if (checkSuffix(RV, L"ado")) { CT = removeSuffix(CT, L"ado"); return true; } if (checkSuffix(RV, L"ido")) { CT = removeSuffix(CT, L"ido"); return true; } if (checkSuffix(RV, L"ias")) { CT = removeSuffix(CT, 
L"ias"); return true; } if (checkSuffix(RV, L"ais")) { CT = removeSuffix(CT, L"ais"); return true; } if (checkSuffix(RV, L"eis")) { CT = removeSuffix(CT, L"eis"); return true; } if (checkSuffix(RV, L"ira")) { CT = removeSuffix(CT, L"ira"); return true; } if (checkSuffix(RV, L"ear")) { CT = removeSuffix(CT, L"ear"); return true; } } // suffix length = 2 if (RV.length() >= 2) { if (checkSuffix(RV, L"ia")) { CT = removeSuffix(CT, L"ia"); return true; } if (checkSuffix(RV, L"ei")) { CT = removeSuffix(CT, L"ei"); return true; } if (checkSuffix(RV, L"am")) { CT = removeSuffix(CT, L"am"); return true; } if (checkSuffix(RV, L"em")) { CT = removeSuffix(CT, L"em"); return true; } if (checkSuffix(RV, L"ar")) { CT = removeSuffix(CT, L"ar"); return true; } if (checkSuffix(RV, L"er")) { CT = removeSuffix(CT, L"er"); return true; } if (checkSuffix(RV, L"ir")) { CT = removeSuffix(CT, L"ir"); return true; } if (checkSuffix(RV, L"as")) { CT = removeSuffix(CT, L"as"); return true; } if (checkSuffix(RV, L"es")) { CT = removeSuffix(CT, L"es"); return true; } if (checkSuffix(RV, L"is")) { CT = removeSuffix(CT, L"is"); return true; } if (checkSuffix(RV, L"eu")) { CT = removeSuffix(CT, L"eu"); return true; } if (checkSuffix(RV, L"iu")) { CT = removeSuffix(CT, L"iu"); return true; } if (checkSuffix(RV, L"iu")) { CT = removeSuffix(CT, L"iu"); return true; } if (checkSuffix(RV, L"ou")) { CT = removeSuffix(CT, L"ou"); return true; } } // no ending was removed by step2 return false; } void BrazilianStemmer::step3() { if (RV.empty()) { return; } if (checkSuffix(RV, L"i") && suffixPreceded(RV, L"i", L"c")) { CT = removeSuffix(CT, L"i"); } } void BrazilianStemmer::step4() { if (RV.empty()) { return; } if (checkSuffix(RV, L"os")) { CT = removeSuffix(CT, L"os"); return; } if (checkSuffix(RV, L"a")) { CT = removeSuffix(CT, L"a"); return; } if (checkSuffix(RV, L"i")) { CT = removeSuffix(CT, L"i"); return; } if (checkSuffix(RV, L"o")) { CT = removeSuffix(CT, L"o"); return; } } void 
BrazilianStemmer::step5() { if (RV.empty()) { return; } if (checkSuffix(RV, L"e")) { if (suffixPreceded(RV, L"e", L"gu")) { CT = removeSuffix(CT, L"e"); CT = removeSuffix(CT, L"u"); return; } if (suffixPreceded(RV, L"e", L"ci")) { CT = removeSuffix(CT, L"e"); CT = removeSuffix(CT, L"i"); return; } CT = removeSuffix(CT, L"e"); return; } } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/cjk/000077500000000000000000000000001456444476200254515ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/cjk/CJKAnalyzer.cpp000066400000000000000000000044471456444476200303030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "CJKAnalyzer.h" #include "CJKTokenizer.h" #include "StopFilter.h" namespace Lucene { const wchar_t* CJKAnalyzer::_STOP_WORDS[] = { L"a", L"and", L"are", L"as", L"at", L"be", L"but", L"by", L"for", L"if", L"in", L"into", L"is", L"it", L"no", L"not", L"of", L"on", L"or", L"s", L"such", L"t", L"that", L"the", L"their", L"then", L"there", L"these", L"they", L"this", L"to", L"was", L"will", L"with", L"", L"www" }; CJKAnalyzer::CJKAnalyzer(LuceneVersion::Version matchVersion) { this->stoptable = getDefaultStopSet(); this->matchVersion = matchVersion; } CJKAnalyzer::CJKAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stoptable = stopwords; this->matchVersion = matchVersion; } CJKAnalyzer::~CJKAnalyzer() { } const HashSet CJKAnalyzer::getDefaultStopSet() { static HashSet stopSet; LUCENE_RUN_ONCE( stopSet = HashSet::newInstance(_STOP_WORDS, _STOP_WORDS + SIZEOF_ARRAY(_STOP_WORDS)); ); return stopSet; } TokenStreamPtr CJKAnalyzer::tokenStream(const 
String& fieldName, const ReaderPtr& reader) { return newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), newLucene(reader), stoptable); } TokenStreamPtr CJKAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { CJKAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(reader); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->source, stoptable); setPreviousTokenStream(streams); } else { streams->source->reset(reader); } return streams->result; } CJKAnalyzerSavedStreams::~CJKAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/cjk/CJKTokenizer.cpp000066400000000000000000000166571456444476200304760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "CJKTokenizer.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "TypeAttribute.h" #include "Reader.h" #include "CharFolder.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { /// Word token type const int32_t CJKTokenizer::WORD_TYPE = 0; /// Single byte token type const int32_t CJKTokenizer::SINGLE_TOKEN_TYPE = 1; /// Double byte token type const int32_t CJKTokenizer::DOUBLE_TOKEN_TYPE = 2; /// Names for token types const wchar_t* CJKTokenizer::TOKEN_TYPE_NAMES[] = {L"word", L"single", L"double"}; const int32_t CJKTokenizer::MAX_WORD_LEN = 255; const int32_t CJKTokenizer::IO_BUFFER_SIZE = 256; CJKTokenizer::CJKTokenizer(const ReaderPtr& input) : Tokenizer(input) { } CJKTokenizer::CJKTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : Tokenizer(source, input) { } CJKTokenizer::CJKTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : Tokenizer(factory, input) { } CJKTokenizer::~CJKTokenizer() { } void CJKTokenizer::initialize() { offset = 0; bufferIndex = 0; dataLen = 0; buffer = CharArray::newInstance(MAX_WORD_LEN); ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); tokenType = WORD_TYPE; preIsTokened = false; termAtt = addAttribute(); offsetAtt = addAttribute(); typeAtt = addAttribute(); } CJKTokenizer::UnicodeBlock CJKTokenizer::unicodeBlock(wchar_t c) { if (c >= 0x0000 && c <= 0x007f) { return BASIC_LATIN; } else if (c >= 0xff00 && c <= 0xffef) { return HALFWIDTH_AND_FULLWIDTH_FORMS; } return NONE; } bool CJKTokenizer::incrementToken() { clearAttributes(); while (true) { // loop until we find a non-empty token int32_t length = 0; // the position used to create Token int32_t start = offset; while (true) { // loop until we've found a full token wchar_t c = 0; UnicodeBlock ub = NONE; ++offset; if (bufferIndex >= dataLen) { dataLen = input->read(ioBuffer.get(), 0, ioBuffer.size()); 
bufferIndex = 0; } if (dataLen == -1) { if (length > 0) { if (preIsTokened == true) { length = 0; preIsTokened = false; } else { --offset; } break; } else { --offset; return false; } } else { // get current character c = ioBuffer[bufferIndex++]; // get the UnicodeBlock of the current character ub = unicodeBlock(c); } // if the current character is ASCII or Extend ASCII if (ub == BASIC_LATIN || ub == HALFWIDTH_AND_FULLWIDTH_FORMS) { if (ub == HALFWIDTH_AND_FULLWIDTH_FORMS) { int32_t i = (int32_t)c; if (i >= 65281 && i <= 65374) { // convert certain HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN i = i - 65248; c = (wchar_t)i; } } // if the current character is a letter or "_" "+" "#" if (UnicodeUtil::isAlnum(c) || c == L'_' || c == L'+' || c == L'#') { if (length == 0) { // "javaC1C2C3C4linux"
// ^--: the current character begin to token the ASCII // letter start = offset - 1; } else if (tokenType == DOUBLE_TOKEN_TYPE) { // "javaC1C2C3C4linux"
// ^--: the previous non-ASCII // : the current character --offset; --bufferIndex; if (preIsTokened) { // there is only one non-ASCII has been stored length = 0; preIsTokened = false; break; } else { break; } } // store the LowerCase(c) in the buffer buffer[length++] = CharFolder::toLower(c); tokenType = SINGLE_TOKEN_TYPE; // break the procedure if buffer overflowed! if (length == MAX_WORD_LEN) { break; } } else if (length > 0) { if (preIsTokened) { length = 0; preIsTokened = false; } else { break; } } } else { // non-ASCII letter, e.g."C1C2C3C4" if (UnicodeUtil::isAlpha(c)) { if (length == 0) { start = offset - 1; buffer[length++] = c; tokenType = DOUBLE_TOKEN_TYPE; } else { if (tokenType == SINGLE_TOKEN_TYPE) { --offset; --bufferIndex; // return the previous ASCII characters break; } else { buffer[length++] = c; tokenType = DOUBLE_TOKEN_TYPE; if (length == 2) { --offset; --bufferIndex; preIsTokened = true; break; } } } } else if (length > 0) { if (preIsTokened) { // empty the buffer length = 0; preIsTokened = false; } else { break; } } } } if (length > 0) { termAtt->setTermBuffer(buffer.get(), 0, length); offsetAtt->setOffset(correctOffset(start), correctOffset(start + length)); typeAtt->setType(TOKEN_TYPE_NAMES[tokenType]); return true; } else if (dataLen == -1) { --offset; return false; } // Cycle back and try for the next token (don't return an empty string) } } void CJKTokenizer::end() { // set final offset int32_t finalOffset = correctOffset(offset); offsetAtt->setOffset(finalOffset, finalOffset); } void CJKTokenizer::reset() { Tokenizer::reset(); offset = 0; bufferIndex = 0; dataLen = 0; preIsTokened = false; tokenType = WORD_TYPE; } void CJKTokenizer::reset(const ReaderPtr& input) { Tokenizer::reset(input); reset(); } } 
LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/cn/000077500000000000000000000000001456444476200253025ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/cn/ChineseAnalyzer.cpp000066400000000000000000000025471456444476200311020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ChineseAnalyzer.h" #include "ChineseTokenizer.h" #include "ChineseFilter.h" namespace Lucene { ChineseAnalyzer::~ChineseAnalyzer() { } TokenStreamPtr ChineseAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { TokenStreamPtr result = newLucene(reader); result = newLucene(result); return result; } TokenStreamPtr ChineseAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { ChineseAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(reader); streams->result = newLucene(streams->source); setPreviousTokenStream(streams); } else { streams->source->reset(reader); } return streams->result; } ChineseAnalyzerSavedStreams::~ChineseAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp000066400000000000000000000033521456444476200305350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ChineseFilter.h" #include "TermAttribute.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { const wchar_t* ChineseFilter::STOP_WORDS[] = { L"and", L"are", L"as", L"at", L"be", L"but", L"by", L"for", L"if", L"in", L"into", L"is", L"it", L"no", L"not", L"of", L"on", L"or", L"such", L"that", L"the", L"their", L"then", L"there", L"these", L"they", L"this", L"to", L"was", L"will", L"with" }; ChineseFilter::ChineseFilter(const TokenStreamPtr& input) : TokenFilter(input) { stopTable = HashSet::newInstance(STOP_WORDS, STOP_WORDS + SIZEOF_ARRAY(STOP_WORDS)); termAtt = addAttribute(); } ChineseFilter::~ChineseFilter() { } bool ChineseFilter::incrementToken() { while (input->incrementToken()) { String text(termAtt->term()); if (!stopTable.contains(text)) { if (UnicodeUtil::isLower(text[0]) || UnicodeUtil::isUpper(text[0])) { // English word/token should larger than 1 character. if (text.length() > 1) { return true; } } else if (UnicodeUtil::isOther(text[0]) || UnicodeUtil::isDigit(text[0])) { // One Chinese character as one Chinese word. // Chinese word extraction to be added later here. return true; } } } return false; } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp000066400000000000000000000071271456444476200312660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ChineseTokenizer.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "Reader.h" #include "CharFolder.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { const int32_t ChineseTokenizer::MAX_WORD_LEN = 255; const int32_t ChineseTokenizer::IO_BUFFER_SIZE = 1024; ChineseTokenizer::ChineseTokenizer(const ReaderPtr& input) : Tokenizer(input) { } ChineseTokenizer::ChineseTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : Tokenizer(source, input) { } ChineseTokenizer::ChineseTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : Tokenizer(factory, input) { } ChineseTokenizer::~ChineseTokenizer() { } void ChineseTokenizer::initialize() { offset = 0; bufferIndex = 0; dataLen = 0; buffer = CharArray::newInstance(MAX_WORD_LEN); ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); length = 0; start = 0; termAtt = addAttribute(); offsetAtt = addAttribute(); } void ChineseTokenizer::push(wchar_t c) { if (length == 0) { start = offset - 1; // start of token } buffer[length++] = CharFolder::toLower(c); // buffer it } bool ChineseTokenizer::flush() { if (length > 0) { termAtt->setTermBuffer(buffer.get(), 0, length); offsetAtt->setOffset(correctOffset(start), correctOffset(start + length)); return true; } else { return false; } } bool ChineseTokenizer::incrementToken() { clearAttributes(); length = 0; start = offset; bool last_is_en = false, last_is_num = false; while (true) { wchar_t c; ++offset; if (bufferIndex >= dataLen) { dataLen = input->read(ioBuffer.get(), 0, ioBuffer.size()); bufferIndex = 0; } if (dataLen == -1) { --offset; return flush(); } else { c = ioBuffer[bufferIndex++]; } if (UnicodeUtil::isLower(c) || UnicodeUtil::isUpper(c)) { if (last_is_num) { --bufferIndex; --offset; return flush(); } push(c); if (length == MAX_WORD_LEN) { return flush(); } last_is_en = true; } else if 
(UnicodeUtil::isDigit(c)) { if (last_is_en) { --bufferIndex; --offset; return flush(); } push(c); if (length == MAX_WORD_LEN) { return flush(); } last_is_num = true; } else if (UnicodeUtil::isOther(c)) { if (length > 0) { --bufferIndex; --offset; return flush(); } push(c); return flush(); } else if (length > 0) { return flush(); } } } void ChineseTokenizer::end() { // set final offset int32_t finalOffset = correctOffset(offset); offsetAtt->setOffset(finalOffset, finalOffset); } void ChineseTokenizer::reset() { Tokenizer::reset(); offset = 0; bufferIndex = 0; dataLen = 0; } void ChineseTokenizer::reset(const ReaderPtr& input) { Tokenizer::reset(input); reset(); } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/cz/000077500000000000000000000000001456444476200253165ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/cz/CzechAnalyzer.cpp000066400000000000000000000173221456444476200305710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "CzechAnalyzer.h" #include "StandardTokenizer.h" #include "StandardFilter.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "StringUtils.h" namespace Lucene { /// Default Czech stopwords in UTF-8 format. 
const uint8_t CzechAnalyzer::_CZECH_STOP_WORDS[] = { 0x61, 0x0a, 0x73, 0x0a, 0x6b, 0x0a, 0x6f, 0x0a, 0x69, 0x0a, 0x75, 0x0a, 0x76, 0x0a, 0x7a, 0x0a, 0x64, 0x6e, 0x65, 0x73, 0x0a, 0x63, 0x7a, 0x0a, 0x74, 0xc3, 0xad, 0x6d, 0x74, 0x6f, 0x0a, 0x62, 0x75, 0x64, 0x65, 0xc5, 0xa1, 0x0a, 0x62, 0x75, 0x64, 0x65, 0x6d, 0x0a, 0x62, 0x79, 0x6c, 0x69, 0x0a, 0x6a, 0x73, 0x65, 0xc5, 0xa1, 0x0a, 0x6d, 0x75, 0x6a, 0x0a, 0x73, 0x76, 0xc3, 0xbd, 0x6d, 0x0a, 0x74, 0x61, 0x0a, 0x74, 0x6f, 0x6d, 0x74, 0x6f, 0x0a, 0x74, 0x6f, 0x68, 0x6c, 0x65, 0x0a, 0x74, 0x75, 0x74, 0x6f, 0x0a, 0x74, 0x79, 0x74, 0x6f, 0x0a, 0x6a, 0x65, 0x6a, 0x0a, 0x7a, 0x64, 0x61, 0x0a, 0x70, 0x72, 0x6f, 0x63, 0x0a, 0x6d, 0xc3, 0xa1, 0x74, 0x65, 0x0a, 0x74, 0x61, 0x74, 0x6f, 0x0a, 0x6b, 0x61, 0x6d, 0x0a, 0x74, 0x6f, 0x68, 0x6f, 0x74, 0x6f, 0x0a, 0x6b, 0x64, 0x6f, 0x0a, 0x6b, 0x74, 0x65, 0x72, 0xc3, 0xad, 0x0a, 0x6d, 0x69, 0x0a, 0x6e, 0xc3, 0xa1, 0x6d, 0x0a, 0x74, 0x6f, 0x6d, 0x0a, 0x74, 0x6f, 0x6d, 0x75, 0x74, 0x6f, 0x0a, 0x6d, 0xc3, 0xad, 0x74, 0x0a, 0x6e, 0x69, 0x63, 0x0a, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x0a, 0x6b, 0x74, 0x65, 0x72, 0x6f, 0x75, 0x0a, 0x62, 0x79, 0x6c, 0x61, 0x0a, 0x74, 0x6f, 0x68, 0x6f, 0x0a, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0xc5, 0xbe, 0x65, 0x0a, 0x61, 0x73, 0x69, 0x0a, 0x68, 0x6f, 0x0a, 0x6e, 0x61, 0xc5, 0xa1, 0x69, 0x0a, 0x6e, 0x61, 0x70, 0x69, 0xc5, 0xa1, 0x74, 0x65, 0x0a, 0x72, 0x65, 0x0a, 0x63, 0x6f, 0xc5, 0xbe, 0x0a, 0x74, 0xc3, 0xad, 0x6d, 0x0a, 0x74, 0x61, 0x6b, 0xc5, 0xbe, 0x65, 0x0a, 0x73, 0x76, 0xc3, 0xbd, 0x63, 0x68, 0x0a, 0x6a, 0x65, 0x6a, 0xc3, 0xad, 0x0a, 0x73, 0x76, 0xc3, 0xbd, 0x6d, 0x69, 0x0a, 0x6a, 0x73, 0x74, 0x65, 0x0a, 0x61, 0x6a, 0x0a, 0x74, 0x75, 0x0a, 0x74, 0x65, 0x64, 0x79, 0x0a, 0x74, 0x65, 0x74, 0x6f, 0x0a, 0x62, 0x79, 0x6c, 0x6f, 0x0a, 0x6b, 0x64, 0x65, 0x0a, 0x6b, 0x65, 0x0a, 0x70, 0x72, 0x61, 0x76, 0xc3, 0xa9, 0x0a, 0x6a, 0x69, 0x0a, 0x6e, 0x61, 0x64, 0x0a, 0x6e, 0x65, 0x6a, 0x73, 0x6f, 0x75, 0x0a, 0x63, 0x69, 0x0a, 0x70, 0x6f, 0x64, 0x0a, 0x74, 0xc3, 0xa9, 0x6d, 0x61, 
0x0a, 0x6d, 0x65, 0x7a, 0x69, 0x0a, 0x70, 0x72, 0x65, 0x73, 0x0a, 0x74, 0x79, 0x0a, 0x70, 0x61, 0x6b, 0x0a, 0x76, 0xc3, 0xa1, 0x6d, 0x0a, 0x61, 0x6e, 0x69, 0x0a, 0x6b, 0x64, 0x79, 0xc5, 0xbe, 0x0a, 0x76, 0xc5, 0xa1, 0x61, 0x6b, 0x0a, 0x6e, 0x65, 0x67, 0x0a, 0x6a, 0x73, 0x65, 0x6d, 0x0a, 0x74, 0x65, 0x6e, 0x74, 0x6f, 0x0a, 0x63, 0x6c, 0xc3, 0xa1, 0x6e, 0x6b, 0x75, 0x0a, 0x63, 0x6c, 0xc3, 0xa1, 0x6e, 0x6b, 0x79, 0x0a, 0x61, 0x62, 0x79, 0x0a, 0x6a, 0x73, 0x6d, 0x65, 0x0a, 0x70, 0x72, 0x65, 0x64, 0x0a, 0x70, 0x74, 0x61, 0x0a, 0x6a, 0x65, 0x6a, 0x69, 0x63, 0x68, 0x0a, 0x62, 0x79, 0x6c, 0x0a, 0x6a, 0x65, 0xc5, 0xa1, 0x74, 0x65, 0x0a, 0x61, 0xc5, 0xbe, 0x0a, 0x62, 0x65, 0x7a, 0x0a, 0x74, 0x61, 0x6b, 0xc3, 0xa9, 0x0a, 0x70, 0x6f, 0x75, 0x7a, 0x65, 0x0a, 0x70, 0x72, 0x76, 0x6e, 0xc3, 0xad, 0x0a, 0x76, 0x61, 0xc5, 0xa1, 0x65, 0x0a, 0x6b, 0x74, 0x65, 0x72, 0xc3, 0xa1, 0x0a, 0x6e, 0xc3, 0xa1, 0x73, 0x0a, 0x6e, 0x6f, 0x76, 0xc3, 0xbd, 0x0a, 0x74, 0x69, 0x70, 0x79, 0x0a, 0x70, 0x6f, 0x6b, 0x75, 0x64, 0x0a, 0x6d, 0x75, 0xc5, 0xbe, 0x65, 0x0a, 0x73, 0x74, 0x72, 0x61, 0x6e, 0x61, 0x0a, 0x6a, 0x65, 0x68, 0x6f, 0x0a, 0x73, 0x76, 0xc3, 0xa9, 0x0a, 0x6a, 0x69, 0x6e, 0xc3, 0xa9, 0x0a, 0x7a, 0x70, 0x72, 0xc3, 0xa1, 0x76, 0x79, 0x0a, 0x6e, 0x6f, 0x76, 0xc3, 0xa9, 0x0a, 0x6e, 0x65, 0x6e, 0xc3, 0xad, 0x0a, 0x76, 0xc3, 0xa1, 0x73, 0x0a, 0x6a, 0x65, 0x6e, 0x0a, 0x70, 0x6f, 0x64, 0x6c, 0x65, 0x0a, 0x7a, 0x64, 0x65, 0x0a, 0x75, 0xc5, 0xbe, 0x0a, 0x62, 0xc3, 0xbd, 0x74, 0x0a, 0x76, 0xc3, 0xad, 0x63, 0x65, 0x0a, 0x62, 0x75, 0x64, 0x65, 0x0a, 0x6a, 0x69, 0xc5, 0xbe, 0x0a, 0x6e, 0x65, 0xc5, 0xbe, 0x0a, 0x6b, 0x74, 0x65, 0x72, 0xc3, 0xbd, 0x0a, 0x62, 0x79, 0x0a, 0x6b, 0x74, 0x65, 0x72, 0xc3, 0xa9, 0x0a, 0x63, 0x6f, 0x0a, 0x6e, 0x65, 0x62, 0x6f, 0x0a, 0x74, 0x65, 0x6e, 0x0a, 0x74, 0x61, 0x6b, 0x0a, 0x6d, 0xc3, 0xa1, 0x0a, 0x70, 0x72, 0x69, 0x0a, 0x6f, 0x64, 0x0a, 0x70, 0x6f, 0x0a, 0x6a, 0x73, 0x6f, 0x75, 0x0a, 0x6a, 0x61, 0x6b, 0x0a, 0x64, 0x61, 0x6c, 0xc5, 0xa1, 0xc3, 0xad, 0x0a, 0x61, 0x6c, 0x65, 
0x0a, 0x73, 0x69, 0x0a, 0x73, 0x65, 0x0a, 0x76, 0x65, 0x0a, 0x74, 0x6f, 0x0a, 0x6a, 0x61, 0x6b, 0x6f, 0x0a, 0x7a, 0x61, 0x0a, 0x7a, 0x70, 0x65, 0x74, 0x0a, 0x7a, 0x65, 0x0a, 0x64, 0x6f, 0x0a, 0x70, 0x72, 0x6f, 0x0a, 0x6a, 0x65, 0x0a, 0x6e, 0x61, 0x0a, 0x61, 0x74, 0x64, 0x0a, 0x61, 0x74, 0x70, 0x0a, 0x6a, 0x61, 0x6b, 0x6d, 0x69, 0x6c, 0x65, 0x0a, 0x70, 0x72, 0x69, 0x63, 0x65, 0x6d, 0xc5, 0xbe, 0x0a, 0x6a, 0xc3, 0xa1, 0x0a, 0x6f, 0x6e, 0x0a, 0x6f, 0x6e, 0x61, 0x0a, 0x6f, 0x6e, 0x6f, 0x0a, 0x6f, 0x6e, 0x69, 0x0a, 0x6f, 0x6e, 0x79, 0x0a, 0x6d, 0x79, 0x0a, 0x76, 0x79, 0x0a, 0x6a, 0xc3, 0xad, 0x0a, 0x6a, 0x69, 0x0a, 0x6d, 0x65, 0x0a, 0x6d, 0x6e, 0x65, 0x0a, 0x6a, 0x65, 0x6d, 0x75, 0x0a, 0x74, 0x6f, 0x6d, 0x75, 0x0a, 0x74, 0x65, 0x6d, 0x0a, 0x74, 0x65, 0x6d, 0x75, 0x0a, 0x6e, 0x65, 0x6d, 0x75, 0x0a, 0x6e, 0x65, 0x6d, 0x75, 0xc5, 0xbe, 0x0a, 0x6a, 0x65, 0x68, 0x6f, 0xc5, 0xbe, 0x0a, 0x6a, 0xc3, 0xad, 0xc5, 0xbe, 0x0a, 0x6a, 0x65, 0x6c, 0x69, 0x6b, 0x6f, 0xc5, 0xbe, 0x0a, 0x6a, 0x65, 0xc5, 0xbe, 0x0a, 0x6a, 0x61, 0x6b, 0x6f, 0xc5, 0xbe, 0x0a, 0x6e, 0x61, 0x63, 0x65, 0xc5, 0xbe, 0x0a }; CzechAnalyzer::CzechAnalyzer(LuceneVersion::Version matchVersion) { this->stoptable = getDefaultStopSet(); this->matchVersion = matchVersion; } CzechAnalyzer::CzechAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stoptable = stopwords; this->matchVersion = matchVersion; } CzechAnalyzer::~CzechAnalyzer() { } const HashSet CzechAnalyzer::getDefaultStopSet() { static HashSet stopSet; LUCENE_RUN_ONCE( String stopWords(UTF8_TO_STRING(_CZECH_STOP_WORDS)); Collection words(StringUtils::split(stopWords, L"\n")); stopSet = HashSet::newInstance(words.begin(), words.end()); ); return stopSet; } TokenStreamPtr CzechAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { TokenStreamPtr result = newLucene(matchVersion, reader); result = newLucene(result); result = newLucene(result); result = 
newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); return result; } TokenStreamPtr CzechAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { CzechAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(matchVersion, reader); streams->result = newLucene(streams->source); streams->result = newLucene(streams->result); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); setPreviousTokenStream(streams); } else { streams->source->reset(reader); } return streams->result; } CzechAnalyzerSavedStreams::~CzechAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/de/000077500000000000000000000000001456444476200252725ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/de/GermanAnalyzer.cpp000066400000000000000000000067331456444476200307260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "GermanAnalyzer.h" #include "StandardTokenizer.h" #include "StandardFilter.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "GermanStemFilter.h" namespace Lucene { const wchar_t* GermanAnalyzer::_GERMAN_STOP_WORDS[] = { L"einer", L"eine", L"eines", L"einem", L"einen", L"der", L"die", L"das", L"dass", L"da\x00df", L"du", L"er", L"sie", L"es", L"was", L"wer", L"wie", L"wir", L"und", L"oder", L"ohne", L"mit", L"am", L"im", L"in", L"aus", L"auf", L"ist", L"sein", L"war", L"wird", L"ihr", L"ihre", L"ihres", L"als", L"f\x00fcr", L"von", L"mit", L"dich", L"dir", L"mich", L"mir", L"mein", L"sein", L"kein", L"durch", L"wegen", L"wird" }; GermanAnalyzer::GermanAnalyzer(LuceneVersion::Version matchVersion) { this->stopSet = getDefaultStopSet(); this->matchVersion = matchVersion; } GermanAnalyzer::GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stopSet = stopwords; this->matchVersion = matchVersion; } GermanAnalyzer::GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) { this->stopSet = stopwords; this->exclusionSet = exclusions; this->matchVersion = matchVersion; } GermanAnalyzer::~GermanAnalyzer() { } const HashSet GermanAnalyzer::getDefaultStopSet() { static HashSet stopSet; LUCENE_RUN_ONCE( stopSet = HashSet::newInstance(_GERMAN_STOP_WORDS, _GERMAN_STOP_WORDS + SIZEOF_ARRAY(_GERMAN_STOP_WORDS)); ); return stopSet; } void GermanAnalyzer::setStemExclusionTable(HashSet exclusions) { exclusionSet = exclusions; setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created } TokenStreamPtr GermanAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { TokenStreamPtr result = newLucene(matchVersion, reader); result = newLucene(result); result = newLucene(result); result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), 
result, stopSet); result = newLucene(result, exclusionSet); return result; } TokenStreamPtr GermanAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { GermanAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(matchVersion, reader); streams->result = newLucene(streams->source); streams->result = newLucene(streams->result); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); streams->result = newLucene(streams->result, exclusionSet); setPreviousTokenStream(streams); } else { streams->source->reset(reader); } return streams->result; } GermanAnalyzerSavedStreams::~GermanAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/de/GermanStemFilter.cpp000066400000000000000000000032241456444476200312070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "GermanStemFilter.h" #include "GermanStemmer.h" #include "TermAttribute.h" namespace Lucene { GermanStemFilter::GermanStemFilter(const TokenStreamPtr& input) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); } GermanStemFilter::GermanStemFilter(const TokenStreamPtr& input, HashSet exclusionSet) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); this->exclusionSet = exclusionSet; } GermanStemFilter::~GermanStemFilter() { } bool GermanStemFilter::incrementToken() { if (input->incrementToken()) { String term(termAtt->term()); // Check the exclusion table. 
if (!exclusionSet || !exclusionSet.contains(term)) { String s(stemmer->stem(term)); // If not stemmed, don't waste the time adjusting the token. if (!s.empty() && s != term) { termAtt->setTermBuffer(s); } } return true; } else { return false; } } void GermanStemFilter::setStemmer(const GermanStemmerPtr& stemmer) { if (stemmer) { this->stemmer = stemmer; } } void GermanStemFilter::setExclusionSet(HashSet exclusionSet) { this->exclusionSet = exclusionSet; } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/de/GermanStemmer.cpp000066400000000000000000000134241456444476200305500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include #include "GermanStemmer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { GermanStemmer::GermanStemmer() { substCount = 0; } GermanStemmer::~GermanStemmer() { } String GermanStemmer::stem(const String& term) { // Use lowercase for medium stemming. 
buffer = StringUtils::toLower(term); if (!isStemmable()) { return buffer; } // Stemming starts here substitute(); strip(); optimize(); resubstitute(); removeParticleDenotion(); return buffer; } bool GermanStemmer::isStemmable() { for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { if (!UnicodeUtil::isAlpha(buffer[c])) { return false; } } return true; } void GermanStemmer::strip() { bool doMore = true; while (doMore && buffer.length() > 3) { if (buffer.length() + substCount > 5 && boost::ends_with(buffer, L"nd")) { buffer.resize(buffer.length() - 2); } else if (buffer.length() + substCount > 4 && boost::ends_with(buffer, L"em")) { buffer.resize(buffer.length() - 2); } else if (buffer.length() + substCount > 4 && boost::ends_with(buffer, L"er")) { buffer.resize(buffer.length() - 2); } else if (buffer[buffer.length() - 1] == L'e') { buffer.resize(buffer.length() - 1); } else if (buffer[buffer.length() - 1] == L's') { buffer.resize(buffer.length() - 1); } else if (buffer[buffer.length() - 1] == L'n') { buffer.resize(buffer.length() - 1); } // "t" occurs only as suffix of verbs. else if (buffer[buffer.length() - 1] == L't') { buffer.resize(buffer.length() - 1); } else { doMore = false; } } } void GermanStemmer::optimize() { // Additional step for female plurals of professions and inhabitants. if (buffer.length() > 5 && boost::ends_with(buffer, L"erin*")) { buffer.resize(buffer.length() - 1); strip(); } // Additional step for irregular plural nouns like "Matrizen -> Matrix". 
if (buffer[buffer.length() - 1] == L'z') { buffer[buffer.length() - 1] = L'x'; } } void GermanStemmer::removeParticleDenotion() { if (buffer.length() > 4) { for (int32_t c = 0; c < (int32_t)buffer.length() - 3; ++c) { if (buffer.substr(c, 4) == L"gege") { buffer.erase(c, 2); return; } } } } void GermanStemmer::substitute() { substCount = 0; for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { // Replace the second char of a pair of the equal characters with an asterisk if (c > 0 && buffer[c] == buffer[c - 1]) { buffer[c] = L'*'; } // Substitute Umlauts. else if (buffer[c] == L'\x00e4') { buffer[c] = L'a'; } else if (buffer[c] == L'\x00f6') { buffer[c] = L'o'; } else if (buffer[c] == L'\x00fc') { buffer[c] = L'u'; } // Fix bug so that 'ß' at the end of a word is replaced. else if (buffer[c] == L'\x00df') { buffer[c] = L's'; buffer.insert(c + 1, 1, L's'); ++substCount; } // Take care that at least one character is left left side from the current one if (c < (int32_t)buffer.length() - 1) { // Masking several common character combinations with an token if (c < (int32_t)buffer.length() - 2 && buffer[c] == L's' && buffer[c + 1] == L'c' && buffer[c + 2] == L'h') { buffer[c] = L'$'; buffer.erase(c + 1, 2); substCount += 2; } else if (buffer[c] == L'c' && buffer[c + 1] == L'h') { buffer[c] = L'\x00a7'; buffer.erase(c + 1, 1); ++substCount; } else if (buffer[c] == L'e' && buffer[c + 1] == L'i') { buffer[c] = L'%'; buffer.erase(c + 1, 1); ++substCount; } else if (buffer[c] == L'i' && buffer[c + 1] == L'e') { buffer[c] = L'&'; buffer.erase(c + 1, 1); ++substCount; } else if (buffer[c] == L'i' && buffer[c + 1] == L'g') { buffer[c] = L'#'; buffer.erase(c + 1, 1); ++substCount; } else if (buffer[c] == L's' && buffer[c + 1] == L't') { buffer[c] = L'!'; buffer.erase(c + 1, 1); ++substCount; } } } } void GermanStemmer::resubstitute() { for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { if (buffer[c] == L'*') { buffer[c] = buffer[c - 1]; } else if (buffer[c] == L'$') { 
buffer[c] = L's'; buffer.insert(c + 1, L"ch"); } else if (buffer[c] == L'\x00a7') { buffer[c] = L'c'; buffer.insert(c + 1, 1, L'h'); } else if (buffer[c] == L'%') { buffer[c] = L'e'; buffer.insert(c + 1, 1, L'i'); } else if (buffer[c] == L'&') { buffer[c] = L'i'; buffer.insert(c + 1, 1, L'e'); } else if (buffer[c] == L'#') { buffer[c] = L'i'; buffer.insert(c + 1, 1, L'g'); } else if (buffer[c] == L'!') { buffer[c] = L's'; buffer.insert(c + 1, 1, L't'); } } } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/el/000077500000000000000000000000001456444476200253025ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/el/GreekAnalyzer.cpp000066400000000000000000000146431456444476200305610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "GreekAnalyzer.h" #include "StandardTokenizer.h" #include "GreekLowerCaseFilter.h" #include "StopFilter.h" #include "StringUtils.h" namespace Lucene { /// Default Greek stopwords in UTF-8 format. 
const uint8_t GreekAnalyzer::_GREEK_STOP_WORDS[] = { 0xce, 0xbf, 0x0a, 0xce, 0xb7, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0x0a, 0xce, 0xbf, 0xce, 0xb9, 0x0a, 0xcf, 0x84, 0xce, 0xb1, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x85, 0x0a, 0xcf, 0x84, 0xce, 0xb7, 0xcf, 0x83, 0x0a, 0xcf, 0x84, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0xce, 0xbd, 0x0a, 0xcf, 0x84, 0xce, 0xb7, 0xce, 0xbd, 0x0a, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x0a, 0xce, 0xba, 0xce, 0xb9, 0x0a, 0xce, 0xba, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbc, 0xce, 0xb1, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83, 0xce, 0xb1, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xb1, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbc, 0xce, 0xb1, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xb5, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xb5, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0xce, 0xbd, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xb7, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xb7, 0xce, 0xbd, 0x0a, 0xce, 0xbc, 0xce, 0xb1, 0x0a, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0x0a, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbf, 0x0a, 0xce, 0xb3, 0xce, 0xb9, 0xce, 0xb1, 0x0a, 0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x83, 0x0a, 0xce, 0xbc, 0xce, 0xb5, 0x0a, 0xcf, 0x83, 0xce, 0xb5, 0x0a, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xcf, 0x80, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0x0a, 0xce, 0xb1, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb9, 0x0a, 0xce, 0xba, 0xce, 0xb1, 0xcf, 0x84, 0xce, 0xb1, 0x0a, 0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb1, 0x0a, 0xce, 0xb8, 0xce, 0xb1, 0x0a, 0xce, 0xbd, 0xce, 0xb1, 0x0a, 0xce, 0xb4, 0xce, 0xb5, 0x0a, 0xce, 0xb4, 0xce, 0xb5, 0xce, 0xbd, 0x0a, 0xce, 0xbc, 0xce, 0xb7, 0x0a, 0xce, 0xbc, 0xce, 0xb7, 0xce, 0xbd, 0x0a, 0xce, 0xb5, 0xcf, 0x80, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xbd, 0xcf, 0x89, 0x0a, 0xce, 0xb5, 0xce, 0xb1, 0xce, 0xbd, 0x0a, 0xce, 0xb1, 0xce, 0xbd, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x84, 0xce, 0xb5, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xcf, 
0x85, 0x0a, 0xcf, 0x80, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xcf, 0x83, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xb1, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xce, 0xb9, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xb5, 0xcf, 0x83, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xb7, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xbf, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xbf, 0xce, 0xb9, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xb5, 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xb1, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xbf, 0xcf, 0x83, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xb7, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xbf, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xbf, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xb5, 0xcf, 0x83, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xb1, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0x0a, 0xce, 0xbf, 0xcf, 0x80, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xce, 0xbf, 0xce, 0xbc, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xce, 0xb9, 0xcf, 0x83, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xce, 0xbf, 0xcf, 0x83, 0xce, 0xbf, 0x0a, 0xce, 0xbf, 0xcf, 0x84, 0xce, 0xb9, 0x0a }; 
GreekAnalyzer::GreekAnalyzer(LuceneVersion::Version matchVersion) { this->stopSet = getDefaultStopSet(); this->matchVersion = matchVersion; } GreekAnalyzer::GreekAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stopSet = stopwords; this->matchVersion = matchVersion; } GreekAnalyzer::~GreekAnalyzer() { } const HashSet GreekAnalyzer::getDefaultStopSet() { static HashSet stopSet; LUCENE_RUN_ONCE( String stopWords(UTF8_TO_STRING(_GREEK_STOP_WORDS)); Collection words(StringUtils::split(stopWords, L"\n")); stopSet = HashSet::newInstance(words.begin(), words.end()); ); return stopSet; } TokenStreamPtr GreekAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { TokenStreamPtr result = newLucene(matchVersion, reader); result = newLucene(result); result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); return result; } TokenStreamPtr GreekAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { GreekAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(matchVersion, reader); streams->result = newLucene(streams->source); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); setPreviousTokenStream(streams); } else { streams->source->reset(reader); } return streams->result; } GreekAnalyzerSavedStreams::~GreekAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/el/GreekLowerCaseFilter.cpp000066400000000000000000000054751456444476200320310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "GreekLowerCaseFilter.h" #include "TermAttribute.h" #include "CharFolder.h" namespace Lucene { GreekLowerCaseFilter::GreekLowerCaseFilter(const TokenStreamPtr& input) : TokenFilter(input) { termAtt = addAttribute(); } GreekLowerCaseFilter::~GreekLowerCaseFilter() { } bool GreekLowerCaseFilter::incrementToken() { if (input->incrementToken()) { wchar_t* chArray = termAtt->termBufferArray(); int32_t chLen = termAtt->termLength(); for (int32_t i = 0; i < chLen; ++i) { chArray[i] = lowerCase(chArray[i]); } return true; } else { return false; } } wchar_t GreekLowerCaseFilter::lowerCase(wchar_t codepoint) { switch (codepoint) { case L'\x03c2': // small final sigma return 0x03c3; // small sigma // Some Greek characters contain diacritics. // This filter removes these, converting to the lowercase base form. case L'\x0386': // capital alpha with tonos case L'\x03ac': // small alpha with tonos return L'\x03b1'; // small alpha case L'\x0388': // capital epsilon with tonos case L'\x03ad': // small epsilon with tonos return L'\x03b5'; // small epsilon case L'\x0389': // capital eta with tonos case L'\x03ae': // small eta with tonos return L'\x03b7'; // small eta case L'\x038a': // capital iota with tonos case L'\x03aa': // capital iota with dialytika case L'\x03af': // small iota with tonos case L'\x03ca': // small iota with dialytika case L'\x0390': // small iota with dialytika and tonos return L'\x03b9'; // small iota case L'\x038e': // capital upsilon with tonos case L'\x03ab': // capital upsilon with dialytika case L'\x03cd': // small upsilon with tonos case L'\x03cb': // small upsilon with dialytika case L'\x03b0': // small upsilon with dialytika and tonos return L'\x03c5'; // small upsilon case L'\x038c': // capital omicron with tonos case L'\x03cc': // small omicron with tonos return L'\x03bf'; // small omicron case L'\x038f': // capital omega with tonos case 
L'\x03ce': // small omega with tonos return L'\x03c9'; // small omega // The previous implementation did the conversion below. // Only implemented for backwards compatibility with old indexes. case L'\x03a2': // reserved return L'\x03c2'; // small final sigma default: return CharFolder::toLower(codepoint); } } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/fa/000077500000000000000000000000001456444476200252705ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/fa/PersianAnalyzer.cpp000066400000000000000000000470271456444476200311150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "PersianAnalyzer.h" #include "ArabicLetterTokenizer.h" #include "ArabicNormalizationFilter.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "PersianNormalizationFilter.h" #include "StringUtils.h" namespace Lucene { /// Default Persian stopwords in UTF-8 format. /// /// Generated from http://members.unine.ch/jacques.savoy/clef/index.html /// The stopword list is BSD-Licensed. 
const uint8_t PersianAnalyzer::DEFAULT_STOPWORD_FILE[] = { 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x86, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xb3, 0xd8, 0xb1, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xb1, 0x0a, 0xd8, 0xae, 0xd9, 0x8a, 0xd8, 0xa7, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb4, 0xd8, 0xaa, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xaf, 0xd9, 0x88, 0xd9, 0x85, 0x0a, 0xd9, 0xbe, 0xd8, 0xb3, 0x0a, 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xb4, 0xd9, 0x8a, 0x0a, 0xd9, 0x88, 0xda, 0xaf, 0xd9, 0x88, 0x0a, 0xd9, 0x8a, 0xd8, 0xa7, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb3, 0xd9, 0xbe, 0xd8, 0xb3, 0x0a, 0xd9, 0x87, 0xd9, 0x86, 0xda, 0xaf, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd9, 0x87, 0xd8, 0xb1, 0xda, 0xaf, 0xd8, 0xb2, 0x0a, 0xd9, 0xbe, 0xd9, 0x86, 0xd8, 0xac, 0x0a, 0xd9, 0x86, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x84, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, 0xaf, 0xd8, 0xb1, 0x0a, 0xda, 0xaf, 0xd8, 0xb1, 0xd9, 0x88, 0xd9, 0x87, 0xd9, 0x8a, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xda, 0x86, 0xd8, 0xb7, 0xd9, 0x88, 0xd8, 0xb1, 0x0a, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x88, 0x0a, 0xd8, 0xaf, 0xd9, 0x88, 0x0a, 0xd9, 0x86, 0xd8, 0xae, 0xd8, 0xb3, 0xd8, 0xaa, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x84, 0xd9, 0x8a, 0x0a, 0xda, 0x86, 0xd8, 0xb1, 0xd8, 0xa7, 0x0a, 0xda, 0x86, 0xd9, 0x87, 0x0a, 0xd9, 0x88, 0xd8, 0xb3, 0xd8, 0xb7, 0x0a, 0xd9, 0x87, 0x0a, 0xd9, 0x83, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd9, 0x82, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xd9, 0x8a, 0xd9, 0x83, 0x0a, 0xd8, 0xb1, 0xd9, 0x81, 0xd8, 0xaa, 0x0a, 0xd9, 0x87, 0xd9, 0x81, 0xd8, 0xaa, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xda, 0x86, 0xd9, 0x86, 0xd9, 
0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, 0xd8, 0xb1, 0x0a, 0xd9, 0x87, 0xd8, 0xb2, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd9, 0x84, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd9, 0x84, 0xd9, 0x8a, 0x0a, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xb4, 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xb3, 0xd9, 0x8a, 0x0a, 0xda, 0xaf, 0xd8, 0xb1, 0xd9, 0x81, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xaf, 0xd9, 0x87, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x86, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd9, 0x85, 0xd9, 0x8a, 0xd9, 0x84, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd9, 0x88, 0xd9, 0x82, 0xd8, 0xaa, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xaf, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x0a, 0xd8, 0xac, 0xd8, 0xb2, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd9, 0x84, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xd8, 0xae, 0xd8, 0xaf, 0xd9, 0x85, 0xd8, 0xa7, 0xd8, 0xaa, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xae, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd8, 0xb3, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xac, 0xd9, 0x84, 0xd9, 0x88, 0xda, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xad, 0xd9, 0x82, 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd9, 0x86, 0xd9, 0x88, 0xd8, 0xb9, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x86, 0xd8, 0xb8, 0xd9, 0x8a, 0xd8, 0xb1, 0x0a, 0xd9, 0x86, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 
0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd9, 0x87, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xac, 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x8a, 0x0a, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x82, 0x0a, 0xd9, 0x87, 0xd9, 0x8a, 0xda, 0x86, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xac, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x85, 0xd8, 0xaa, 0xd8, 0xb1, 0x0a, 0xd9, 0x83, 0xd8, 0xac, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xda, 0xaf, 0xd8, 0xb1, 0xd8, 0xaf, 0xd8, 0xaf, 0x0a, 0xd9, 0x83, 0xd8, 0xb3, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd8, 0xb1, 0x0a, 0xd9, 0x85, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x85, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb3, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xac, 0xd8, 0xaf, 0xd8, 0xa7, 0x0a, 0xd9, 0x86, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd9, 0x85, 0xda, 0xaf, 0xd8, 0xb1, 0x0a, 0xd9, 0x8a, 0xd9, 0x83, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, 0xaf, 0xd8, 0xb1, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd9, 0x87, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x87, 0xd9, 0x86, 0xda, 0xaf, 0xd8, 0xa7, 0xd9, 0x85, 0xd9, 0x8a, 0x0a, 0xd8, 0xb3, 0xd9, 0x85, 0xd8, 0xaa, 0x0a, 0xd8, 0xac, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xda, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb2, 0xd9, 0x8a, 0xd8, 
0xa7, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xab, 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd8, 0xaf, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x87, 0xd8, 0xaa, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb4, 0xd8, 0xaa, 0xd8, 0xb1, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa8, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xb3, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xb6, 0xd9, 0x8a, 0x0a, 0xda, 0xaf, 0xd8, 0xb1, 0xd9, 0x81, 0xd8, 0xaa, 0x0a, 0xd8, 0xaa, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd9, 0x85, 0xd9, 0x8a, 0xd9, 0x84, 0xd9, 0x8a, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0x0a, 0xd8, 0xac, 0xd8, 0xb1, 0xd9, 0x8a, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xaa, 0xd9, 0x88, 0xd9, 0x84, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0x0a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd8, 0xa8, 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd9, 0x85, 0xd8, 0xaf, 0xd8, 0xaa, 0xd9, 0x8a, 0x0a, 0xda, 0xaf, 0xd9, 0x88, 0xd9, 0x8a, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0x0a, 0xd8, 0xaa, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xac, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xda, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x85, 0x0a, 0xda, 0xaf, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd9, 0x86, 0xd9, 0x85, 0xd9, 0x8a, 0x0a, 
0xd9, 0x86, 0xd8, 0xb2, 0xd8, 0xaf, 0x0a, 0xd8, 0xb1, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd9, 0x82, 0xd8, 0xb5, 0xd8, 0xaf, 0x0a, 0xd9, 0x81, 0xd9, 0x82, 0xd8, 0xb7, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, 0xaf, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, 0xd8, 0xaa, 0xd9, 0x88, 0xd8, 0xb3, 0xd8, 0xb7, 0x0a, 0xd8, 0xb3, 0xd9, 0x88, 0xd9, 0x85, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb3, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa, 0xd9, 0x81, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xb4, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xae, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xb7, 0xd9, 0x88, 0xd8, 0xb1, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xb1, 0xd9, 0x81, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd9, 0x86, 0xd8, 0xae, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd9, 0x86, 0xd8, 0xb2, 0xd8, 0xaf, 0xd9, 0x8a, 0xd9, 0x83, 0x0a, 0xd8, 0xb7, 0xd9, 0x8a, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xb2, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xaa, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x85, 0xd9, 0x8a, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x0a, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 0x8a, 0x0a, 0xd8, 0xb7, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x82, 0x0a, 0xd8, 0xa7, 0xd8, 0xb4, 0x0a, 0xda, 0x86, 0xd9, 0x8a, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0x0a, 0xd9, 0x86, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xda, 0xaf, 0xd9, 0x81, 0xd8, 0xaa, 0x0a, 0xda, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xda, 
0x86, 0xd9, 0x8a, 0xd8, 0xb2, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xa7, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xaa, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, 0xaf, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xda, 0x86, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0xbe, 0xd8, 0xa7, 0xd8, 0xb9, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x83, 0xd8, 0xb3, 0x0a, 0xd8, 0xad, 0xd8, 0xaf, 0xd9, 0x88, 0xd8, 0xaf, 0x0a, 0xd9, 0x85, 0xd8, 0xae, 0xd8, 0xaa, 0xd9, 0x84, 0xd9, 0x81, 0x0a, 0xd9, 0x85, 0xd9, 0x82, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xda, 0x86, 0xd9, 0x8a, 0xd8, 0xb2, 0x0a, 0xda, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd9, 0x86, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd8, 0xb6, 0xd8, 0xaf, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xda, 0x86, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xb2, 0xd9, 0x8a, 0x0a, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x85, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x87, 0x0a, 0xd9, 0x85, 0xd8, 0xb1, 0xd8, 0xb3, 0xd9, 0x8a, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xb4, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xda, 0x86, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xae, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xac, 0x0a, 0xd8, 0xb4, 0xd8, 0xb4, 0x0a, 0xd9, 0x87, 0xd9, 0x86, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, 0xd8, 0xaa, 0xd8, 0xad, 0xd8, 0xaa, 0x0a, 0xd8, 0xb6, 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd9, 0x87, 0xd8, 0xb3, 0xd8, 0xaa, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xda, 0xaf, 
0xd9, 0x81, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd9, 0x81, 0xd9, 0x83, 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd8, 0xb3, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd9, 0xbe, 0xd9, 0x8a, 0xd8, 0xb4, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xb2, 0xd9, 0x87, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xd9, 0x86, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x84, 0x0a, 0xd9, 0x88, 0xd9, 0x82, 0xd8, 0xaa, 0xd9, 0x8a, 0x0a, 0xd9, 0x83, 0xd9, 0x8a, 0x0a, 0xda, 0x86, 0xd9, 0x86, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xda, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd9, 0x8a, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd9, 0x83, 0xd8, 0xac, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd9, 0x86, 0xd9, 0x8a, 0xd8, 0xb2, 0x0a, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xa8, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 0x86, 0xd8, 0xaf, 0xd9, 0x8a, 0x0a, 0xd8, 0xad, 0xd8, 0xaa, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb9, 0xd9, 0x82, 0xd8, 0xa8, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xaa, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd9, 0x87, 0x0a, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd9, 0x85, 0xd8, 0xab, 0xd9, 0x84, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x87, 0x0a, 0xd8, 0xb7, 0xd8, 0xa8, 0xd9, 0x82, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xda, 0xaf, 0xd8, 0xb1, 0x0a, 0xd8, 0xb5, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaa, 0x0a, 
0xd8, 0xba, 0xd9, 0x8a, 0xd8, 0xb1, 0x0a, 0xd8, 0xac, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb4, 0x0a, 0xd8, 0xb1, 0xd9, 0x8a, 0xd8, 0xb2, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb2, 0xd9, 0x8a, 0xd8, 0xb1, 0xd8, 0xa7, 0x0a, 0xda, 0x86, 0xda, 0xaf, 0xd9, 0x88, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd9, 0x84, 0xd8, 0xb7, 0xd9, 0x81, 0xd8, 0xa7, 0x0a, 0xd9, 0x85, 0xd9, 0x8a, 0x0a, 0xd8, 0xaf, 0xd8, 0xb1, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x87, 0x0a, 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xda, 0xaf, 0xd8, 0xb0, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xb9, 0xd9, 0x84, 0xd8, 0xaa, 0x0a, 0xda, 0xaf, 0xd8, 0xb0, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0x0a, 0xd9, 0x81, 0xd9, 0x88, 0xd9, 0x82, 0x0a, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xb4, 0xd9, 0x88, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xaf, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd9, 0x88, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, 0xb1, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0xd9, 0x84, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xda, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x87, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd9, 0x82, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x85, 0x0a, 0xd8, 0xb3, 0xd8, 0xb9, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xb2, 0xd9, 0x87, 0x0a, 0xd8, 0xb1, 0xd8, 0xa7, 0x0a, 0xd9, 0x87, 0xd8, 0xb3, 0xd8, 0xaa, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb2, 0xd9, 0x8a, 0xd8, 0xb1, 0x0a, 0xd8, 0xac, 0xd9, 
0x84, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0x0a }; PersianAnalyzer::PersianAnalyzer(LuceneVersion::Version matchVersion) { this->stoptable = getDefaultStopSet(); this->matchVersion = matchVersion; } PersianAnalyzer::PersianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stoptable = stopwords; this->matchVersion = matchVersion; } PersianAnalyzer::~PersianAnalyzer() { } const HashSet PersianAnalyzer::getDefaultStopSet() { static HashSet stopSet; LUCENE_RUN_ONCE( String stopWords(UTF8_TO_STRING(DEFAULT_STOPWORD_FILE)); Collection words(StringUtils::split(stopWords, L"\n")); stopSet = HashSet::newInstance(words.begin(), words.end()); ); return stopSet; } TokenStreamPtr PersianAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { TokenStreamPtr result = newLucene(reader); result = newLucene(result); result = newLucene(result); // additional Persian-specific normalization result = newLucene(result); // the order here is important: the stopword list is not normalized result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); return result; } TokenStreamPtr PersianAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { PersianAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(reader); streams->result = newLucene(streams->source); streams->result = newLucene(streams->result); // additional Persian-specific normalization streams->result = newLucene(streams->result); // the order here is important: the stopword list is not normalized streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); setPreviousTokenStream(streams); } else { streams->source->reset(reader); } return streams->result; } 
PersianAnalyzerSavedStreams::~PersianAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/fa/PersianNormalizationFilter.cpp000066400000000000000000000020231456444476200333070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "PersianNormalizationFilter.h" #include "PersianNormalizer.h" #include "TermAttribute.h" namespace Lucene { PersianNormalizationFilter::PersianNormalizationFilter(const TokenStreamPtr& input) : TokenFilter(input) { normalizer = newLucene(); termAtt = addAttribute(); } PersianNormalizationFilter::~PersianNormalizationFilter() { } bool PersianNormalizationFilter::incrementToken() { if (input->incrementToken()) { int32_t newlen = normalizer->normalize(termAtt->termBuffer().get(), termAtt->termLength()); termAtt->setTermLength(newlen); return true; } else { return false; } } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/fa/PersianNormalizer.cpp000066400000000000000000000033561456444476200314470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "PersianNormalizer.h" #include "MiscUtils.h" namespace Lucene { const wchar_t PersianNormalizer::YEH = (wchar_t)0x064a; const wchar_t PersianNormalizer::FARSI_YEH = (wchar_t)0x06cc; const wchar_t PersianNormalizer::YEH_BARREE = (wchar_t)0x06d2; const wchar_t PersianNormalizer::KEHEH = (wchar_t)0x06a9; const wchar_t PersianNormalizer::KAF = (wchar_t)0x0643; const wchar_t PersianNormalizer::HAMZA_ABOVE = (wchar_t)0x0654; const wchar_t PersianNormalizer::HEH_YEH = (wchar_t)0x06c0; const wchar_t PersianNormalizer::HEH_GOAL = (wchar_t)0x06c1; const wchar_t PersianNormalizer::HEH = (wchar_t)0x0647; PersianNormalizer::~PersianNormalizer() { } int32_t PersianNormalizer::normalize(wchar_t* s, int32_t len) { for (int32_t i = 0; i < len; ++i) { switch (s[i]) { case FARSI_YEH: case YEH_BARREE: s[i] = YEH; break; case KEHEH: s[i] = KAF; break; case HEH_YEH: case HEH_GOAL: s[i] = HEH; break; case HAMZA_ABOVE: // necessary for HEH + HAMZA len = deleteChar(s, i--, len); break; default: break; } } return len; } int32_t PersianNormalizer::deleteChar(wchar_t* s, int32_t pos, int32_t len) { if (pos < len) { MiscUtils::arrayCopy(s, pos + 1, s, pos, len - pos - 1); } return len - 1; } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/fr/000077500000000000000000000000001456444476200253115ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/fr/ElisionFilter.cpp000066400000000000000000000037171456444476200305750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ElisionFilter.h" #include "CharArraySet.h" #include "TermAttribute.h" namespace Lucene { const wchar_t ElisionFilter::apostrophes[] = {L'\'', L'\x2019'}; ElisionFilter::ElisionFilter(const TokenStreamPtr& input) : TokenFilter(input) { articles = newLucene(newCollection(L"l", L"m", L"t", L"qu", L"n", L"s", L"j"), true); termAtt = addAttribute(); } ElisionFilter::ElisionFilter(const TokenStreamPtr& input, HashSet articles) : TokenFilter(input) { setArticles(articles); termAtt = addAttribute(); } ElisionFilter::~ElisionFilter() { } void ElisionFilter::setArticles(HashSet articles) { this->articles = newLucene(articles, true); } bool ElisionFilter::incrementToken() { if (input->incrementToken()) { wchar_t* termBuffer = termAtt->termBufferArray(); int32_t termLength = termAtt->termLength(); int32_t minPoz = INT_MAX; for (int32_t i = 0; i < SIZEOF_ARRAY(apostrophes); ++i) { wchar_t apos = apostrophes[i]; for (int32_t poz = 0; poz < termLength; ++poz) { if (termBuffer[poz] == apos) { minPoz = std::min(poz, minPoz); break; } } } // An apostrophe has been found. If the prefix is an article strip it off. if (minPoz != INT_MAX && articles->contains(termBuffer, 0, minPoz)) { termAtt->setTermBuffer(termBuffer, minPoz + 1, termLength - (minPoz + 1)); } return true; } else { return false; } } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/fr/FrenchAnalyzer.cpp000066400000000000000000000125731456444476200307400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "FrenchAnalyzer.h" #include "StandardTokenizer.h" #include "StandardFilter.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "FrenchStemFilter.h" namespace Lucene { const wchar_t* FrenchAnalyzer::_FRENCH_STOP_WORDS[] = { L"a", L"afin", L"ai", L"ainsi", L"apr\x00e8s", L"attendu", L"au", L"aujourd", L"auquel", L"aussi", L"autre", L"autres", L"aux", L"auxquelles", L"auxquels", L"avait", L"avant", L"avec", L"avoir", L"c", L"car", L"ce", L"ceci", L"cela", L"celle", L"celles", L"celui", L"cependant", L"certain", L"certaine", L"certaines", L"certains", L"ces", L"cet", L"cette", L"ceux", L"chez", L"ci", L"combien", L"comme", L"comment", L"concernant", L"contre", L"d", L"dans", L"de", L"debout", L"dedans", L"dehors", L"del\x00e0", L"depuis", L"derri\x00e8re", L"des", L"d\x00e9sormais", L"desquelles", L"desquels", L"dessous", L"dessus", L"devant", L"devers", L"devra", L"divers", L"diverse", L"diverses", L"doit", L"donc", L"dont", L"du", L"duquel", L"durant", L"d\x00e8s", L"elle", L"elles", L"en", L"entre", L"environ", L"est", L"et", L"etc", L"etre", L"eu", L"eux", L"except\x00e9", L"hormis", L"hors", L"h\x00e9las", L"hui", L"il", L"ils", L"j", L"je", L"jusqu", L"jusque", L"l", L"la", L"laquelle", L"le", L"lequel", L"les", L"lesquelles", L"lesquels", L"leur", L"leurs", L"lorsque", L"lui", L"l\x00e0", L"ma", L"mais", L"malgr\x00e9", L"me", L"merci", L"mes", L"mien", L"mienne", L"miennes", L"miens", L"moi", L"moins", L"mon", L"moyennant", L"m\x00eame", L"m\x00eames", L"n", L"ne", L"ni", L"non", L"nos", L"notre", L"nous", L"n\x00e9anmoins", L"n\x00f4tre", L"n\x00f4tres", L"on", L"ont", L"ou", L"outre", L"o\x00f9", L"par", L"parmi", L"partant", L"pas", L"pass\x00e9", L"pendant", L"plein", L"plus", L"plusieurs", L"pour", L"pourquoi", L"proche", L"pr\x00e8s", L"puisque", L"qu", L"quand", L"que", L"quel", L"quelle", L"quelles", L"quels", L"qui", L"quoi", 
L"quoique", L"revoici", L"revoil\x00e0", L"s", L"sa", L"sans", L"sauf", L"se", L"selon", L"seront", L"ses", L"si", L"sien", L"sienne", L"siennes", L"siens", L"sinon", L"soi", L"soit", L"son", L"sont", L"sous", L"suivant", L"sur", L"ta", L"te", L"tes", L"tien", L"tienne", L"tiennes", L"tiens", L"toi", L"ton", L"tous", L"tout", L"toute", L"toutes", L"tu", L"un", L"une", L"va", L"vers", L"voici", L"voil\x00e0", L"vos", L"votre", L"vous", L"vu", L"v\x00f4tre", L"v\x00f4tres", L"y", L"\x00e0", L"\x00e7a", L"\x00e8s", L"\x00e9t\x00e9", L"\x00eatre", L"\x00f4" }; FrenchAnalyzer::FrenchAnalyzer(LuceneVersion::Version matchVersion) { this->stoptable = getDefaultStopSet(); this->matchVersion = matchVersion; } FrenchAnalyzer::FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stoptable = stopwords; this->matchVersion = matchVersion; } FrenchAnalyzer::FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) { this->stoptable = stopwords; this->excltable = exclusions; this->matchVersion = matchVersion; } FrenchAnalyzer::~FrenchAnalyzer() { } const HashSet FrenchAnalyzer::getDefaultStopSet() { static HashSet stoptable; LUCENE_RUN_ONCE( stoptable = HashSet::newInstance(_FRENCH_STOP_WORDS, _FRENCH_STOP_WORDS + SIZEOF_ARRAY(_FRENCH_STOP_WORDS)); ); return stoptable; } void FrenchAnalyzer::setStemExclusionTable(HashSet exclusions) { excltable = exclusions; setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created } TokenStreamPtr FrenchAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { TokenStreamPtr result = newLucene(matchVersion, reader); result = newLucene(result); result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); result = newLucene(result, excltable); // Convert to lowercase after stemming result = newLucene(result); return result; } TokenStreamPtr FrenchAnalyzer::reusableTokenStream(const String& fieldName, const 
ReaderPtr& reader) { FrenchAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(matchVersion, reader); streams->result = newLucene(streams->source); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); streams->result = newLucene(streams->result, excltable); // Convert to lowercase after stemming streams->result = newLucene(streams->result); setPreviousTokenStream(streams); } else { streams->source->reset(reader); } return streams->result; } FrenchAnalyzerSavedStreams::~FrenchAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/fr/FrenchStemFilter.cpp000066400000000000000000000032241456444476200312220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "FrenchStemFilter.h" #include "FrenchStemmer.h" #include "TermAttribute.h" namespace Lucene { FrenchStemFilter::FrenchStemFilter(const TokenStreamPtr& input) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); } FrenchStemFilter::FrenchStemFilter(const TokenStreamPtr& input, HashSet exclusiontable) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); this->exclusions = exclusiontable; } FrenchStemFilter::~FrenchStemFilter() { } bool FrenchStemFilter::incrementToken() { if (input->incrementToken()) { String term(termAtt->term()); // Check the exclusion table. if (!exclusions || !exclusions.contains(term)) { String s(stemmer->stem(term)); // If not stemmed, don't waste the time adjusting the token. 
if (!s.empty() && s != term) { termAtt->setTermBuffer(s); } } return true; } else { return false; } } void FrenchStemFilter::setStemmer(const FrenchStemmerPtr& stemmer) { if (stemmer) { this->stemmer = stemmer; } } void FrenchStemFilter::setExclusionSet(HashSet exclusiontable) { this->exclusions = exclusiontable; } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/fr/FrenchStemmer.cpp000066400000000000000000000377651456444476200306010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include #include "FrenchStemmer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { FrenchStemmer::FrenchStemmer() { suite = false; modified = false; } FrenchStemmer::~FrenchStemmer() { } String FrenchStemmer::stem(const String& term) { if (!isStemmable(term)) { return term; } // Use lowercase for medium stemming. 
stringBuffer = StringUtils::toLower(term); // reset the booleans modified = false; suite = false; treatVowels(stringBuffer); setStrings(); step1(); if (!modified || suite) { if (!RV.empty()) { suite = step2a(); if (!suite) { step2b(); } } } if (modified || suite) { step3(); } else { step4(); } step5(); step6(); return stringBuffer; } void FrenchStemmer::setStrings() { // set the strings R0 = stringBuffer; RV = retrieveRV(stringBuffer); R1 = retrieveR(stringBuffer); if (!R1.empty()) { tempBuffer = R1; R2 = retrieveR(tempBuffer); } else { R2.clear(); } } void FrenchStemmer::step1() { Collection suffix = newCollection(L"ances", L"iqUes", L"ismes", L"ables", L"istes", L"ance", L"iqUe", L"isme", L"able", L"iste"); deleteFrom(R2, suffix); replaceFrom(R2, newCollection(L"logies", L"logie"), L"log"); replaceFrom(R2, newCollection(L"usions", L"utions", L"usion", L"ution"), L"u"); replaceFrom(R2, newCollection(L"ences", L"ence"), L"ent"); Collection search = newCollection(L"atrices", L"ateurs", L"ations", L"atrice", L"ateur", L"ation"); deleteButSuffixFromElseReplace(R2, search, L"ic", true, R0, L"iqU"); deleteButSuffixFromElseReplace(R2, newCollection(L"ements", L"ement"), L"eus", false, R0, L"eux"); deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"ativ", false); deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"iv", false); deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"abl", false); deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"iqU", false); deleteFromIfTestVowelBeforeIn(R1, newCollection(L"issements", L"issement"), false, R0); deleteFrom(RV, newCollection(L"ements", L"ement")); deleteButSuffixFromElseReplace(R2, newCollection(L"it\x00e9s", L"it\x00e9"), L"abil", false, R0, L"abl"); deleteButSuffixFromElseReplace(R2, newCollection(L"it\x00e9s", L"it\x00e9"), L"ic", false, R0, L"iqU"); deleteButSuffixFrom(R2, newCollection(L"it\x00e9s", L"it\x00e9"), L"iv", true); Collection autre = newCollection(L"ifs", L"ives", 
L"if", L"ive"); deleteButSuffixFromElseReplace(R2, autre, L"icat", false, R0, L"iqU"); deleteButSuffixFromElseReplace(R2, autre, L"at", true, R2, L"iqU"); replaceFrom(R0, newCollection(L"eaux"), L"eau"); replaceFrom(R1, newCollection(L"aux"), L"al"); deleteButSuffixFromElseReplace(R2, newCollection(L"euses", L"euse"), L"", true, R1, L"eux"); deleteFrom(R2, newCollection(L"eux")); // if one of the next steps is performed, we will need to perform step2a if (replaceFrom(RV, newCollection(L"amment"), L"ant")) { suite = true; } if (replaceFrom(RV, newCollection(L"emment"), L"ent")) { suite = true; } if (deleteFromIfTestVowelBeforeIn(RV, newCollection(L"ments", L"ment"), true, RV)) { suite = true; } } bool FrenchStemmer::step2a() { static Collection search; static const wchar_t* _search[] = { L"\x00eemes", L"\x00eetes", L"iraIent", L"irait", L"irais", L"irai", L"iras", L"ira", L"irent", L"iriez", L"irez", L"irions", L"irons", L"iront", L"issaIent", L"issais", L"issantes", L"issante", L"issants", L"issant", L"issait", L"issais", L"issions", L"issons", L"issiez", L"issez", L"issent", L"isses", L"isse", L"ir", L"is", L"\x00eet", L"it", L"ies", L"ie", L"i" }; LUCENE_RUN_ONCE( search = Collection::newInstance(_search, _search + SIZEOF_ARRAY(_search)); ); return deleteFromIfTestVowelBeforeIn(RV, search, false, RV); } void FrenchStemmer::step2b() { static Collection suffix; static const wchar_t* _suffix[] = { L"eraIent", L"erais", L"erait", L"erai", L"eras", L"erions", L"eriez", L"erons", L"eront", L"erez", L"\x00e8rent", L"era", L"\x00e9es", L"iez", L"\x00e9e", L"\x00e9s", L"er", L"ez", L"\x00e9" }; LUCENE_RUN_ONCE( suffix = Collection::newInstance(_suffix, _suffix + SIZEOF_ARRAY(_suffix)); ); deleteFrom(RV, suffix); static Collection search; static const wchar_t* _search[] = { L"assions", L"assiez", L"assent", L"asses", L"asse", L"aIent", L"antes", L"aIent", L"Aient", L"ante", L"\x00e2mes", L"\x00e2tes", L"ants", L"ant", L"ait", L"a\x00eet", L"ais", L"Ait", L"A\x00eet", 
L"Ais", L"\x00e2t", L"as", L"ai", L"Ai", L"a" }; LUCENE_RUN_ONCE( search = Collection::newInstance(_search, _search + SIZEOF_ARRAY(_search)); ); deleteButSuffixFrom(RV, search, L"e", true); deleteFrom(R2, newCollection(L"ions")); } void FrenchStemmer::step3() { if (!stringBuffer.empty()) { wchar_t ch = stringBuffer[stringBuffer.length() - 1]; if (ch == L'Y') { stringBuffer[stringBuffer.length() - 1] = L'i'; setStrings(); } else if (ch == L'\x00e7') { stringBuffer[stringBuffer.length() - 1] = L'c'; setStrings(); } } } void FrenchStemmer::step4() { if (stringBuffer.length() > 1) { wchar_t ch = stringBuffer[stringBuffer.length() - 1]; if (ch == L's') { wchar_t b = stringBuffer[stringBuffer.length() - 2]; if (b != L'a' && b != L'i' && b != L'o' && b != L'u' && b != L'\x00e8' && b != L's') { stringBuffer.resize(stringBuffer.length() - 1); setStrings(); } } } if (!deleteFromIfPrecededIn(R2, newCollection(L"ion"), RV, L"s")) { deleteFromIfPrecededIn(R2, newCollection(L"ion"), RV, L"t"); } replaceFrom(RV, newCollection(L"I\x00e8re", L"i\x00e8re", L"Ier", L"ier"), L"i"); deleteFrom(RV, newCollection(L"e")); deleteFromIfPrecededIn(RV, newCollection(L"\x00eb"), R0, L"gu"); } void FrenchStemmer::step5() { if (!R0.empty()) { if (boost::ends_with(R0, L"enn") || boost::ends_with(R0, L"onn") || boost::ends_with(R0, L"ett") || boost::ends_with(R0, L"ell") || boost::ends_with(R0, L"eill")) { stringBuffer.resize(stringBuffer.length() - 1); setStrings(); } } } void FrenchStemmer::step6() { if (!R0.empty()) { bool seenVowel = false; bool seenConson = false; int32_t pos = -1; for (int32_t i = (int32_t)(R0.length() - 1); i > -1; --i) { wchar_t ch = R0[i]; if (isVowel(ch)) { if (!seenVowel) { if (ch == L'\x00e9' || ch == L'\x00e8') { pos = i; break; } } seenVowel = true; } else { if (seenVowel) { break; } else { seenConson = true; } } } if (pos > -1 && seenConson && !seenVowel) { stringBuffer[pos] = L'e'; } } } bool FrenchStemmer::deleteFromIfPrecededIn(const String& source, Collection 
search, const String& from, const String& prefix) { bool found = false; if (!source.empty()) { for (int32_t i = 0; i < search.size(); ++i) { if (boost::ends_with(source, search[i])) { if (!from.empty() && boost::ends_with(from, prefix + search[i])) { stringBuffer.resize(stringBuffer.length() - search[i].length()); found = true; setStrings(); break; } } } } return found; } bool FrenchStemmer::deleteFromIfTestVowelBeforeIn(const String& source, Collection search, bool vowel, const String& from) { bool found = false; if (!source.empty() && !from.empty()) { for (int32_t i = 0; i < search.size(); ++i) { if (boost::ends_with(source, search[i])) { if ((search[i].length() + 1) <= from.length()) { bool test = isVowel(stringBuffer[stringBuffer.length() - (search[i].length() + 1)]); if (test == vowel) { stringBuffer.resize(stringBuffer.length() - search[i].length()); modified = true; found = true; setStrings(); break; } } } } } return found; } void FrenchStemmer::deleteButSuffixFrom(const String& source, Collection search, const String& prefix, bool without) { if (!source.empty()) { for (int32_t i = 0; i < search.size(); ++i) { if (boost::ends_with(source, prefix + search[i])) { stringBuffer.resize(stringBuffer.length() - (prefix.length() + search[i].length())); modified = true; setStrings(); break; } else if (without && boost::ends_with(source, search[i])) { stringBuffer.resize(stringBuffer.length() - search[i].length()); modified = true; setStrings(); break; } } } } void FrenchStemmer::deleteButSuffixFromElseReplace(const String& source, Collection search, const String& prefix, bool without, const String& from, const String& replace) { if (!source.empty()) { for (int32_t i = 0; i < search.size(); ++i) { if (boost::ends_with(source, prefix + search[i])) { stringBuffer.resize(stringBuffer.length() - (prefix.length() + search[i].length())); modified = true; setStrings(); break; } else if (!from.empty() && boost::ends_with(from, prefix + search[i])) { 
stringBuffer.resize(stringBuffer.length() - (prefix.length() + search[i].length())); stringBuffer += replace; modified = true; setStrings(); break; } else if (without && boost::ends_with(source, search[i])) { stringBuffer.resize(stringBuffer.length() - search[i].length()); modified = true; setStrings(); break; } } } } bool FrenchStemmer::replaceFrom(const String& source, Collection search, const String& replace) { bool found = false; if (!source.empty()) { for (int32_t i = 0; i < search.size(); ++i) { if (boost::ends_with(source, search[i])) { stringBuffer.resize(stringBuffer.length() - search[i].length()); stringBuffer += replace; modified = true; found = true; setStrings(); break; } } } return found; } void FrenchStemmer::deleteFrom(const String& source, Collection suffix) { if (!source.empty()) { for (int32_t i = 0; i < suffix.size(); ++i) { if (boost::ends_with(source, suffix[i])) { stringBuffer.resize(stringBuffer.length() - suffix[i].length()); modified = true; setStrings(); break; } } } } bool FrenchStemmer::isVowel(wchar_t ch) { switch (ch) { case L'a': case L'e': case L'i': case L'o': case L'u': case L'y': case L'\x00e2': case L'\x00e0': case L'\x00eb': case L'\x00e9': case L'\x00ea': case L'\x00e8': case L'\x00ef': case L'\x00ee': case L'\x00f4': case L'\x00fc': case L'\x00f9': case L'\x00fb': return true; default: return false; } } String FrenchStemmer::retrieveR(const String& buffer) { int32_t len = (int32_t)buffer.length(); int32_t pos = -1; for (int32_t c = 0; c < len; ++c) { if (isVowel(buffer[c])) { pos = c; break; } } if (pos > -1) { int32_t consonne = -1; for (int32_t c = pos; c < len; ++c) { if (!isVowel(buffer[c])) { consonne = c; break; } } if (consonne > -1 && (consonne + 1) < len) { return buffer.substr(consonne + 1); } else { return L""; } } else { return L""; } } String FrenchStemmer::retrieveRV(const String& buffer) { int32_t len = (int32_t)buffer.length(); if (buffer.length() > 3) { if (isVowel(buffer[0]) && isVowel(buffer[1])) { return 
buffer.substr(3); } else { int32_t pos = 0; for (int32_t c = 1; c < len; ++c) { if (isVowel(buffer[c])) { pos = c; break; } } if (pos + 1 < len) { return buffer.substr(pos + 1); } else { return L""; } } } else { return L""; } } void FrenchStemmer::treatVowels(String& buffer) { for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { wchar_t ch = buffer[c]; if (c == 0) { // first char if (buffer.length() > 1) { if (ch == L'y' && isVowel(buffer[c + 1])) { buffer[c] = L'Y'; } } } else if (c == buffer.length() - 1) { // last char if (ch == L'u' && buffer[c - 1] == L'q') { buffer[c] = L'U'; } if (ch == L'y' && isVowel(buffer[c - 1])) { buffer[c] = L'Y'; } } else { // other cases if (ch == L'u') { if (buffer[c - 1] == L'q') { buffer[c] = L'U'; } else if (isVowel(buffer[c - 1]) && isVowel(buffer[c + 1])) { buffer[c] = L'U'; } } if (ch == L'i') { if (isVowel(buffer[c - 1]) && isVowel(buffer[c + 1])) { buffer[c] = L'I'; } } if (ch == L'y') { if (isVowel(buffer[c - 1]) || isVowel(buffer[c + 1])) { buffer[c] = L'Y'; } } } } } bool FrenchStemmer::isStemmable(const String& term) { bool upper = false; int32_t first = -1; for (int32_t c = 0; c < (int32_t)term.length(); ++c) { // Discard terms that contain non-letter characters. if (!UnicodeUtil::isAlpha(term[c])) { return false; } // Discard terms that contain multiple uppercase letters. if (UnicodeUtil::isUpper(term[c])) { if (upper) { return false; } else { // First encountered uppercase letter, set flag and save position. first = c; upper = true; } } } // Discard the term if it contains a single uppercase letter that // is not starting the term. 
if (first > 0) { return false; } return true; } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/nl/000077500000000000000000000000001456444476200253135ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/nl/DutchAnalyzer.cpp000066400000000000000000000102011456444476200305660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "DutchAnalyzer.h" #include "StandardTokenizer.h" #include "StandardFilter.h" #include "StopFilter.h" #include "DutchStemFilter.h" namespace Lucene { const wchar_t* DutchAnalyzer::_DUTCH_STOP_WORDS[] = { L"de", L"en", L"van", L"ik", L"te", L"dat", L"die", L"in", L"een", L"hij", L"het", L"niet", L"zijn", L"is", L"was", L"op", L"aan", L"met", L"als", L"voor", L"had", L"er", L"maar", L"om", L"hem", L"dan", L"zou", L"of", L"wat", L"mijn", L"men", L"dit", L"zo", L"door", L"over", L"ze", L"zich", L"bij", L"ook", L"tot", L"je", L"mij", L"uit", L"der", L"daar", L"haar", L"naar", L"heb", L"hoe", L"heeft", L"hebben", L"deze", L"u", L"want", L"nog", L"zal", L"me", L"zij", L"nu", L"ge", L"geen", L"omdat", L"iets", L"worden", L"toch", L"al", L"waren", L"veel", L"meer", L"doen", L"toen", L"moet", L"ben", L"zonder", L"kan", L"hun", L"dus", L"alles", L"onder", L"ja", L"eens", L"hier", L"wie", L"werd", L"altijd", L"doch", L"wordt", L"wezen", L"kunnen", L"ons", L"zelf", L"tegen", L"na", L"reeds", L"wil", L"kon", L"niets", L"uw", L"iemand", L"geweest", L"andere" }; DutchAnalyzer::DutchAnalyzer(LuceneVersion::Version matchVersion) { this->stoptable = getDefaultStopSet(); this->excltable = HashSet::newInstance(); this->stemdict = MapStringString::newInstance(); 
this->matchVersion = matchVersion; } DutchAnalyzer::DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stoptable = stopwords; this->excltable = HashSet::newInstance(); this->matchVersion = matchVersion; } DutchAnalyzer::DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) { this->stoptable = stopwords; this->excltable = exclusions; this->matchVersion = matchVersion; } DutchAnalyzer::~DutchAnalyzer() { } void DutchAnalyzer::initialize() { stemdict.put(L"fiets", L"fiets"); // otherwise fiet stemdict.put(L"bromfiets", L"bromfiets"); // otherwise bromfiet stemdict.put(L"ei", L"eier"); stemdict.put(L"kind", L"kinder"); } const HashSet DutchAnalyzer::getDefaultStopSet() { static HashSet stoptable; LUCENE_RUN_ONCE( stoptable = HashSet::newInstance(_DUTCH_STOP_WORDS, _DUTCH_STOP_WORDS + SIZEOF_ARRAY(_DUTCH_STOP_WORDS)); ); return stoptable; } void DutchAnalyzer::setStemExclusionTable(HashSet exclusions) { excltable = exclusions; setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created } TokenStreamPtr DutchAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { TokenStreamPtr result = newLucene(matchVersion, reader); result = newLucene(result); result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); result = newLucene(result, excltable); return result; } TokenStreamPtr DutchAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { DutchAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(matchVersion, reader); streams->result = newLucene(streams->source); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); streams->result = newLucene(streams->result, excltable); setPreviousTokenStream(streams); } else { 
streams->source->reset(reader); } return streams->result; } DutchAnalyzerSavedStreams::~DutchAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/nl/DutchStemFilter.cpp000066400000000000000000000041361456444476200310710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "DutchStemFilter.h" #include "DutchStemmer.h" #include "TermAttribute.h" namespace Lucene { DutchStemFilter::DutchStemFilter(const TokenStreamPtr& input) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); } DutchStemFilter::DutchStemFilter(const TokenStreamPtr& input, HashSet exclusiontable) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); this->exclusions = exclusiontable; } DutchStemFilter::DutchStemFilter(const TokenStreamPtr& input, HashSet exclusiontable, MapStringString stemdictionary) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); this->exclusions = exclusiontable; this->stemmer->setStemDictionary(stemdictionary); } DutchStemFilter::~DutchStemFilter() { } bool DutchStemFilter::incrementToken() { if (input->incrementToken()) { String term(termAtt->term()); // Check the exclusion table. if (!exclusions || !exclusions.contains(term)) { String s(stemmer->stem(term)); // If not stemmed, don't waste the time adjusting the token. 
if (!s.empty() && s != term) { termAtt->setTermBuffer(s); } } return true; } else { return false; } } void DutchStemFilter::setStemmer(const DutchStemmerPtr& stemmer) { if (stemmer) { this->stemmer = stemmer; } } void DutchStemFilter::setExclusionSet(HashSet exclusiontable) { this->exclusions = exclusiontable; } void DutchStemFilter::setStemDictionary(MapStringString dict) { if (stemmer) { this->stemmer->setStemDictionary(dict); } } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/nl/DutchStemmer.cpp000066400000000000000000000172131456444476200304270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include #include "DutchStemmer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { DutchStemmer::DutchStemmer() { removedE = false; R1 = 0; R2 = 0; } DutchStemmer::~DutchStemmer() { } String DutchStemmer::stem(const String& term) { // Use lowercase for medium stemming. buffer = StringUtils::toLower(term); if (!isStemmable()) { return buffer; } if (stemDict && stemDict.contains(term)) { return stemDict.get(term); } // Stemming starts here... 
substitute(); storeYandI(); R1 = getRIndex(0); R1 = std::max((int32_t)3, R1); step1(); step2(); R2 = getRIndex(R1); step3a(); step3b(); step4(); reStoreYandI(); return buffer; } bool DutchStemmer::enEnding() { Collection enend = newCollection(L"ene", L"en"); for (int32_t i = 0; i < enend.size(); ++i) { String end = enend[i]; int32_t index = (int32_t)(buffer.length() - end.length()); if (boost::ends_with(buffer, end) && index >= R1 && isValidEnEnding(index - 1)) { buffer.erase(index, end.length()); unDouble(index); return true; } } return false; } void DutchStemmer::step1() { if (R1 >= (int32_t)buffer.length()) { return; } int32_t lengthR1 = (int32_t)(buffer.length() - R1); int32_t index; if (boost::ends_with(buffer, L"heden")) { buffer.replace(R1, lengthR1, boost::replace_all_copy(buffer.substr(R1, lengthR1), L"heden", L"heid")); return; } if (enEnding()) { return; } index = (int32_t)buffer.length() - 2; if (boost::ends_with(buffer, L"se") && index >= R1 && isValidSEnding(index - 1)) { buffer.erase(index, 2); return; } index = (int32_t)(buffer.length() - 1); if (boost::ends_with(buffer, L"s") && index >= R1 && isValidSEnding(index - 1)) { buffer.erase(index, 1); } } void DutchStemmer::step2() { removedE = false; if (R1 >= (int32_t)buffer.length()) { return; } int32_t index = (int32_t)(buffer.length() - 1); if (index >= R1 && boost::ends_with(buffer, L"e") && !isVowel(buffer[index - 1])) { buffer.erase(index, 1); unDouble(); removedE = true; } } void DutchStemmer::step3a() { if (R2 >= (int32_t)buffer.length()) { return; } int32_t index = (int32_t)(buffer.length() - 4); if (boost::ends_with(buffer, L"heid") && index >= R2 && buffer[index - 1] != L'c') { buffer.erase(index, 4); // remove heid enEnding(); } } void DutchStemmer::step3b() { if (R2 >= (int32_t)buffer.length()) { return; } int32_t index = (int32_t)(buffer.length() - 3); if ((boost::ends_with(buffer, L"end") || boost::ends_with(buffer, L"ing")) && index >= R2) { buffer.erase(index, 3); if (buffer[index - 2] 
== L'i' && buffer[index - 1] == L'g') { if (buffer[index - 3] != L'e' && index - 2 >= R2) { index -= 2; buffer.erase(index, 2); } } else { unDouble(index); } return; } index = (int32_t)(buffer.length() - 2); if (boost::ends_with(buffer, L"ig") && index >= R2) { if (buffer[index - 1] != L'e') { buffer.erase(index, 2); } return; } index = (int32_t)(buffer.length() - 4); if (boost::ends_with(buffer, L"lijk") && index >= R2) { buffer.erase(index, 4); step2(); return; } index = (int32_t)(buffer.length() - 4); if (boost::ends_with(buffer, L"baar") && index >= R2) { buffer.erase(index, 4); return; } index = (int32_t)(buffer.length() - 3); if (boost::ends_with(buffer, L"bar") && index >= R2) { if (removedE) { buffer.erase(index, 3); } return; } } void DutchStemmer::step4() { if (buffer.length() < 4) { return; } String end(buffer.substr(buffer.length() - 4)); if (end[1] == end[2] && end[3] != L'I' && end[1] != L'i' && isVowel(end[1]) && !isVowel(end[3]) && !isVowel(end[0])) { buffer.erase(buffer.length() - 2, 1); } } bool DutchStemmer::isStemmable() { for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { if (!UnicodeUtil::isAlnum(buffer[c])) { return false; } } return true; } void DutchStemmer::substitute() { for (int32_t i = 0; i < (int32_t)buffer.length(); ++i) { switch (buffer[i]) { case L'\x00e4': case L'\x00e1': buffer[i] = L'a'; break; case L'\x00eb': case L'\x00e9': buffer[i] = L'e'; break; case L'\x00fc': case L'\x00fa': buffer[i] = L'u'; break; case L'\x00ef': case L'i': buffer[i] = L'i'; break; case L'\x00f6': case L'\x00f3': buffer[i] = L'o'; break; } } } bool DutchStemmer::isValidSEnding(int32_t index) { wchar_t c = buffer[index]; if (isVowel(c) || c == L'j') { return false; } return true; } bool DutchStemmer::isValidEnEnding(int32_t index) { wchar_t c = buffer[index]; if (isVowel(c)) { return false; } if (c < 3) { return false; } // ends with "gem"? 
if (c == L'm' && buffer[index - 2] == L'g' && buffer[index - 1] == L'e') { return false; } return true; } void DutchStemmer::unDouble() { unDouble((int32_t)buffer.length()); } void DutchStemmer::unDouble(int32_t endIndex) { String s = buffer.substr(0, endIndex); if (boost::ends_with(s, L"kk") || boost::ends_with(s, L"tt") || boost::ends_with(s, L"dd") || boost::ends_with(s, L"nn") || boost::ends_with(s, L"mm") || boost::ends_with(s, L"ff")) { buffer.resize(endIndex - 1); } } int32_t DutchStemmer::getRIndex(int32_t start) { if (start == 0) { start = 1; } int32_t i = start; for (; i < (int32_t)buffer.length(); ++i) { // first non-vowel preceded by a vowel if (!isVowel(buffer[i]) && isVowel(buffer[i - 1])) { return i + 1; } } return i + 1; } void DutchStemmer::storeYandI() { if (buffer[0] == L'y') { buffer[0] = L'Y'; } int32_t last = (int32_t)(buffer.length() - 1); for (int32_t i = 1; i < last; i++) { switch (buffer[i]) { case L'i': if (isVowel(buffer[i - 1]) && isVowel(buffer[i + 1])) { buffer[i] = L'I'; } break; case L'y': if (isVowel(buffer[i - 1])) { buffer[i] = L'Y'; } break; } } if (last > 0 && buffer[last] == L'y' && isVowel(buffer[last - 1])) { buffer[last] = L'Y'; } } void DutchStemmer::reStoreYandI() { boost::replace_all(buffer, L"I", L"i"); boost::replace_all(buffer, L"Y", L"y"); } bool DutchStemmer::isVowel(wchar_t c) { switch (c) { case L'e': case L'a': case L'o': case L'i': case L'u': case L'y': case L'\x00e8': return true; default: return false; } } void DutchStemmer::setStemDictionary(MapStringString dict) { stemDict = dict; } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/reverse/000077500000000000000000000000001456444476200263555ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/reverse/ReverseStringFilter.cpp000066400000000000000000000036361456444476200330410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 
Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "ReverseStringFilter.h" #include "TermAttribute.h" namespace Lucene { const wchar_t ReverseStringFilter::NOMARKER = (wchar_t)0xffff; /// Example marker character: U+0001 (START OF HEADING) const wchar_t ReverseStringFilter::START_OF_HEADING_MARKER = (wchar_t)0x0001; /// Example marker character: U+001F (INFORMATION SEPARATOR ONE) const wchar_t ReverseStringFilter::INFORMATION_SEPARATOR_MARKER = (wchar_t)0x001f; /// Example marker character: U+EC00 (PRIVATE USE AREA: EC00) const wchar_t ReverseStringFilter::PUA_EC00_MARKER = (wchar_t)0xec00; /// Example marker character: U+200F (RIGHT-TO-LEFT MARK) const wchar_t ReverseStringFilter::RTL_DIRECTION_MARKER = (wchar_t)0x200f; ReverseStringFilter::ReverseStringFilter(const TokenStreamPtr& input) : TokenFilter(input) { this->marker = NOMARKER; termAtt = addAttribute(); } ReverseStringFilter::ReverseStringFilter(const TokenStreamPtr& input, wchar_t marker) : TokenFilter(input) { this->marker = marker; termAtt = addAttribute(); } ReverseStringFilter::~ReverseStringFilter() { } bool ReverseStringFilter::incrementToken() { if (input->incrementToken()) { int32_t len = termAtt->termLength(); if (marker != NOMARKER) { ++len; termAtt->resizeTermBuffer(len); termAtt->termBuffer()[len - 1] = marker; } CharArray term(termAtt->termBuffer()); std::reverse(term.get(), term.get() + len); termAtt->setTermLength(len); return true; } else { return false; } } } 
LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/ru/000077500000000000000000000000001456444476200253305ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/ru/RussianAnalyzer.cpp000066400000000000000000000155701456444476200311760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "RussianAnalyzer.h" #include "RussianLetterTokenizer.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "RussianStemFilter.h" #include "StringUtils.h" namespace Lucene { /// Default Russian stopwords in UTF-8 format. const uint8_t RussianAnalyzer::DEFAULT_STOPWORD_FILE[] = { 0xd0, 0xb0, 0x0a, 0xd0, 0xb1, 0xd0, 0xb5, 0xd0, 0xb7, 0x0a, 0xd0, 0xb1, 0xd0, 0xbe, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xb5, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, 0xd0, 0xb0, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, 0xd0, 0xb8, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, 0xd0, 0xbe, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd1, 0x82, 0xd1, 0x8c, 0x0a, 0xd0, 0xb2, 0x0a, 0xd0, 0xb2, 0xd0, 0xb0, 0xd0, 0xbc, 0x0a, 0xd0, 0xb2, 0xd0, 0xb0, 0xd1, 0x81, 0x0a, 0xd0, 0xb2, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x8c, 0x0a, 0xd0, 0xb2, 0xd0, 0xbe, 0x0a, 0xd0, 0xb2, 0xd0, 0xbe, 0xd1, 0x82, 0x0a, 0xd0, 0xb2, 0xd1, 0x81, 0xd0, 0xb5, 0x0a, 0xd0, 0xb2, 0xd1, 0x81, 0xd0, 0xb5, 0xd0, 0xb3, 0xd0, 0xbe, 0x0a, 0xd0, 0xb2, 0xd1, 0x81, 0xd0, 0xb5, 0xd1, 0x85, 0x0a, 0xd0, 0xb2, 0xd1, 0x8b, 0x0a, 0xd0, 0xb3, 0xd0, 0xb4, 0xd0, 0xb5, 0x0a, 0xd0, 0xb4, 0xd0, 0xb0, 0x0a, 0xd0, 0xb4, 0xd0, 0xb0, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, 0xd0, 0xb4, 0xd0, 0xbb, 0xd1, 0x8f, 0x0a, 0xd0, 0xb4, 0xd0, 0xbe, 
0x0a, 0xd0, 0xb5, 0xd0, 0xb3, 0xd0, 0xbe, 0x0a, 0xd0, 0xb5, 0xd0, 0xb5, 0x0a, 0xd0, 0xb5, 0xd0, 0xb9, 0x0a, 0xd0, 0xb5, 0xd1, 0x8e, 0x0a, 0xd0, 0xb5, 0xd1, 0x81, 0xd0, 0xbb, 0xd0, 0xb8, 0x0a, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0xd1, 0x8c, 0x0a, 0xd0, 0xb5, 0xd1, 0x89, 0xd0, 0xb5, 0x0a, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, 0xd0, 0xb7, 0xd0, 0xb0, 0x0a, 0xd0, 0xb7, 0xd0, 0xb4, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x8c, 0x0a, 0xd0, 0xb8, 0x0a, 0xd0, 0xb8, 0xd0, 0xb7, 0x0a, 0xd0, 0xb8, 0xd0, 0xbb, 0xd0, 0xb8, 0x0a, 0xd0, 0xb8, 0xd0, 0xbc, 0x0a, 0xd0, 0xb8, 0xd1, 0x85, 0x0a, 0xd0, 0xba, 0x0a, 0xd0, 0xba, 0xd0, 0xb0, 0xd0, 0xba, 0x0a, 0xd0, 0xba, 0xd0, 0xbe, 0x0a, 0xd0, 0xba, 0xd0, 0xbe, 0xd0, 0xb3, 0xd0, 0xb4, 0xd0, 0xb0, 0x0a, 0xd0, 0xba, 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd0, 0xbb, 0xd0, 0xb8, 0x0a, 0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb1, 0xd0, 0xbe, 0x0a, 0xd0, 0xbc, 0xd0, 0xbd, 0xd0, 0xb5, 0x0a, 0xd0, 0xbc, 0xd0, 0xbe, 0xd0, 0xb6, 0xd0, 0xb5, 0xd1, 0x82, 0x0a, 0xd0, 0xbc, 0xd1, 0x8b, 0x0a, 0xd0, 0xbd, 0xd0, 0xb0, 0x0a, 0xd0, 0xbd, 0xd0, 0xb0, 0xd0, 0xb4, 0xd0, 0xbe, 0x0a, 0xd0, 0xbd, 0xd0, 0xb0, 0xd1, 0x88, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0xd0, 0xb3, 0xd0, 0xbe, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0xd0, 0xb5, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x82, 0x0a, 0xd0, 0xbd, 0xd0, 0xb8, 0x0a, 0xd0, 0xbd, 0xd0, 0xb8, 0xd1, 0x85, 0x0a, 0xd0, 0xbd, 0xd0, 0xbe, 0x0a, 0xd0, 0xbd, 0xd1, 0x83, 0x0a, 0xd0, 0xbe, 0x0a, 0xd0, 0xbe, 0xd0, 0xb1, 0x0a, 0xd0, 0xbe, 0xd0, 0xb4, 0xd0, 0xbd, 0xd0, 0xb0, 0xd0, 0xba, 0xd0, 0xbe, 0x0a, 0xd0, 0xbe, 0xd0, 0xbd, 0x0a, 0xd0, 0xbe, 0xd0, 0xbd, 0xd0, 0xb0, 0x0a, 0xd0, 0xbe, 0xd0, 0xbd, 0xd0, 0xb8, 0x0a, 0xd0, 0xbe, 0xd0, 0xbd, 0xd0, 0xbe, 0x0a, 0xd0, 0xbe, 0xd1, 0x82, 0x0a, 0xd0, 0xbe, 0xd1, 0x87, 0xd0, 0xb5, 0xd0, 0xbd, 0xd1, 0x8c, 0x0a, 0xd0, 0xbf, 0xd0, 0xbe, 0x0a, 0xd0, 0xbf, 0xd0, 0xbe, 0xd0, 0xb4, 0x0a, 0xd0, 0xbf, 0xd1, 0x80, 0xd0, 0xb8, 0x0a, 0xd1, 0x81, 0x0a, 0xd1, 0x81, 0xd0, 0xbe, 0x0a, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xba, 0x0a, 0xd1, 
0x82, 0xd0, 0xb0, 0xd0, 0xba, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xba, 0xd0, 0xbe, 0xd0, 0xb9, 0x0a, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xbc, 0x0a, 0xd1, 0x82, 0xd0, 0xb5, 0x0a, 0xd1, 0x82, 0xd0, 0xb5, 0xd0, 0xbc, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xb3, 0xd0, 0xbe, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xb9, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xbb, 0xd1, 0x8c, 0xd0, 0xba, 0xd0, 0xbe, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xbc, 0x0a, 0xd1, 0x82, 0xd1, 0x8b, 0x0a, 0xd1, 0x83, 0x0a, 0xd1, 0x83, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, 0xd1, 0x85, 0xd0, 0xbe, 0xd1, 0x82, 0xd1, 0x8f, 0x0a, 0xd1, 0x87, 0xd0, 0xb5, 0xd0, 0xb3, 0xd0, 0xbe, 0x0a, 0xd1, 0x87, 0xd0, 0xb5, 0xd0, 0xb9, 0x0a, 0xd1, 0x87, 0xd0, 0xb5, 0xd0, 0xbc, 0x0a, 0xd1, 0x87, 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd1, 0x87, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xb1, 0xd1, 0x8b, 0x0a, 0xd1, 0x87, 0xd1, 0x8c, 0xd0, 0xb5, 0x0a, 0xd1, 0x87, 0xd1, 0x8c, 0xd1, 0x8f, 0x0a, 0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xb0, 0x0a, 0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0x0a, 0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd1, 0x8f, 0x0a }; RussianAnalyzer::RussianAnalyzer(LuceneVersion::Version matchVersion) { this->stopSet = getDefaultStopSet(); this->matchVersion = matchVersion; } RussianAnalyzer::RussianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { this->stopSet = stopwords; this->matchVersion = matchVersion; } RussianAnalyzer::~RussianAnalyzer() { } const HashSet RussianAnalyzer::getDefaultStopSet() { static HashSet stopSet; LUCENE_RUN_ONCE( String stopWords(UTF8_TO_STRING(DEFAULT_STOPWORD_FILE)); Collection words(StringUtils::split(stopWords, L"\n")); stopSet = HashSet::newInstance(words.begin(), words.end()); ); return stopSet; } TokenStreamPtr RussianAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { TokenStreamPtr result = newLucene(reader); result = newLucene(result); result = 
newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); result = newLucene(result); return result; } TokenStreamPtr RussianAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { RussianAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(reader); streams->result = newLucene(streams->source); streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); streams->result = newLucene(streams->result); setPreviousTokenStream(streams); } else { streams->source->reset(reader); } return streams->result; } RussianAnalyzerSavedStreams::~RussianAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/ru/RussianLetterTokenizer.cpp000066400000000000000000000020101456444476200325240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "RussianLetterTokenizer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { RussianLetterTokenizer::RussianLetterTokenizer(const ReaderPtr& input) : CharTokenizer(input) { } RussianLetterTokenizer::RussianLetterTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : CharTokenizer(source, input) { } RussianLetterTokenizer::RussianLetterTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : CharTokenizer(factory, input) { } RussianLetterTokenizer::~RussianLetterTokenizer() { } bool RussianLetterTokenizer::isTokenChar(wchar_t c) { return (UnicodeUtil::isAlpha(c) || UnicodeUtil::isDigit(c)); } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/ru/RussianLowerCaseFilter.cpp000066400000000000000000000020161456444476200324320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "RussianLowerCaseFilter.h" #include "TermAttribute.h" #include "CharFolder.h" namespace Lucene { RussianLowerCaseFilter::RussianLowerCaseFilter(const TokenStreamPtr& input) : TokenFilter(input) { termAtt = addAttribute(); } RussianLowerCaseFilter::~RussianLowerCaseFilter() { } bool RussianLowerCaseFilter::incrementToken() { if (input->incrementToken()) { wchar_t* buffer = termAtt->termBufferArray(); int32_t length = termAtt->termLength(); for (int32_t i = 0; i < length; ++i) { buffer[i] = CharFolder::toLower(buffer[i]); } return true; } else { return false; } } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/ru/RussianStemFilter.cpp000066400000000000000000000021611456444476200314570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "RussianStemFilter.h" #include "RussianStemmer.h" #include "TermAttribute.h" namespace Lucene { RussianStemFilter::RussianStemFilter(const TokenStreamPtr& input) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); } RussianStemFilter::~RussianStemFilter() { } bool RussianStemFilter::incrementToken() { if (input->incrementToken()) { String term(termAtt->term()); String s(stemmer->stem(term)); if (!s.empty() && s != term) { termAtt->setTermBuffer(s); } return true; } else { return false; } } void RussianStemFilter::setStemmer(const RussianStemmerPtr& stemmer) { if (stemmer) { this->stemmer = stemmer; } } } LucenePlusPlus-rel_3.0.9/src/contrib/analyzers/common/analysis/ru/RussianStemmer.cpp000066400000000000000000000445461456444476200310320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "RussianStemmer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { const wchar_t RussianStemmer::A = L'\x0430'; const wchar_t RussianStemmer::V = L'\x0432'; const wchar_t RussianStemmer::G = L'\x0433'; const wchar_t RussianStemmer::E = L'\x0435'; const wchar_t RussianStemmer::I = L'\x0438'; const wchar_t RussianStemmer::I_ = L'\x0439'; const wchar_t RussianStemmer::L = L'\x043b'; const wchar_t RussianStemmer::M = L'\x043c'; const wchar_t RussianStemmer::N = L'\x043d'; const wchar_t RussianStemmer::O = L'\x043e'; const wchar_t RussianStemmer::S = L'\x0441'; const wchar_t RussianStemmer::T = L'\x0442'; const wchar_t RussianStemmer::U = L'\x0443'; const wchar_t RussianStemmer::X = L'\x0445'; const wchar_t RussianStemmer::SH = L'\x0448'; const wchar_t RussianStemmer::SHCH = L'\x0449'; const wchar_t RussianStemmer::Y = L'\x044b'; const wchar_t RussianStemmer::SOFT = L'\x044c'; const wchar_t RussianStemmer::AE = L'\x044d'; const wchar_t RussianStemmer::IU = L'\x044e'; const wchar_t RussianStemmer::IA = L'\x044f'; const wchar_t RussianStemmer::vowels[] = {A, E, I, O, U, Y, AE, IU, IA}; RussianStemmer::RussianStemmer() { RV = 0; R1 = 0; R2 = 0; } RussianStemmer::~RussianStemmer() { } Collection RussianStemmer::perfectiveGerundEndings1() { static Collection _perfectiveGerundEndings1; LUCENE_RUN_ONCE( _perfectiveGerundEndings1 = Collection::newInstance(); _perfectiveGerundEndings1.add(String(L"") + V); _perfectiveGerundEndings1.add(String(L"") + V + SH + I); _perfectiveGerundEndings1.add(String(L"") + V + SH + I + S + SOFT); ); return _perfectiveGerundEndings1; } Collection RussianStemmer::perfectiveGerund1Predessors() { static Collection _perfectiveGerund1Predessors; LUCENE_RUN_ONCE( _perfectiveGerund1Predessors = Collection::newInstance(); _perfectiveGerund1Predessors.add(String(L"") + A); _perfectiveGerund1Predessors.add(String(L"") + IA); ); 
return _perfectiveGerund1Predessors; } Collection RussianStemmer::perfectiveGerundEndings2() { static Collection _perfectiveGerundEndings2; LUCENE_RUN_ONCE( _perfectiveGerundEndings2 = Collection::newInstance(); _perfectiveGerundEndings2.add(String(L"") + I + V); _perfectiveGerundEndings2.add(String(L"") + Y + V); _perfectiveGerundEndings2.add(String(L"") + I + V + SH + I); _perfectiveGerundEndings2.add(String(L"") + Y + V + SH + I); _perfectiveGerundEndings2.add(String(L"") + I + V + SH + I + S + SOFT); _perfectiveGerundEndings2.add(String(L"") + Y + V + SH + I + S + SOFT); ); return _perfectiveGerundEndings2; } Collection RussianStemmer::adjectiveEndings() { static Collection _adjectiveEndings; LUCENE_RUN_ONCE( _adjectiveEndings = Collection::newInstance(); _adjectiveEndings.add(String(L"") + E + E); _adjectiveEndings.add(String(L"") + I + E); _adjectiveEndings.add(String(L"") + Y + E); _adjectiveEndings.add(String(L"") + O + E); _adjectiveEndings.add(String(L"") + E + I_); _adjectiveEndings.add(String(L"") + I + I_); _adjectiveEndings.add(String(L"") + Y + I_); _adjectiveEndings.add(String(L"") + O + I_); _adjectiveEndings.add(String(L"") + E + M); _adjectiveEndings.add(String(L"") + I + M); _adjectiveEndings.add(String(L"") + Y + M); _adjectiveEndings.add(String(L"") + O + M); _adjectiveEndings.add(String(L"") + I + X); _adjectiveEndings.add(String(L"") + Y + X); _adjectiveEndings.add(String(L"") + U + IU); _adjectiveEndings.add(String(L"") + IU + IU); _adjectiveEndings.add(String(L"") + A + IA); _adjectiveEndings.add(String(L"") + IA + IA); _adjectiveEndings.add(String(L"") + O + IU); _adjectiveEndings.add(String(L"") + E + IU); _adjectiveEndings.add(String(L"") + I + M + I); _adjectiveEndings.add(String(L"") + Y + M + I); _adjectiveEndings.add(String(L"") + E + G + O); _adjectiveEndings.add(String(L"") + O + G + O); _adjectiveEndings.add(String(L"") + E + M + U); _adjectiveEndings.add(String(L"") + O + M + U); ); return _adjectiveEndings; } Collection 
RussianStemmer::participleEndings1() { static Collection _participleEndings1; LUCENE_RUN_ONCE( _participleEndings1 = Collection::newInstance(); _participleEndings1.add(String(L"") + SHCH); _participleEndings1.add(String(L"") + E + M); _participleEndings1.add(String(L"") + N + N); _participleEndings1.add(String(L"") + V + SH); _participleEndings1.add(String(L"") + IU + SHCH); ); return _participleEndings1; } Collection RussianStemmer::participleEndings2() { static Collection _participleEndings2; LUCENE_RUN_ONCE( _participleEndings2 = Collection::newInstance(); _participleEndings2.add(String(L"") + I + V + SH); _participleEndings2.add(String(L"") + Y + V + SH); _participleEndings2.add(String(L"") + U + IU + SHCH); ); return _participleEndings2; } Collection RussianStemmer::participle1Predessors() { static Collection _participle1Predessors; LUCENE_RUN_ONCE( _participle1Predessors = Collection::newInstance(); _participle1Predessors.add(String(L"") + A); _participle1Predessors.add(String(L"") + IA); ); return _participle1Predessors; } Collection RussianStemmer::reflexiveEndings() { static Collection _participle1Predessors; LUCENE_RUN_ONCE( _participle1Predessors = Collection::newInstance(); _participle1Predessors.add(String(L"") + S + IA); _participle1Predessors.add(String(L"") + S + SOFT); ); return _participle1Predessors; } Collection RussianStemmer::verbEndings1() { static Collection _verbEndings1; LUCENE_RUN_ONCE( _verbEndings1 = Collection::newInstance(); _verbEndings1.add(String(L"") + I_); _verbEndings1.add(String(L"") + L); _verbEndings1.add(String(L"") + N); _verbEndings1.add(String(L"") + L + O); _verbEndings1.add(String(L"") + N + O); _verbEndings1.add(String(L"") + E + T); _verbEndings1.add(String(L"") + IU + T); _verbEndings1.add(String(L"") + L + A); _verbEndings1.add(String(L"") + N + A); _verbEndings1.add(String(L"") + L + I); _verbEndings1.add(String(L"") + E + M); _verbEndings1.add(String(L"") + N + Y); _verbEndings1.add(String(L"") + E + T + E); 
_verbEndings1.add(String(L"") + I_ + T + E); _verbEndings1.add(String(L"") + T + SOFT); _verbEndings1.add(String(L"") + E + SH + SOFT); _verbEndings1.add(String(L"") + N + N + O); ); return _verbEndings1; } Collection RussianStemmer::verbEndings2() { static Collection _verbEndings2; LUCENE_RUN_ONCE( _verbEndings2 = Collection::newInstance(); _verbEndings2.add(String(L"") + IU); _verbEndings2.add(String(L"") + U + IU); _verbEndings2.add(String(L"") + E + N); _verbEndings2.add(String(L"") + E + I_); _verbEndings2.add(String(L"") + IA + T); _verbEndings2.add(String(L"") + U + I_); _verbEndings2.add(String(L"") + I + L); _verbEndings2.add(String(L"") + Y + L); _verbEndings2.add(String(L"") + I + M); _verbEndings2.add(String(L"") + Y + M); _verbEndings2.add(String(L"") + I + T); _verbEndings2.add(String(L"") + Y + T); _verbEndings2.add(String(L"") + I + L + A); _verbEndings2.add(String(L"") + Y + L + A); _verbEndings2.add(String(L"") + E + N + A); _verbEndings2.add(String(L"") + I + T + E); _verbEndings2.add(String(L"") + I + L + I); _verbEndings2.add(String(L"") + Y + L + I); _verbEndings2.add(String(L"") + I + L + O); _verbEndings2.add(String(L"") + Y + L + O); _verbEndings2.add(String(L"") + E + N + O); _verbEndings2.add(String(L"") + U + E + T); _verbEndings2.add(String(L"") + U + IU + T); _verbEndings2.add(String(L"") + E + N + Y); _verbEndings2.add(String(L"") + I + T + SOFT); _verbEndings2.add(String(L"") + Y + T + SOFT); _verbEndings2.add(String(L"") + I + SH + SOFT); _verbEndings2.add(String(L"") + E + I_ + T + E); _verbEndings2.add(String(L"") + U + I_ + T + E); ); return _verbEndings2; } Collection RussianStemmer::verb1Predessors() { static Collection _verb1Predessors; LUCENE_RUN_ONCE( _verb1Predessors = Collection::newInstance(); _verb1Predessors.add(String(L"") + A); _verb1Predessors.add(String(L"") + IA); ); return _verb1Predessors; } Collection RussianStemmer::nounEndings() { static Collection _nounEndings; LUCENE_RUN_ONCE( _nounEndings = 
Collection::newInstance(); _nounEndings.add(String(L"") + A); _nounEndings.add(String(L"") + U); _nounEndings.add(String(L"") + I_); _nounEndings.add(String(L"") + O); _nounEndings.add(String(L"") + U); _nounEndings.add(String(L"") + E); _nounEndings.add(String(L"") + Y); _nounEndings.add(String(L"") + I); _nounEndings.add(String(L"") + SOFT); _nounEndings.add(String(L"") + IA); _nounEndings.add(String(L"") + E + V); _nounEndings.add(String(L"") + O + V); _nounEndings.add(String(L"") + I + E); _nounEndings.add(String(L"") + SOFT + E); _nounEndings.add(String(L"") + IA + X); _nounEndings.add(String(L"") + I + IU); _nounEndings.add(String(L"") + E + I); _nounEndings.add(String(L"") + I + I); _nounEndings.add(String(L"") + E + I_); _nounEndings.add(String(L"") + O + I_); _nounEndings.add(String(L"") + E + M); _nounEndings.add(String(L"") + A + M); _nounEndings.add(String(L"") + O + M); _nounEndings.add(String(L"") + A + X); _nounEndings.add(String(L"") + SOFT + IU); _nounEndings.add(String(L"") + I + IA); _nounEndings.add(String(L"") + SOFT + IA); _nounEndings.add(String(L"") + I + I_); _nounEndings.add(String(L"") + IA + M); _nounEndings.add(String(L"") + IA + M + I); _nounEndings.add(String(L"") + A + M + I); _nounEndings.add(String(L"") + I + E + I_); _nounEndings.add(String(L"") + I + IA + M); _nounEndings.add(String(L"") + I + E + M); _nounEndings.add(String(L"") + I + IA + X); _nounEndings.add(String(L"") + I + IA + M + I); ); return _nounEndings; } Collection RussianStemmer::superlativeEndings() { static Collection _superlativeEndings; LUCENE_RUN_ONCE( _superlativeEndings = Collection::newInstance(); _superlativeEndings.add(String(L"") + E + I_ + SH); _superlativeEndings.add(String(L"") + E + I_ + SH + E); ); return _superlativeEndings; } Collection RussianStemmer::derivationalEndings() { static Collection _derivationalEndings; LUCENE_RUN_ONCE( _derivationalEndings = Collection::newInstance(); _derivationalEndings.add(String(L"") + O + S + T); 
_derivationalEndings.add(String(L"") + O + S + T + SOFT); ); return _derivationalEndings; } Collection RussianStemmer::doubleN() { static Collection _doubleN; LUCENE_RUN_ONCE( _doubleN = Collection::newInstance(); _doubleN.add(String(L"") + N + N); ); return _doubleN; } String RussianStemmer::stem(const String& input) { markPositions(input); if (RV == 0) { return input; // RV wasn't detected, nothing to stem } String stemmingZone(input.substr(RV)); // stemming goes on in RV // Step 1 if (!perfectiveGerund(stemmingZone)) { reflexive(stemmingZone); if (!adjectival(stemmingZone)) { if (!verb(stemmingZone)) { noun(stemmingZone); } } } // Step 2 removeI(stemmingZone); // Step 3 derivational(stemmingZone); // Step 4 superlative(stemmingZone); undoubleN(stemmingZone); removeSoft(stemmingZone); // return result return input.substr(0, RV) + stemmingZone; } String RussianStemmer::stemWord(const String& word) { return newLucene()->stem(word); } bool RussianStemmer::adjectival(String& stemmingZone) { // look for adjective ending in a stemming zone if (!findAndRemoveEnding(stemmingZone, adjectiveEndings())) { return false; } if (!findAndRemoveEnding(stemmingZone, participleEndings1(), participle1Predessors())) { findAndRemoveEnding(stemmingZone, participleEndings2()); } return true; } bool RussianStemmer::derivational(String& stemmingZone) { int32_t endingLength = findEnding(stemmingZone, derivationalEndings()); if (endingLength == 0) { return false; // no derivational ending found } else { // Ensure that the ending locates in R2 if (R2 - RV <= (int32_t)stemmingZone.length() - endingLength) { stemmingZone.resize(stemmingZone.length() - endingLength); return true; } else { return false; } } } int32_t RussianStemmer::findEnding(String& stemmingZone, int32_t startIndex, Collection theEndingClass) { bool match = false; for (int32_t i = theEndingClass.size() - 1; i >= 0; --i) { String theEnding(theEndingClass[i]); // check if the ending is bigger than stemming zone if (startIndex < 
(int32_t)theEnding.length() - 1) { match = false; continue; } match = true; int32_t stemmingIndex = startIndex; for (int32_t j = (int32_t)theEnding.length() - 1; j >= 0; --j) { if (stemmingZone[stemmingIndex--] != theEnding[j]) { match = false; break; } } // check if ending was found if (match) { return (int32_t)theEndingClass[i].size(); // cut ending } } return 0; } int32_t RussianStemmer::findEnding(String& stemmingZone, Collection theEndingClass) { return findEnding(stemmingZone, (int32_t)(stemmingZone.length() - 1), theEndingClass); } bool RussianStemmer::findAndRemoveEnding(String& stemmingZone, Collection theEndingClass) { int32_t endingLength = findEnding(stemmingZone, theEndingClass); if (endingLength == 0) { return false; // not found } else { stemmingZone.resize(stemmingZone.length() - endingLength); return true; // cut the ending found } } bool RussianStemmer::findAndRemoveEnding(String& stemmingZone, Collection theEndingClass, Collection thePredessors) { int32_t endingLength = findEnding(stemmingZone, theEndingClass); if (endingLength == 0) { return false; // not found } else { int32_t predessorLength = findEnding(stemmingZone, (int32_t)(stemmingZone.length() - endingLength - 1), thePredessors); if (predessorLength == 0) { return false; } else { stemmingZone.resize(stemmingZone.length() - endingLength); return true; // cut the ending found } } } void RussianStemmer::markPositions(const String& word) { RV = 0; R1 = 0; R2 = 0; int32_t i = 0; // find RV while ((int32_t)word.length() > i && !isVowel(word[i])) { ++i; } if ((int32_t)word.length() - 1 < ++i) { return; // RV zone is empty } RV = i; // find R1 while ((int32_t)word.length() > i && isVowel(word[i])) { ++i; } if ((int32_t)word.length() - 1 < ++i) { return; // R1 zone is empty } R1 = i; // find R2 while ((int32_t)word.length() > i && !isVowel(word[i])) { ++i; } if ((int32_t)word.length() - 1 < ++i) { return; // R2 zone is empty } while ((int32_t)word.length() > i && isVowel(word[i])) { ++i; } if 
((int32_t)word.length() - 1 < ++i) { return; // R2 zone is empty } R2 = i; } bool RussianStemmer::isVowel(wchar_t letter) { for (int32_t i = 0; i < SIZEOF_ARRAY(vowels); ++i) { if (letter == vowels[i]) { return true; } } return false; } bool RussianStemmer::noun(String& stemmingZone) { return findAndRemoveEnding(stemmingZone, nounEndings()); } bool RussianStemmer::perfectiveGerund(String& stemmingZone) { return findAndRemoveEnding(stemmingZone, perfectiveGerundEndings1(), perfectiveGerund1Predessors()) || findAndRemoveEnding(stemmingZone, perfectiveGerundEndings2()); } bool RussianStemmer::reflexive(String& stemmingZone) { return findAndRemoveEnding(stemmingZone, reflexiveEndings()); } bool RussianStemmer::removeI(String& stemmingZone) { if ((int32_t)stemmingZone.length() > 0 && stemmingZone[stemmingZone.length() - 1] == I) { stemmingZone.resize(stemmingZone.length() - 1); return true; } else { return false; } } bool RussianStemmer::removeSoft(String& stemmingZone) { if ((int32_t)stemmingZone.length() > 0 && stemmingZone[stemmingZone.length() - 1] == SOFT) { stemmingZone.resize(stemmingZone.length() - 1); return true; } return false; } bool RussianStemmer::superlative(String& stemmingZone) { return findAndRemoveEnding(stemmingZone, superlativeEndings()); } bool RussianStemmer::undoubleN(String& stemmingZone) { if (findEnding(stemmingZone, doubleN()) != 0) { stemmingZone.resize(stemmingZone.length() - 1); return true; } else { return false; } } bool RussianStemmer::verb(String& stemmingZone) { return findAndRemoveEnding(stemmingZone, verbEndings1(), verb1Predessors()) || findAndRemoveEnding(stemmingZone, verbEndings2()); } } 
LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/000077500000000000000000000000001456444476200220555ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/DefaultEncoder.cpp000066400000000000000000000010411456444476200254410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "DefaultEncoder.h" namespace Lucene { DefaultEncoder::~DefaultEncoder() { } String DefaultEncoder::encodeText(const String& originalText) { return originalText; } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/Encoder.cpp000066400000000000000000000010411456444476200241340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "Encoder.h" namespace Lucene { Encoder::~Encoder() { } String Encoder::encodeText(const String& originalText) { BOOST_ASSERT(false); return L""; // override } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/Formatter.cpp000066400000000000000000000011151456444476200245220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "Formatter.h" namespace Lucene { Formatter::~Formatter() { } String Formatter::highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup) { BOOST_ASSERT(false); return L""; // override } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/Fragmenter.cpp000066400000000000000000000012321456444476200246510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "Fragmenter.h" namespace Lucene { Fragmenter::~Fragmenter() { } void Fragmenter::start(const String& originalText, const TokenStreamPtr& tokenStream) { BOOST_ASSERT(false); // override } bool Fragmenter::isNewFragment() { BOOST_ASSERT(false); return false; // override } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/GradientFormatter.cpp000066400000000000000000000115231456444476200262040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "GradientFormatter.h" #include "TokenGroup.h" #include "StringUtils.h" namespace Lucene { GradientFormatter::GradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor) { highlightForeground = (!minForegroundColor.empty() && !maxForegroundColor.empty()); if (highlightForeground) { if (minForegroundColor.length() != 7) { boost::throw_exception(IllegalArgumentException(L"minForegroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); } if (maxForegroundColor.length() != 7) { boost::throw_exception(IllegalArgumentException(L"maxForegroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); } fgRMin = hexToInt(minForegroundColor.substr(1, 2)); fgGMin = hexToInt(minForegroundColor.substr(3, 2)); fgBMin = hexToInt(minForegroundColor.substr(5, 2)); fgRMax = hexToInt(maxForegroundColor.substr(1, 2)); fgGMax = hexToInt(maxForegroundColor.substr(3, 2)); fgBMax = hexToInt(maxForegroundColor.substr(5, 2)); } highlightBackground = (!minBackgroundColor.empty() && !maxBackgroundColor.empty()); if (highlightBackground) { if (minBackgroundColor.length() != 7) { boost::throw_exception(IllegalArgumentException(L"minBackgroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); } if (maxBackgroundColor.length() != 7) { boost::throw_exception(IllegalArgumentException(L"maxBackgroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); } bgRMin = hexToInt(minBackgroundColor.substr(1, 2)); bgGMin = hexToInt(minBackgroundColor.substr(3, 2)); bgBMin = hexToInt(minBackgroundColor.substr(5, 2)); bgRMax = hexToInt(maxBackgroundColor.substr(1, 2)); bgGMax = hexToInt(maxBackgroundColor.substr(3, 2)); bgBMax = hexToInt(maxBackgroundColor.substr(5, 2)); } this->maxScore = maxScore; } GradientFormatter::~GradientFormatter() { } 
String GradientFormatter::highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup) { if (tokenGroup->getTotalScore() == 0) { return originalText; } double score = tokenGroup->getTotalScore(); if (score == 0.0) { return originalText; } StringStream buffer; buffer << L"" << originalText << L""; return buffer.str(); } String GradientFormatter::getForegroundColorString(double score) { int32_t rVal = getColorVal(fgRMin, fgRMax, score); int32_t gVal = getColorVal(fgGMin, fgGMax, score); int32_t bVal = getColorVal(fgBMin, fgBMax, score); StringStream buffer; buffer << L"#" << intToHex(rVal) << intToHex(gVal) << intToHex(bVal); return buffer.str(); } String GradientFormatter::getBackgroundColorString(double score) { int32_t rVal = getColorVal(bgRMin, bgRMax, score); int32_t gVal = getColorVal(bgGMin, bgGMax, score); int32_t bVal = getColorVal(bgBMin, bgBMax, score); StringStream buffer; buffer << L"#" << intToHex(rVal) << intToHex(gVal) << intToHex(bVal); return buffer.str(); } int32_t GradientFormatter::getColorVal(int32_t colorMin, int32_t colorMax, double score) { if (colorMin == colorMax) { return colorMin; } double scale = std::abs((double)(colorMin - colorMax)); double relScorePercent = std::min(maxScore, score) / maxScore; double colScore = scale * relScorePercent; return std::min(colorMin, colorMax) + (int32_t)colScore; } String GradientFormatter::intToHex(int32_t i) { static const wchar_t* hexDigits = L"0123456789abcdef"; StringStream buffer; buffer << hexDigits[(i & 0xf0) >> 4] << hexDigits[i & 0x0f]; return buffer.str(); } int32_t GradientFormatter::hexToInt(const String& hex) { int32_t len = (int32_t)hex.length(); if (len > 16) { boost::throw_exception(NumberFormatException()); } int32_t l = 0; for (int32_t i = 0; i < len; ++i) { l <<= 4; int32_t c = (int32_t)StringUtils::toLong(hex.substr(i, 1), 16); if (c < 0) { boost::throw_exception(NumberFormatException()); } l |= c; } return l; } } 
LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/Highlighter.cpp000066400000000000000000000310561456444476200250240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "Highlighter.h" #include "HighlighterScorer.h" #include "SimpleHTMLFormatter.h" #include "DefaultEncoder.h" #include "Scorer.h" #include "TokenStream.h" #include "StringReader.h" #include "Analyzer.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "PositionIncrementAttribute.h" #include "TextFragment.h" #include "TokenGroup.h" #include "SimpleFragmenter.h" #include "StringUtils.h" namespace Lucene { const int32_t Highlighter::DEFAULT_MAX_CHARS_TO_ANALYZE = 50 * 1024; Highlighter::Highlighter(const HighlighterScorerPtr& fragmentScorer) { this->formatter = newLucene(); this->encoder = newLucene(); this->fragmentScorer = fragmentScorer; this->maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE; this->textFragmenter = newLucene(); } Highlighter::Highlighter(const FormatterPtr& formatter, const HighlighterScorerPtr& fragmentScorer) { this->formatter = formatter; this->encoder = newLucene(); this->fragmentScorer = fragmentScorer; this->maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE; this->textFragmenter = newLucene(); } Highlighter::Highlighter(const FormatterPtr& formatter, const EncoderPtr& encoder, const HighlighterScorerPtr& fragmentScorer) { this->formatter = formatter; this->encoder = encoder; this->fragmentScorer = fragmentScorer; this->maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE; this->textFragmenter = newLucene(); } Highlighter::~Highlighter() { } String Highlighter::getBestFragment(const AnalyzerPtr& analyzer, const String& 
fieldName, const String& text) { TokenStreamPtr tokenStream(analyzer->tokenStream(fieldName, newLucene(text))); return getBestFragment(tokenStream, text); } String Highlighter::getBestFragment(const TokenStreamPtr& tokenStream, const String& text) { Collection results(getBestFragments(tokenStream,text, 1)); return results.empty() ? L"" : results[0]; } Collection Highlighter::getBestFragments(const AnalyzerPtr& analyzer, const String& fieldName, const String& text, int32_t maxNumFragments) { TokenStreamPtr tokenStream(analyzer->tokenStream(fieldName, newLucene(text))); return getBestFragments(tokenStream, text, maxNumFragments); } Collection Highlighter::getBestFragments(const TokenStreamPtr& tokenStream, const String& text, int32_t maxNumFragments) { maxNumFragments = std::max((int32_t)1, maxNumFragments); //sanity check Collection frag(getBestTextFragments(tokenStream, text, true, maxNumFragments)); // Get text Collection fragTexts(Collection::newInstance()); for (int32_t i = 0; i < frag.size(); ++i) { if (frag[i] && frag[i]->getScore() > 0) { fragTexts.add(frag[i]->toString()); } } return fragTexts; } Collection Highlighter::getBestTextFragments(const TokenStreamPtr& tokenStream, const String& text, bool merge, int32_t maxNumFragments) { Collection docFrags(Collection::newInstance()); StringBufferPtr newText(newLucene()); TokenStreamPtr _tokenStream(tokenStream); TermAttributePtr termAtt(_tokenStream->addAttribute()); OffsetAttributePtr offsetAtt(_tokenStream->addAttribute()); _tokenStream->addAttribute(); _tokenStream->reset(); TextFragmentPtr currentFrag(newLucene(newText, newText->length(), docFrags.size())); TokenStreamPtr newStream(fragmentScorer->init(_tokenStream)); if (newStream) { _tokenStream = newStream; } fragmentScorer->startFragment(currentFrag); docFrags.add(currentFrag); FragmentQueuePtr fragQueue(newLucene(maxNumFragments)); Collection frag; LuceneException finally; try { textFragmenter->start(text, _tokenStream); TokenGroupPtr 
tokenGroup(newLucene(_tokenStream)); String tokenText; int32_t startOffset = 0; int32_t endOffset = 0; int32_t lastEndOffset = 0; for (bool next = _tokenStream->incrementToken(); next && offsetAtt->startOffset() < maxDocCharsToAnalyze; next = _tokenStream->incrementToken()) { if (offsetAtt->endOffset() > (int32_t)text.length() || offsetAtt->startOffset() > (int32_t)text.length()) { boost::throw_exception(RuntimeException(L"InvalidTokenOffsets: Token " + termAtt->term() + L" exceeds length of provided text sized " + StringUtils::toString(text.length()))); } if (tokenGroup->numTokens > 0 && tokenGroup->isDistinct()) { // the current token is distinct from previous tokens - markup the cached token group info startOffset = tokenGroup->matchStartOffset; endOffset = tokenGroup->matchEndOffset; tokenText = text.substr(startOffset, endOffset - startOffset); String markedUpText(formatter->highlightTerm(encoder->encodeText(tokenText), tokenGroup)); // store any whitespace etc from between this and last group if (startOffset > lastEndOffset) { newText->append(encoder->encodeText(text.substr(lastEndOffset, startOffset - lastEndOffset))); } newText->append(markedUpText); lastEndOffset = std::max(endOffset, lastEndOffset); tokenGroup->clear(); // check if current token marks the start of a new fragment if (textFragmenter->isNewFragment()) { currentFrag->setScore(fragmentScorer->getFragmentScore()); // record stats for a new fragment currentFrag->textEndPos = newText->length(); currentFrag = newLucene(newText, newText->length(), docFrags.size()); fragmentScorer->startFragment(currentFrag); docFrags.add(currentFrag); } } tokenGroup->addToken(fragmentScorer->getTokenScore()); } currentFrag->setScore(fragmentScorer->getFragmentScore()); if (tokenGroup->numTokens > 0) { // flush the accumulated text (same code as in above loop) startOffset = tokenGroup->matchStartOffset; endOffset = tokenGroup->matchEndOffset; tokenText = text.substr(startOffset, endOffset - startOffset); String 
markedUpText(formatter->highlightTerm(encoder->encodeText(tokenText), tokenGroup)); // store any whitespace etc from between this and last group if (startOffset > lastEndOffset) { newText->append(encoder->encodeText(text.substr(lastEndOffset, startOffset - lastEndOffset))); } newText->append(markedUpText); lastEndOffset = std::max(lastEndOffset, endOffset); } // Test what remains of the original text beyond the point where we stopped analyzing if (lastEndOffset < (int32_t)text.length() && (int32_t)text.length() <= maxDocCharsToAnalyze) { // append it to the last fragment newText->append(encoder->encodeText(text.substr(lastEndOffset))); } currentFrag->textEndPos = newText->length(); // sort the most relevant sections of the text for (Collection::iterator i = docFrags.begin(); i != docFrags.end(); ++i) { fragQueue->addOverflow(*i); } // return the most relevant fragments frag = Collection::newInstance(fragQueue->size()); for (int32_t i = frag.size() - 1; i >= 0; --i) { frag[i] = fragQueue->pop(); } // merge any contiguous fragments to improve readability if (merge) { mergeContiguousFragments(frag); Collection fragTexts(Collection::newInstance()); for (int32_t i = 0; i < frag.size(); ++i) { if (frag[i] && frag[i]->getScore() > 0) { fragTexts.add(frag[i]); } } frag = fragTexts; } } catch (LuceneException& e) { finally = e; } if (_tokenStream) { try { _tokenStream->close(); } catch (...) 
{ } } finally.throwException(); return frag; } void Highlighter::mergeContiguousFragments(Collection frag) { if (frag.size() > 1) { bool mergingStillBeingDone = false; do { mergingStillBeingDone = false; // initialise loop control flag // for each fragment, scan other frags looking for contiguous blocks for (int32_t i = 0; i < frag.size(); ++i) { if (!frag[i]) { continue; } // merge any contiguous blocks for (int32_t x = 0; x < frag.size(); ++x) { if (!frag[x]) { continue; } if (!frag[i]) { break; } TextFragmentPtr frag1; TextFragmentPtr frag2; int32_t frag1Num = 0; int32_t frag2Num = 0; int32_t bestScoringFragNum = 0; int32_t worstScoringFragNum = 0; // if blocks are contiguous if (frag[i]->follows(frag[x])) { frag1 = frag[x]; frag1Num = x; frag2 = frag[i]; frag2Num = i; } else if (frag[x]->follows(frag[i])) { frag1 = frag[i]; frag1Num = i; frag2 = frag[x]; frag2Num = x; } // merging required if (frag1) { if (frag1->getScore() > frag2->getScore()) { bestScoringFragNum = frag1Num; worstScoringFragNum = frag2Num; } else { bestScoringFragNum = frag2Num; worstScoringFragNum = frag1Num; } frag1->merge(frag2); frag[worstScoringFragNum].reset(); mergingStillBeingDone = true; frag[bestScoringFragNum] = frag1; } } } } while (mergingStillBeingDone); } } String Highlighter::getBestFragments(const TokenStreamPtr& tokenStream, const String& text, int32_t maxNumFragments, const String& separator) { Collection sections(getBestFragments(tokenStream, text, maxNumFragments)); StringStream result; for (int32_t i = 0; i < sections.size(); ++i) { if (i > 0) { result << separator; } result << sections[i]; } return result.str(); } int32_t Highlighter::getMaxDocCharsToAnalyze() { return maxDocCharsToAnalyze; } void Highlighter::setMaxDocCharsToAnalyze(int32_t maxDocCharsToAnalyze) { this->maxDocCharsToAnalyze = maxDocCharsToAnalyze; } FragmenterPtr Highlighter::getTextFragmenter() { return textFragmenter; } void Highlighter::setTextFragmenter(const FragmenterPtr& fragmenter) { 
textFragmenter = fragmenter; } HighlighterScorerPtr Highlighter::getFragmentScorer() { return fragmentScorer; } void Highlighter::setFragmentScorer(const HighlighterScorerPtr& scorer) { fragmentScorer = scorer; } EncoderPtr Highlighter::getEncoder() { return encoder; } void Highlighter::setEncoder(const EncoderPtr& encoder) { this->encoder = encoder; } FragmentQueue::FragmentQueue(int32_t size) : PriorityQueue(size) { } FragmentQueue::~FragmentQueue() { } bool FragmentQueue::lessThan(const TextFragmentPtr& first, const TextFragmentPtr& second) { if (first->getScore() == second->getScore()) { return first->fragNum > second->fragNum; } else { return first->getScore() < second->getScore(); } } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/HighlighterScorer.cpp000066400000000000000000000016361456444476200262030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "HighlighterScorer.h" namespace Lucene { HighlighterScorer::~HighlighterScorer() { } TokenStreamPtr HighlighterScorer::init(const TokenStreamPtr& tokenStream) { BOOST_ASSERT(false); return TokenStreamPtr(); // override } void HighlighterScorer::startFragment(const TextFragmentPtr& newFragment) { BOOST_ASSERT(false); // override } double HighlighterScorer::getTokenScore() { BOOST_ASSERT(false); return 0; // override } double HighlighterScorer::getFragmentScore() { BOOST_ASSERT(false); return 0; // override } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/MapWeightedSpanTerm.cpp000066400000000000000000000020001456444476200264210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "MapWeightedSpanTerm.h" namespace Lucene { MapWeightedSpanTerm::MapWeightedSpanTerm() { map = MapStringWeightedSpanTerm::newInstance(); } MapWeightedSpanTerm::~MapWeightedSpanTerm() { } MapStringWeightedSpanTerm::iterator MapWeightedSpanTerm::begin() { return map.begin(); } MapStringWeightedSpanTerm::iterator MapWeightedSpanTerm::end() { return map.end(); } void MapWeightedSpanTerm::put(const String& key, const WeightedSpanTermPtr& val) { return map.put(key, val); } WeightedSpanTermPtr MapWeightedSpanTerm::get(const String& key) const { return map.get(key); } void MapWeightedSpanTerm::clear() { map.clear(); } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/NullFragmenter.cpp000066400000000000000000000011401456444476200255020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "NullFragmenter.h" namespace Lucene { NullFragmenter::~NullFragmenter() { } void NullFragmenter::start(const String& originalText, const TokenStreamPtr& tokenStream) { } bool NullFragmenter::isNewFragment() { return false; } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/QueryScorer.cpp000066400000000000000000000115151456444476200250470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "QueryScorer.h" #include "WeightedSpanTerm.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" #include "TokenStream.h" #include "MapWeightedSpanTerm.h" #include "WeightedSpanTermExtractor.h" namespace Lucene { QueryScorer::QueryScorer(const QueryPtr& query) { init(query, L"", IndexReaderPtr(), true); } QueryScorer::QueryScorer(const QueryPtr& query, const String& field) { init(query, field, IndexReaderPtr(), true); } QueryScorer::QueryScorer(const QueryPtr& query, const IndexReaderPtr& reader, const String& field) { init(query, field, reader, true); } QueryScorer::QueryScorer(const QueryPtr& query, const IndexReaderPtr& reader, const String& field, const String& defaultField) { this->defaultField = defaultField; init(query, field, reader, true); } QueryScorer::QueryScorer(const QueryPtr& query, const String& field, const String& defaultField) { this->defaultField = defaultField; init(query, field, IndexReaderPtr(), true); } QueryScorer::QueryScorer(Collection weightedTerms) { init(QueryPtr(), L"", IndexReaderPtr(), true); this->fieldWeightedSpanTerms = newLucene(); for (int32_t i = 0; i < weightedTerms.size(); ++i) { WeightedSpanTermPtr existingTerm(fieldWeightedSpanTerms->get(weightedTerms[i]->term)); if (!existingTerm || existingTerm->weight < weightedTerms[i]->weight) { // if a term is defined more than once, always use the highest scoring weight fieldWeightedSpanTerms->put(weightedTerms[i]->term, weightedTerms[i]); maxTermWeight = std::max(maxTermWeight, weightedTerms[i]->getWeight()); } } skipInitExtractor = true; } QueryScorer::~QueryScorer() { } void QueryScorer::init(const QueryPtr& query, const String& field, const IndexReaderPtr& reader, bool expandMultiTermQuery) { this->totalScore = 0; this->maxTermWeight = 0; this->position = -1; this->skipInitExtractor = false; this->wrapToCaching = true; this->reader = reader; 
this->expandMultiTermQuery = expandMultiTermQuery; this->query = query; this->field = field; } double QueryScorer::getFragmentScore() { return totalScore; } double QueryScorer::getMaxTermWeight() { return maxTermWeight; } double QueryScorer::getTokenScore() { position += posIncAtt->getPositionIncrement(); String termText(termAtt->term()); WeightedSpanTermPtr weightedSpanTerm(fieldWeightedSpanTerms->get(termText)); if (!weightedSpanTerm) { return 0.0; } if (weightedSpanTerm->positionSensitive && !weightedSpanTerm->checkPosition(position)) { return 0.0; } double score = weightedSpanTerm->getWeight(); // found a query term - is it unique in this doc? if (!foundTerms.contains(termText)) { totalScore += score; foundTerms.add(termText); } return score; } TokenStreamPtr QueryScorer::init(const TokenStreamPtr& tokenStream) { position = -1; termAtt = tokenStream->addAttribute(); posIncAtt = tokenStream->addAttribute(); if (!skipInitExtractor) { if (fieldWeightedSpanTerms) { fieldWeightedSpanTerms->clear(); } return initExtractor(tokenStream); } return TokenStreamPtr(); } WeightedSpanTermPtr QueryScorer::getWeightedSpanTerm(const String& token) { return fieldWeightedSpanTerms->get(token); } TokenStreamPtr QueryScorer::initExtractor(const TokenStreamPtr& tokenStream) { WeightedSpanTermExtractorPtr qse(newLucene(defaultField)); qse->setExpandMultiTermQuery(expandMultiTermQuery); qse->setWrapIfNotCachingTokenFilter(wrapToCaching); if (!reader) { this->fieldWeightedSpanTerms = qse->getWeightedSpanTerms(query, tokenStream, field); } else { this->fieldWeightedSpanTerms = qse->getWeightedSpanTermsWithScores(query, tokenStream, field, reader); } if (qse->isCachedTokenStream()) { return qse->getTokenStream(); } return TokenStreamPtr(); } void QueryScorer::startFragment(const TextFragmentPtr& newFragment) { foundTerms = HashSet::newInstance(); totalScore = 0; } bool QueryScorer::isExpandMultiTermQuery() { return expandMultiTermQuery; } void QueryScorer::setExpandMultiTermQuery(bool 
expandMultiTermQuery) { this->expandMultiTermQuery = expandMultiTermQuery; } void QueryScorer::setWrapIfNotCachingTokenFilter(bool wrap) { this->wrapToCaching = wrap; } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/QueryTermExtractor.cpp000066400000000000000000000075031456444476200264170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "QueryTermExtractor.h" #include "Term.h" #include "BooleanQuery.h" #include "BooleanClause.h" #include "FilteredQuery.h" #include "WeightedTerm.h" #include "IndexReader.h" #include "MiscUtils.h" namespace Lucene { QueryTermExtractor::~QueryTermExtractor() { } Collection QueryTermExtractor::getTerms(const QueryPtr& query) { return getTerms(query, false); } Collection QueryTermExtractor::getIdfWeightedTerms(const QueryPtr& query, const IndexReaderPtr& reader, const String& fieldName) { Collection terms(getTerms(query, false, fieldName)); int32_t totalNumDocs = reader->numDocs(); for (int32_t i = 0; i < terms.size(); ++i) { try { int32_t docFreq = reader->docFreq(newLucene(fieldName, terms[i]->term)); // docFreq counts deletes if (totalNumDocs < docFreq) { docFreq = totalNumDocs; } // IDF algorithm taken from DefaultSimilarity class double idf = (double)(std::log((double)totalNumDocs / (double)(docFreq + 1)) + 1.0); terms[i]->weight *= idf; } catch (...) 
{ // ignore } } return terms; } Collection QueryTermExtractor::getTerms(const QueryPtr& query, bool prohibited, const String& fieldName) { SetWeightedTerm terms(SetWeightedTerm::newInstance()); getTerms(query, terms, prohibited, fieldName); return Collection::newInstance(terms.begin(), terms.end()); } Collection QueryTermExtractor::getTerms(const QueryPtr& query, bool prohibited) { SetWeightedTerm terms(SetWeightedTerm::newInstance()); getTerms(query, terms, prohibited, L""); return Collection::newInstance(terms.begin(), terms.end()); } void QueryTermExtractor::getTerms(const QueryPtr& query, SetWeightedTerm terms, bool prohibited, const String& fieldName) { try { if (MiscUtils::typeOf(query)) { getTermsFromBooleanQuery(boost::dynamic_pointer_cast(query), terms, prohibited, fieldName); } else if (MiscUtils::typeOf(query)) { getTermsFromFilteredQuery(boost::dynamic_pointer_cast(query), terms, prohibited, fieldName); } else { SetTerm nonWeightedTerms(SetTerm::newInstance()); query->extractTerms(nonWeightedTerms); for (SetTerm::iterator term = nonWeightedTerms.begin(); term != nonWeightedTerms.end(); ++term) { if (fieldName.empty() || (*term)->field() == fieldName) { terms.add(newLucene(query->getBoost(), (*term)->text())); } } } } catch (UnsupportedOperationException&) { // this is non-fatal for our purposes } } void QueryTermExtractor::getTermsFromBooleanQuery(const BooleanQueryPtr& query, SetWeightedTerm terms, bool prohibited, const String& fieldName) { Collection queryClauses(query->getClauses()); for (int32_t i = 0; i < queryClauses.size(); ++i) { if (prohibited || queryClauses[i]->getOccur() != BooleanClause::MUST_NOT) { getTerms(queryClauses[i]->getQuery(), terms, prohibited, fieldName); } } } void QueryTermExtractor::getTermsFromFilteredQuery(const FilteredQueryPtr& query, SetWeightedTerm terms, bool prohibited, const String& fieldName) { getTerms(query->getQuery(), terms, prohibited, fieldName); } } 
LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/QueryTermScorer.cpp000066400000000000000000000056341456444476200257040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "QueryTermScorer.h" #include "QueryTermExtractor.h" #include "TermAttribute.h" #include "WeightedTerm.h" #include "TokenStream.h" namespace Lucene { QueryTermScorer::QueryTermScorer(const QueryPtr& query) { ConstructQueryTermScorer(QueryTermExtractor::getTerms(query)); } QueryTermScorer::QueryTermScorer(const QueryPtr& query, const String& fieldName) { ConstructQueryTermScorer(QueryTermExtractor::getTerms(query, false, fieldName)); } QueryTermScorer::QueryTermScorer(const QueryPtr& query, const IndexReaderPtr& reader, const String& fieldName) { ConstructQueryTermScorer(QueryTermExtractor::getIdfWeightedTerms(query, reader, fieldName)); } QueryTermScorer::QueryTermScorer(Collection weightedTerms) { ConstructQueryTermScorer(weightedTerms); } QueryTermScorer::~QueryTermScorer() { } void QueryTermScorer::ConstructQueryTermScorer(Collection weightedTerms) { totalScore = 0; maxTermWeight = 0; termsToFind = MapStringWeightedTerm::newInstance(); for (int32_t i = 0; i < weightedTerms.size(); ++i) { WeightedTermPtr existingTerm(termsToFind.get(weightedTerms[i]->term)); if (!existingTerm || existingTerm->weight < weightedTerms[i]->weight) { // if a term is defined more than once, always use the highest scoring weight termsToFind.put(weightedTerms[i]->term, weightedTerms[i]); maxTermWeight = std::max(maxTermWeight, weightedTerms[i]->getWeight()); } } } TokenStreamPtr QueryTermScorer::init(const TokenStreamPtr& tokenStream) { termAtt = tokenStream->addAttribute(); 
return TokenStreamPtr(); } void QueryTermScorer::startFragment(const TextFragmentPtr& newFragment) { uniqueTermsInFragment = HashSet::newInstance(); currentTextFragment = newFragment; totalScore = 0; } double QueryTermScorer::getTokenScore() { String termText(termAtt->term()); WeightedTermPtr queryTerm(termsToFind.get(termText)); if (!queryTerm) { return 0.0; // not a query term - return } // found a query term - is it unique in this doc? if (!uniqueTermsInFragment.contains(termText)) { totalScore += queryTerm->getWeight();; uniqueTermsInFragment.add(termText); } return queryTerm->getWeight(); } double QueryTermScorer::getFragmentScore() { return totalScore; } void QueryTermScorer::allFragmentsProcessed() { // this class has no special operations to perform at end of processing } double QueryTermScorer::getMaxTermWeight() { return maxTermWeight; } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/SimpleFragmenter.cpp000066400000000000000000000025641456444476200260340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "SimpleFragmenter.h" #include "TokenGroup.h" #include "OffsetAttribute.h" #include "TokenStream.h" namespace Lucene { const int32_t SimpleFragmenter::DEFAULT_FRAGMENT_SIZE = 100; SimpleFragmenter::SimpleFragmenter() { this->currentNumFrags = 0; this->fragmentSize = DEFAULT_FRAGMENT_SIZE; } SimpleFragmenter::SimpleFragmenter(int32_t fragmentSize) { this->currentNumFrags = 0; this->fragmentSize = fragmentSize; } SimpleFragmenter::~SimpleFragmenter() { } void SimpleFragmenter::start(const String& originalText, const TokenStreamPtr& tokenStream) { offsetAtt = tokenStream->addAttribute(); currentNumFrags = 1; } bool SimpleFragmenter::isNewFragment() { bool isNewFrag = (offsetAtt->endOffset() >= (fragmentSize * currentNumFrags)); if (isNewFrag) { ++currentNumFrags; } return isNewFrag; } int32_t SimpleFragmenter::getFragmentSize() { return fragmentSize; } void SimpleFragmenter::setFragmentSize(int32_t size) { fragmentSize = size; } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/SimpleHTMLEncoder.cpp000066400000000000000000000025231456444476200260010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "SimpleHTMLEncoder.h" namespace Lucene { SimpleHTMLEncoder::~SimpleHTMLEncoder() { } String SimpleHTMLEncoder::encodeText(const String& originalText) { return htmlEncode(originalText); } String SimpleHTMLEncoder::htmlEncode(const String& plainText) { if (plainText.empty()) { return L""; } StringStream result; for (int32_t index = 0; index < (int32_t)plainText.length(); ++index) { wchar_t ch = plainText[index]; switch (ch) { case L'\"': result << L"""; break; case L'&': result << L"&"; break; case L'<': result << L"<"; break; case L'>': result << L">"; break; default: if (ch < 128) { result << ch; } else { result << L"&#" << (int32_t)ch << L";"; } break; } } return result.str(); } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/SimpleHTMLFormatter.cpp000066400000000000000000000022221456444476200263610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "SimpleHTMLFormatter.h" #include "TokenGroup.h" namespace Lucene { const String SimpleHTMLFormatter::DEFAULT_PRE_TAG = L""; const String SimpleHTMLFormatter::DEFAULT_POST_TAG = L""; SimpleHTMLFormatter::SimpleHTMLFormatter() { this->preTag = DEFAULT_PRE_TAG; this->postTag = DEFAULT_POST_TAG; } SimpleHTMLFormatter::SimpleHTMLFormatter(const String& preTag, const String& postTag) { this->preTag = preTag; this->postTag = postTag; } SimpleHTMLFormatter::~SimpleHTMLFormatter() { } String SimpleHTMLFormatter::highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup) { if (tokenGroup->getTotalScore() == 0) { return originalText; } StringStream buffer; buffer << preTag << originalText << postTag; return buffer.str(); } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/SimpleSpanFragmenter.cpp000066400000000000000000000050021456444476200266440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "SimpleSpanFragmenter.h" #include "WeightedSpanTerm.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" #include "OffsetAttribute.h" #include "QueryScorer.h" #include "TokenStream.h" #include "MiscUtils.h" namespace Lucene { const int32_t SimpleSpanFragmenter::DEFAULT_FRAGMENT_SIZE = 100; SimpleSpanFragmenter::SimpleSpanFragmenter(const QueryScorerPtr& queryScorer) { this->currentNumFrags = 0; this->position = -1; this->waitForPos = -1; this->textSize = 0; this->queryScorer = queryScorer; this->fragmentSize = DEFAULT_FRAGMENT_SIZE; } SimpleSpanFragmenter::SimpleSpanFragmenter(const QueryScorerPtr& queryScorer, int32_t fragmentSize) { this->currentNumFrags = 0; this->position = -1; this->waitForPos = -1; this->textSize = 0; this->queryScorer = queryScorer; this->fragmentSize = fragmentSize; } SimpleSpanFragmenter::~SimpleSpanFragmenter() { } bool SimpleSpanFragmenter::isNewFragment() { position += posIncAtt->getPositionIncrement(); if (waitForPos == position) { waitForPos = -1; } else if (waitForPos != -1) { return false; } WeightedSpanTermPtr wSpanTerm(queryScorer->getWeightedSpanTerm(termAtt->term())); if (wSpanTerm) { Collection positionSpans(wSpanTerm->getPositionSpans()); for (int32_t i = 0; i < positionSpans.size(); ++i) { if (positionSpans[i]->start == position) { waitForPos = positionSpans[i]->end + 1; break; } } } bool isNewFrag = (offsetAtt->endOffset() >= (fragmentSize * currentNumFrags) && (textSize - offsetAtt->endOffset()) >= MiscUtils::unsignedShift(fragmentSize, 1)); if (isNewFrag) { ++currentNumFrags; } return isNewFrag; } void SimpleSpanFragmenter::start(const String& originalText, const TokenStreamPtr& tokenStream) { position = -1; currentNumFrags = 1; textSize = originalText.length(); termAtt = tokenStream->addAttribute(); posIncAtt = tokenStream->addAttribute(); offsetAtt = tokenStream->addAttribute(); } } 
LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/SpanGradientFormatter.cpp000066400000000000000000000027011456444476200270240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "SpanGradientFormatter.h" #include "TokenGroup.h" namespace Lucene { SpanGradientFormatter::SpanGradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor) : GradientFormatter(maxScore, minForegroundColor, maxForegroundColor, minBackgroundColor, maxBackgroundColor) { } SpanGradientFormatter::~SpanGradientFormatter() { } String SpanGradientFormatter::highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup) { if (tokenGroup->getTotalScore() == 0) { return originalText; } double score = tokenGroup->getTotalScore(); if (score == 0.0) { return originalText; } StringStream buffer; buffer << L"" << originalText << L""; return buffer.str(); } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/TextFragment.cpp000066400000000000000000000030321456444476200251670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "TextFragment.h" namespace Lucene { TextFragment::TextFragment(const StringBufferPtr& markedUpText, int32_t textStartPos, int32_t fragNum) { this->markedUpText = markedUpText; this->textStartPos = textStartPos; this->textEndPos = 0; this->fragNum = fragNum; this->score = 0; } TextFragment::~TextFragment() { } void TextFragment::setScore(double score) { this->score = score; } double TextFragment::getScore() { return score; } void TextFragment::merge(const TextFragmentPtr& frag2) { textEndPos = frag2->textEndPos; score = std::max(score, frag2->score); } bool TextFragment::follows(const TextFragmentPtr& fragment) { return (textStartPos == fragment->textEndPos); } int32_t TextFragment::getFragNum() { return fragNum; } String TextFragment::toString() { return markedUpText->toString().substr(textStartPos, textEndPos - textStartPos); } StringBuffer::~StringBuffer() { } int32_t StringBuffer::length() { return buffer.str().length(); } String StringBuffer::toString() { return buffer.str(); } void StringBuffer::append(const String& str) { buffer << str; } void StringBuffer::clear() { buffer.str(L""); } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/TokenGroup.cpp000066400000000000000000000054341456444476200246640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "TokenGroup.h" #include "OffsetAttribute.h" #include "TermAttribute.h" #include "TokenStream.h" #include "Token.h" namespace Lucene { const int32_t TokenGroup::MAX_NUM_TOKENS_PER_GROUP = 50; TokenGroup::TokenGroup(const TokenStreamPtr& tokenStream) { offsetAtt = tokenStream->addAttribute(); termAtt = tokenStream->addAttribute(); tokens = Collection::newInstance(MAX_NUM_TOKENS_PER_GROUP); scores = Collection::newInstance(MAX_NUM_TOKENS_PER_GROUP); numTokens = 0; startOffset = 0; endOffset = 0; tot = 0.0; matchStartOffset = 0; matchEndOffset = 0; } TokenGroup::~TokenGroup() { } void TokenGroup::addToken(double score) { if (numTokens < MAX_NUM_TOKENS_PER_GROUP) { int32_t termStartOffset = offsetAtt->startOffset(); int32_t termEndOffset = offsetAtt->endOffset(); if (numTokens == 0) { matchStartOffset = termStartOffset; startOffset = termStartOffset; matchEndOffset = termEndOffset; endOffset = termEndOffset; tot += score; } else { startOffset = std::min(startOffset, termStartOffset); endOffset = std::max(endOffset, termEndOffset); if (score > 0) { if (tot == 0) { matchStartOffset = offsetAtt->startOffset(); matchEndOffset = offsetAtt->endOffset(); } else { matchStartOffset = std::min(matchStartOffset, termStartOffset); matchEndOffset = std::max(matchEndOffset, termEndOffset); } tot += score; } } TokenPtr token(newLucene(termStartOffset, termEndOffset)); token->setTermBuffer(termAtt->term()); tokens[numTokens] = token; scores[numTokens] = score; ++numTokens; } } bool TokenGroup::isDistinct() { return (offsetAtt->startOffset() >= endOffset); } void TokenGroup::clear() { numTokens = 0; tot = 0; } TokenPtr TokenGroup::getToken(int32_t index) { return tokens[index]; } double TokenGroup::getScore(int32_t index) { return scores[index]; } int32_t TokenGroup::getEndOffset() { return endOffset; } int32_t TokenGroup::getNumTokens() { return numTokens; } int32_t 
TokenGroup::getStartOffset() { return startOffset; } double TokenGroup::getTotalScore() { return tot; } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/TokenSources.cpp000066400000000000000000000156661456444476200252230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "TokenSources.h" #include "IndexReader.h" #include "Document.h" #include "Analyzer.h" #include "TokenStream.h" #include "TermFreqVector.h" #include "TermPositionVector.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "TermVectorOffsetInfo.h" #include "Token.h" #include "StringReader.h" #include "StringUtils.h" namespace Lucene { TokenSources::~TokenSources() { } TokenStreamPtr TokenSources::getAnyTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field, const DocumentPtr& doc, const AnalyzerPtr& analyzer) { TokenStreamPtr ts; TermFreqVectorPtr tfv(reader->getTermFreqVector(docId, field)); if (tfv) { if (boost::dynamic_pointer_cast(tfv)) { ts = getTokenStream(boost::dynamic_pointer_cast(tfv)); } } // No token info stored so fall back to analyzing raw content if (!ts) { ts = getTokenStream(doc, field, analyzer); } return ts; } TokenStreamPtr TokenSources::getAnyTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field, const AnalyzerPtr& analyzer) { TokenStreamPtr ts; TermFreqVectorPtr tfv(reader->getTermFreqVector(docId, field)); if (tfv) { if (boost::dynamic_pointer_cast(tfv)) { ts = getTokenStream(boost::dynamic_pointer_cast(tfv)); } } // No token info stored so fall back to analyzing raw content if (!ts) { ts = getTokenStream(reader, docId, field, analyzer); } return ts; } TokenStreamPtr 
TokenSources::getTokenStream(const TermPositionVectorPtr& tpv) { // assumes the worst and makes no assumptions about token position sequences. return getTokenStream(tpv, false); } struct lessTokenOffset { inline bool operator()(const TokenPtr& first, const TokenPtr& second) const { if (first->startOffset() < second->startOffset()) { return true; } return (first->startOffset() > second->endOffset()); } }; TokenStreamPtr TokenSources::getTokenStream(const TermPositionVectorPtr& tpv, bool tokenPositionsGuaranteedContiguous) { // code to reconstruct the original sequence of Tokens Collection terms(tpv->getTerms()); Collection freq(tpv->getTermFrequencies()); int32_t totalTokens = 0; for (int32_t t = 0; t < freq.size(); ++t) { totalTokens += freq[t]; } Collection tokensInOriginalOrder(Collection::newInstance(totalTokens)); Collection unsortedTokens; for (int32_t t = 0; t < freq.size(); ++t) { Collection offsets(tpv->getOffsets(t)); if (!offsets) { return TokenStreamPtr(); } Collection pos; if (tokenPositionsGuaranteedContiguous) { // try get the token position info to speed up assembly of tokens into sorted sequence pos = tpv->getTermPositions(t); } if (!pos) { // tokens NOT stored with positions or not guaranteed contiguous - must add to list and sort later if (!unsortedTokens) { unsortedTokens = Collection::newInstance(); } for (int32_t tp = 0; tp < offsets.size(); ++tp) { TokenPtr token(newLucene(offsets[tp]->getStartOffset(), offsets[tp]->getEndOffset())); token->setTermBuffer(terms[t]); unsortedTokens.add(token); } } else { // We have positions stored and a guarantee that the token position information is contiguous // This may be fast BUT wont work if Tokenizers used which create >1 token in same position or // creates jumps in position numbers - this code would fail under those circumstances // Tokens stored with positions - can use this to index straight into sorted array for (int32_t tp = 0; tp < pos.size(); ++tp) { TokenPtr token(newLucene(terms[t], 
offsets[tp]->getStartOffset(), offsets[tp]->getEndOffset())); tokensInOriginalOrder[pos[tp]] = token; } } } // If the field has been stored without position data we must perform a sort if (unsortedTokens) { tokensInOriginalOrder = unsortedTokens; std::sort(tokensInOriginalOrder.begin(), tokensInOriginalOrder.end(), lessTokenOffset()); } return newLucene(tokensInOriginalOrder); } TokenStreamPtr TokenSources::getTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field) { TermFreqVectorPtr tfv(reader->getTermFreqVector(docId, field)); if (!tfv) { boost::throw_exception(IllegalArgumentException(field + L" in doc #" + StringUtils::toString(docId) + L"does not have any term position data stored")); } if (boost::dynamic_pointer_cast(tfv)) { TermPositionVectorPtr tpv(boost::dynamic_pointer_cast(reader->getTermFreqVector(docId, field))); return getTokenStream(tpv); } boost::throw_exception(IllegalArgumentException(field + L" in doc #" + StringUtils::toString(docId) + L"does not have any term position data stored")); return TokenStreamPtr(); } TokenStreamPtr TokenSources::getTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field, const AnalyzerPtr& analyzer) { DocumentPtr doc(reader->document(docId)); return getTokenStream(doc, field, analyzer); } TokenStreamPtr TokenSources::getTokenStream(const DocumentPtr& doc, const String& field, const AnalyzerPtr& analyzer) { String contents(doc->get(field)); if (contents.empty()) { boost::throw_exception(IllegalArgumentException(L"Field " + field + L" in document is not stored and cannot be analyzed")); } return getTokenStream(field, contents, analyzer); } TokenStreamPtr TokenSources::getTokenStream(const String& field, const String& contents, const AnalyzerPtr& analyzer) { return analyzer->tokenStream(field, newLucene(contents)); } StoredTokenStream::StoredTokenStream(Collection tokens) { this->tokens = tokens; this->termAtt = addAttribute(); this->offsetAtt = addAttribute(); } 
StoredTokenStream::~StoredTokenStream() { } bool StoredTokenStream::incrementToken() { if (currentToken >= tokens.size()) { return false; } clearAttributes(); TokenPtr token(tokens[currentToken++]); termAtt->setTermBuffer(token->term()); offsetAtt->setOffset(token->startOffset(), token->endOffset()); return true; } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/WeightedSpanTerm.cpp000066400000000000000000000034451456444476200260010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "WeightedSpanTerm.h" namespace Lucene { WeightedSpanTerm::WeightedSpanTerm(double weight, const String& term, bool positionSensitive) : WeightedTerm(weight, term) { this->positionSensitive = positionSensitive; this->positionSpans = Collection::newInstance(); } WeightedSpanTerm::~WeightedSpanTerm() { } bool WeightedSpanTerm::checkPosition(int32_t position) { // There would probably be a slight speed improvement if PositionSpans where kept in some sort of priority queue - // that way this method could bail early without checking each PositionSpan. 
for (Collection::iterator posSpan = positionSpans.begin(); posSpan != positionSpans.end(); ++posSpan) { if (position >= (*posSpan)->start && position <= (*posSpan)->end) { return true; } } return false; } void WeightedSpanTerm::addPositionSpans(Collection positionSpans) { this->positionSpans.addAll(positionSpans.begin(), positionSpans.end()); } bool WeightedSpanTerm::isPositionSensitive() { return positionSensitive; } void WeightedSpanTerm::setPositionSensitive(bool positionSensitive) { this->positionSensitive = positionSensitive; } Collection WeightedSpanTerm::getPositionSpans() { return positionSpans; } PositionSpan::PositionSpan(int32_t start, int32_t end) { this->start = start; this->end = end; } PositionSpan::~PositionSpan() { } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/WeightedSpanTermExtractor.cpp000066400000000000000000000425231456444476200276750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "WeightedSpanTermExtractor.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "BooleanQuery.h" #include "BooleanClause.h" #include "PhraseQuery.h" #include "Term.h" #include "SpanQuery.h" #include "SpanTermQuery.h" #include "SpanNearQuery.h" #include "TermQuery.h" #include "FilteredQuery.h" #include "DisjunctionMaxQuery.h" #include "MultiTermQuery.h" #include "MultiPhraseQuery.h" #include "WeightedSpanTerm.h" #include "CachingTokenFilter.h" #include "Spans.h" #include "FieldMaskingSpanQuery.h" #include "SpanFirstQuery.h" #include "SpanNotQuery.h" #include "SpanOrQuery.h" #include "MemoryIndex.h" #include "MiscUtils.h" namespace Lucene { WeightedSpanTermExtractor::WeightedSpanTermExtractor(const String& defaultField) { this->defaultField = defaultField; this->expandMultiTermQuery = false; this->cachedTokenStream = false; this->wrapToCaching = true; this->readers = MapStringIndexReader::newInstance(); } WeightedSpanTermExtractor::~WeightedSpanTermExtractor() { } void WeightedSpanTermExtractor::closeReaders() { for (MapStringIndexReader::iterator reader = readers.begin(); reader != readers.end(); ++reader) { try { reader->second->close(); } catch (...) 
{ } } } void WeightedSpanTermExtractor::extract(const QueryPtr& query, const MapWeightedSpanTermPtr& terms) { QueryPtr _query(query); if (MiscUtils::typeOf(_query)) { Collection queryClauses(boost::dynamic_pointer_cast(_query)->getClauses()); for (int32_t i = 0; i < queryClauses.size(); ++i) { if (!queryClauses[i]->isProhibited()) { extract(queryClauses[i]->getQuery(), terms); } } } else if (MiscUtils::typeOf(_query)) { PhraseQueryPtr phraseQuery(boost::dynamic_pointer_cast(_query)); Collection phraseQueryTerms(phraseQuery->getTerms()); Collection clauses(Collection::newInstance(phraseQueryTerms.size())); for (int32_t i = 0; i < phraseQueryTerms.size(); ++i) { clauses[i] = newLucene(phraseQueryTerms[i]); } // sum position increments beyond 1 int32_t positionGaps = 0; Collection positions(phraseQuery->getPositions()); if (!positions.empty() && positions.size() > 1) { // positions are in increasing order. max(0,...) is just a safeguard. positionGaps = (std::max)(0, positions[positions.size() - 1] - positions[0] - positions.size() + 1 ); } //if original slop is 0 then require inOrder bool inorder = (phraseQuery->getSlop() == 0); SpanNearQueryPtr sp(newLucene(clauses, phraseQuery->getSlop() + positionGaps, inorder)); sp->setBoost(_query->getBoost()); extractWeightedSpanTerms(terms, sp); } else if (MiscUtils::typeOf(_query)) { extractWeightedTerms(terms, _query); } else if (MiscUtils::typeOf(_query)) { extractWeightedSpanTerms(terms, boost::dynamic_pointer_cast(_query)); } else if (MiscUtils::typeOf(_query)) { extract(boost::dynamic_pointer_cast(_query)->getQuery(), terms); } else if (MiscUtils::typeOf(_query)) { DisjunctionMaxQueryPtr dmq(boost::dynamic_pointer_cast(_query)); for (Collection::iterator q = dmq->begin(); q != dmq->end(); ++q) { extract(*q, terms); } } else if (MiscUtils::typeOf(_query) && expandMultiTermQuery) { MultiTermQueryPtr mtq(boost::dynamic_pointer_cast(_query)); if (mtq->getRewriteMethod() != MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()) { 
mtq = boost::dynamic_pointer_cast(mtq->clone()); mtq->setRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); _query = mtq; } FakeReaderPtr fReader(newLucene()); MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()->rewrite(fReader, mtq); if (!fReader->field.empty()) { IndexReaderPtr ir(getReaderForField(fReader->field)); extract(_query->rewrite(ir), terms); } } else if (MiscUtils::typeOf(_query)) { MultiPhraseQueryPtr mpq(boost::dynamic_pointer_cast(_query)); Collection< Collection > termArrays(mpq->getTermArrays()); Collection positions(mpq->getPositions()); if (!positions.empty()) { int32_t maxPosition = positions[positions.size() - 1]; for (int32_t i = 0; i < positions.size() - 1; ++i) { if (positions[i] > maxPosition) { maxPosition = positions[i]; } } Collection< Collection > disjunctLists(Collection< Collection >::newInstance(maxPosition + 1)); int32_t distinctPositions = 0; for (int32_t i = 0; i < termArrays.size(); ++i) { Collection termArray(termArrays[i]); Collection disjuncts(disjunctLists[positions[i]]); if (!disjuncts) { disjunctLists[positions[i]] = Collection::newInstance(); disjuncts = disjunctLists[positions[i]]; ++distinctPositions; } for (int32_t j = 0; j < termArray.size(); ++j) { disjuncts.add(newLucene(termArray[j])); } } int32_t positionGaps = 0; int32_t position = 0; Collection clauses(Collection::newInstance(distinctPositions)); for (int32_t i = 0; i < disjunctLists.size(); ++i) { Collection disjuncts(disjunctLists[i]); if (disjuncts) { clauses[position++] = newLucene(disjuncts); } else { ++positionGaps; } } int32_t slop = mpq->getSlop(); bool inorder = (slop == 0); SpanNearQueryPtr sp(newLucene(clauses, slop + positionGaps, inorder)); sp->setBoost(_query->getBoost()); extractWeightedSpanTerms(terms, sp); } } } void WeightedSpanTermExtractor::extractWeightedSpanTerms(const MapWeightedSpanTermPtr& terms, const SpanQueryPtr& spanQuery) { HashSet fieldNames(HashSet::newInstance()); if (fieldName.empty()) { 
collectSpanQueryFields(spanQuery, fieldNames); } else { fieldNames.add(fieldName); } // To support the use of the default field name if (!defaultField.empty()) { fieldNames.add(defaultField); } MapStringSpanQuery queries(MapStringSpanQuery::newInstance()); SetTerm nonWeightedTerms(SetTerm::newInstance()); bool rewriteQuery = mustRewriteQuery(spanQuery); if (rewriteQuery) { for (HashSet::iterator field = fieldNames.begin(); field != fieldNames.end(); ++field) { SpanQueryPtr rewrittenQuery(boost::dynamic_pointer_cast(spanQuery->rewrite(getReaderForField(*field)))); queries.put(*field, rewrittenQuery); rewrittenQuery->extractTerms(nonWeightedTerms); } } else { spanQuery->extractTerms(nonWeightedTerms); } Collection spanPositions(Collection::newInstance()); for (HashSet::iterator field = fieldNames.begin(); field != fieldNames.end(); ++field) { IndexReaderPtr reader(getReaderForField(*field)); SpansPtr spans; if (rewriteQuery) { spans = queries.get(*field)->getSpans(reader); } else { spans = spanQuery->getSpans(reader); } // collect span positions while (spans->next()) { spanPositions.add(newLucene(spans->start(), spans->end() - 1)); } } if (spanPositions.empty()) { // no spans found return; } for (SetTerm::iterator queryTerm = nonWeightedTerms.begin(); queryTerm != nonWeightedTerms.end(); ++queryTerm) { if (fieldNameComparator((*queryTerm)->field())) { WeightedSpanTermPtr weightedSpanTerm(terms->get((*queryTerm)->text())); if (!weightedSpanTerm) { weightedSpanTerm = newLucene(spanQuery->getBoost(), (*queryTerm)->text()); weightedSpanTerm->addPositionSpans(spanPositions); weightedSpanTerm->positionSensitive = true; terms->put((*queryTerm)->text(), weightedSpanTerm); } else { if (!spanPositions.empty()) { weightedSpanTerm->addPositionSpans(spanPositions); } } } } } void WeightedSpanTermExtractor::extractWeightedTerms(const MapWeightedSpanTermPtr& terms, const QueryPtr& query) { SetTerm nonWeightedTerms(SetTerm::newInstance()); query->extractTerms(nonWeightedTerms); for 
(SetTerm::iterator queryTerm = nonWeightedTerms.begin(); queryTerm != nonWeightedTerms.end(); ++queryTerm) { if (fieldNameComparator((*queryTerm)->field())) { WeightedSpanTermPtr weightedSpanTerm(newLucene(query->getBoost(), (*queryTerm)->text())); terms->put((*queryTerm)->text(), weightedSpanTerm); } } } bool WeightedSpanTermExtractor::fieldNameComparator(const String& fieldNameToCheck) { return (fieldName.empty() || fieldNameToCheck == fieldName || fieldNameToCheck == defaultField); } IndexReaderPtr WeightedSpanTermExtractor::getReaderForField(const String& field) { if (wrapToCaching && !cachedTokenStream && !MiscUtils::typeOf(tokenStream)) { tokenStream = newLucene(tokenStream); cachedTokenStream = true; } IndexReaderPtr reader(readers.get(field)); if (!reader) { MemoryIndexPtr indexer(newLucene()); indexer->addField(field, tokenStream); tokenStream->reset(); IndexSearcherPtr searcher(indexer->createSearcher()); reader = searcher->getIndexReader(); readers.put(field, reader); } return reader; } MapWeightedSpanTermPtr WeightedSpanTermExtractor::getWeightedSpanTerms(const QueryPtr& query, const TokenStreamPtr& tokenStream) { return getWeightedSpanTerms(query, tokenStream, L""); } MapWeightedSpanTermPtr WeightedSpanTermExtractor::getWeightedSpanTerms(const QueryPtr& query, const TokenStreamPtr& tokenStream, const String& fieldName) { if (!fieldName.empty()) { this->fieldName = fieldName; } else { this->fieldName.clear(); } MapWeightedSpanTermPtr terms(newLucene()); this->tokenStream = tokenStream; LuceneException finally; try { extract(query, terms); } catch (LuceneException& e) { finally = e; } closeReaders(); finally.throwException(); return terms; } MapWeightedSpanTermPtr WeightedSpanTermExtractor::getWeightedSpanTermsWithScores(const QueryPtr& query, const TokenStreamPtr& tokenStream, const String& fieldName, const IndexReaderPtr& reader) { if (!fieldName.empty()) { this->fieldName = fieldName; } else { this->fieldName.clear(); } MapWeightedSpanTermPtr 
terms(newLucene()); extract(query, terms); int32_t totalNumDocs = reader->numDocs(); LuceneException finally; try { for (MapStringWeightedSpanTerm::iterator weightedSpanTerm = terms->begin(); weightedSpanTerm != terms->end(); ++weightedSpanTerm) { int32_t docFreq = reader->docFreq(newLucene(fieldName, weightedSpanTerm->second->term)); // docFreq counts deletes if (totalNumDocs < docFreq) { docFreq = totalNumDocs; } // IDF algorithm taken from DefaultSimilarity class double idf = (double)(std::log((double)totalNumDocs / (double)(docFreq + 1)) + 1.0); weightedSpanTerm->second->weight *= idf; } } catch (LuceneException& e) { finally = e; } closeReaders(); finally.throwException(); return terms; } void WeightedSpanTermExtractor::collectSpanQueryFields(const SpanQueryPtr& spanQuery, HashSet fieldNames) { if (MiscUtils::typeOf(spanQuery)) { collectSpanQueryFields(boost::dynamic_pointer_cast(spanQuery)->getMaskedQuery(), fieldNames); } else if (MiscUtils::typeOf(spanQuery)) { collectSpanQueryFields(boost::dynamic_pointer_cast(spanQuery)->getMatch(), fieldNames); } else if (MiscUtils::typeOf(spanQuery)) { Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { collectSpanQueryFields(*clause, fieldNames); } } else if (MiscUtils::typeOf(spanQuery)) { collectSpanQueryFields(boost::dynamic_pointer_cast(spanQuery)->getInclude(), fieldNames); } else if (MiscUtils::typeOf(spanQuery)) { Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { collectSpanQueryFields(*clause, fieldNames); } } else { fieldNames.add(spanQuery->getField()); } } bool WeightedSpanTermExtractor::mustRewriteQuery(const SpanQueryPtr& spanQuery) { if (!expandMultiTermQuery) { return false; // Will throw UnsupportedOperationException in case of a SpanRegexQuery. 
} else if (MiscUtils::typeOf(spanQuery)) { return mustRewriteQuery(boost::dynamic_pointer_cast(spanQuery)->getMaskedQuery()); } else if (MiscUtils::typeOf(spanQuery)) { return mustRewriteQuery(boost::dynamic_pointer_cast(spanQuery)->getMatch()); } else if (MiscUtils::typeOf(spanQuery)) { Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { if (mustRewriteQuery(*clause)) { return true; } } return false; } else if (MiscUtils::typeOf(spanQuery)) { SpanNotQueryPtr spanNotQuery(boost::dynamic_pointer_cast(spanQuery)); return mustRewriteQuery(spanNotQuery->getInclude()) || mustRewriteQuery(spanNotQuery->getExclude()); } else if (MiscUtils::typeOf(spanQuery)) { Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { if (mustRewriteQuery(*clause)) { return true; } } return false; } else if (MiscUtils::typeOf(spanQuery)) { return false; } else { return true; } } bool WeightedSpanTermExtractor::getExpandMultiTermQuery() { return expandMultiTermQuery; } void WeightedSpanTermExtractor::setExpandMultiTermQuery(bool expandMultiTermQuery) { this->expandMultiTermQuery = expandMultiTermQuery; } bool WeightedSpanTermExtractor::isCachedTokenStream() { return cachedTokenStream; } TokenStreamPtr WeightedSpanTermExtractor::getTokenStream() { return tokenStream; } void WeightedSpanTermExtractor::setWrapIfNotCachingTokenFilter(bool wrap) { this->wrapToCaching = wrap; } PositionCheckingMap::~PositionCheckingMap() { } void PositionCheckingMap::put(const String& key, const WeightedSpanTermPtr& val) { MapStringWeightedSpanTerm::iterator prev = map.find(key); if (prev == map.end()) { map.put(key, val); return; } bool positionSensitive = prev->second->positionSensitive; prev->second = val; if (!positionSensitive) { prev->second->positionSensitive = false; } } 
FakeReader::FakeReader() : FilterIndexReader(EMPTY_MEMORY_INDEX_READER()) { } FakeReader::~FakeReader() { } IndexReaderPtr FakeReader::EMPTY_MEMORY_INDEX_READER() { static IndexReaderPtr _EMPTY_MEMORY_INDEX_READER; LUCENE_RUN_ONCE( _EMPTY_MEMORY_INDEX_READER = newLucene()->createSearcher()->getIndexReader(); CycleCheck::addStatic(_EMPTY_MEMORY_INDEX_READER); ); return _EMPTY_MEMORY_INDEX_READER; } TermEnumPtr FakeReader::terms(const TermPtr& t) { // only set first fieldname if (t && field.empty()) { field = t->field(); } return FilterIndexReader::terms(t); } } LucenePlusPlus-rel_3.0.9/src/contrib/highlighter/WeightedTerm.cpp000066400000000000000000000014711456444476200251540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "WeightedTerm.h" namespace Lucene { WeightedTerm::WeightedTerm(double weight, const String& term) { this->weight = weight; this->term = term; } WeightedTerm::~WeightedTerm() { } String WeightedTerm::getTerm() { return term; } double WeightedTerm::getWeight() { return weight; } void WeightedTerm::setTerm(const String& term) { this->term = term; } void WeightedTerm::setWeight(double weight) { this->weight = weight; } } LucenePlusPlus-rel_3.0.9/src/contrib/include/000077500000000000000000000000001456444476200212025ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/include/ArabicAnalyzer.h000066400000000000000000000057001456444476200242440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ARABICANALYZER_H #define ARABICANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for Arabic. /// /// This analyzer implements light-stemming as specified by: /// Light Stemming for Arabic Information Retrieval /// /// http://www.mtholyoke.edu/~lballest/Pubs/arab_stem05.pdf /// /// The analysis package contains three primary components: ///
    ///
  • {@link ArabicNormalizationFilter}: Arabic orthographic normalization. ///
  • {@link ArabicStemFilter}: Arabic light stemming. ///
  • Arabic stop words file: a set of default Arabic stop words. ///
class LPPCONTRIBAPI ArabicAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. ArabicAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. ArabicAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); virtual ~ArabicAnalyzer(); LUCENE_CLASS(ArabicAnalyzer); public: /// Default Arabic stopwords in UTF-8 format. /// /// Generated from http://members.unine.ch/jacques.savoy/clef/index.html /// The stopword list is BSD-Licensed. static const uint8_t DEFAULT_STOPWORD_FILE[]; protected: /// Contains the stopwords used with the StopFilter. HashSet stoptable; LuceneVersion::Version matchVersion; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with /// {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter} and /// {@link ArabicStemFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with /// {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter} and /// {@link ArabicStemFilter}. 
virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; class LPPCONTRIBAPI ArabicAnalyzerSavedStreams : public LuceneObject { public: virtual ~ArabicAnalyzerSavedStreams(); LUCENE_CLASS(ArabicAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/ArabicLetterTokenizer.h000066400000000000000000000026511456444476200256130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ARABICLETTERTOKENIZER_H #define ARABICLETTERTOKENIZER_H #include "LuceneContrib.h" #include "LetterTokenizer.h" namespace Lucene { /// Tokenizer that breaks text into runs of letters and diacritics. /// /// The problem with the standard Letter tokenizer is that it fails on diacritics. /// Handling similar to this is necessary for Indic Scripts, Hebrew, Thaana, etc. /// class LPPCONTRIBAPI ArabicLetterTokenizer : public LetterTokenizer { public: /// Construct a new ArabicLetterTokenizer. ArabicLetterTokenizer(const ReaderPtr& input); /// Construct a new ArabicLetterTokenizer using a given {@link AttributeSource}. ArabicLetterTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); /// Construct a new ArabicLetterTokenizer using a given {@link AttributeFactory}. 
ArabicLetterTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); virtual ~ArabicLetterTokenizer(); LUCENE_CLASS(ArabicLetterTokenizer); public: /// Allows for Letter category or NonspacingMark category virtual bool isTokenChar(wchar_t c); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/ArabicNormalizationFilter.h000066400000000000000000000016541456444476200264570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ARABICNORMALIZATIONFILTER_H #define ARABICNORMALIZATIONFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that applies {@link ArabicNormalizer} to normalize the orthography. class LPPCONTRIBAPI ArabicNormalizationFilter : public TokenFilter { public: ArabicNormalizationFilter(const TokenStreamPtr& input); virtual ~ArabicNormalizationFilter(); LUCENE_CLASS(ArabicNormalizationFilter); protected: ArabicNormalizerPtr normalizer; TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/ArabicNormalizer.h000066400000000000000000000041771456444476200246100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ARABICNORMALIZER_H #define ARABICNORMALIZER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Normalizer for Arabic. 
/// /// Normalization is done in-place for efficiency, operating on a termbuffer. /// /// Normalization is defined as: ///
    ///
  • Normalization of hamza with alef seat to a bare alef. ///
  • Normalization of teh marbuta to heh ///
  • Normalization of dotless yeh (alef maksura) to yeh. ///
  • Removal of Arabic diacritics (the harakat) ///
  • Removal of tatweel (stretching character). ///
class LPPCONTRIBAPI ArabicNormalizer : public LuceneObject { public: virtual ~ArabicNormalizer(); LUCENE_CLASS(ArabicNormalizer); public: static const wchar_t ALEF; static const wchar_t ALEF_MADDA; static const wchar_t ALEF_HAMZA_ABOVE; static const wchar_t ALEF_HAMZA_BELOW; static const wchar_t YEH; static const wchar_t DOTLESS_YEH; static const wchar_t TEH_MARBUTA; static const wchar_t HEH; static const wchar_t TATWEEL; static const wchar_t FATHATAN; static const wchar_t DAMMATAN; static const wchar_t KASRATAN; static const wchar_t FATHA; static const wchar_t DAMMA; static const wchar_t KASRA; static const wchar_t SHADDA; static const wchar_t SUKUN; public: /// Normalize an input buffer of Arabic text /// @param s input buffer /// @param len length of input buffer /// @return length of input buffer after normalization int32_t normalize(wchar_t* s, int32_t len); /// Delete a character in-place /// @param s Input Buffer /// @param pos Position of character to delete /// @param len length of input buffer /// @return length of input buffer after deletion int32_t deleteChar(wchar_t* s, int32_t pos, int32_t len); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/ArabicStemFilter.h000066400000000000000000000015451456444476200245400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ARABICSTEMFILTER_H #define ARABICSTEMFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that applies {@link ArabicStemmer} to stem Arabic words. 
class LPPCONTRIBAPI ArabicStemFilter : public TokenFilter { public: ArabicStemFilter(const TokenStreamPtr& input); virtual ~ArabicStemFilter(); LUCENE_CLASS(ArabicStemFilter); protected: ArabicStemmerPtr stemmer; TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/ArabicStemmer.h000066400000000000000000000062401456444476200240730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ARABICSTEMMER_H #define ARABICSTEMMER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Stemmer for Arabic. /// /// Stemming is done in-place for efficiency, operating on a termbuffer. /// /// Stemming is defined as: ///
    ///
  • Removal of attached definite article, conjunction, and prepositions. ///
  • Stemming of common suffixes. ///
class LPPCONTRIBAPI ArabicStemmer : public LuceneObject { public: virtual ~ArabicStemmer(); LUCENE_CLASS(ArabicStemmer); public: static const wchar_t ALEF; static const wchar_t BEH; static const wchar_t TEH_MARBUTA; static const wchar_t TEH; static const wchar_t FEH; static const wchar_t KAF; static const wchar_t LAM; static const wchar_t NOON; static const wchar_t HEH; static const wchar_t WAW; static const wchar_t YEH; public: static const Collection prefixes(); static const Collection suffixes(); /// Stem an input buffer of Arabic text. /// @param s input buffer /// @param len length of input buffer /// @return length of input buffer after normalization int32_t stem(wchar_t* s, int32_t len); /// Stem a prefix off an Arabic word. /// @param s input buffer /// @param len length of input buffer /// @return new length of input buffer after stemming. int32_t stemPrefix(wchar_t* s, int32_t len); /// Stem suffix(es) off an Arabic word. /// @param s input buffer /// @param len length of input buffer /// @return new length of input buffer after stemming int32_t stemSuffix(wchar_t* s, int32_t len); /// Returns true if the prefix matches and can be stemmed /// @param s input buffer /// @param len length of input buffer /// @param prefix prefix to check /// @return true if the prefix matches and can be stemmed bool startsWith(wchar_t* s, int32_t len, const String& prefix); /// Returns true if the suffix matches and can be stemmed /// @param s input buffer /// @param len length of input buffer /// @param suffix suffix to check /// @return true if the suffix matches and can be stemmed bool endsWith(wchar_t* s, int32_t len, const String& suffix); protected: /// Delete n characters in-place /// @param s Input Buffer /// @param pos Position of character to delete /// @param len Length of input buffer /// @param chars number of characters to delete /// @return length of input buffer after deletion int32_t deleteChars(wchar_t* s, int32_t pos, int32_t len, int32_t chars); /// 
Delete a character in-place /// @param s Input Buffer /// @param pos Position of character to delete /// @param len length of input buffer /// @return length of input buffer after deletion int32_t deleteChar(wchar_t* s, int32_t pos, int32_t len); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/BrazilianAnalyzer.h000066400000000000000000000060401456444476200247740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BRAZILIANANALYZER_H #define BRAZILIANANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for Brazilian Portuguese language. /// /// Supports an external list of stopwords (words that will not be indexed at all) and an external list of /// exclusions (words that will not be stemmed, but indexed). /// /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. class LPPCONTRIBAPI BrazilianAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. BrazilianAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); /// Builds an analyzer with the given stop words and stemming exclusion words. BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); virtual ~BrazilianAnalyzer(); LUCENE_CLASS(BrazilianAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. HashSet stoptable; /// Contains words that should be indexed but not stemmed. 
HashSet excltable; LuceneVersion::Version matchVersion; /// List of typical Brazilian Portuguese stopwords. static const wchar_t* _BRAZILIAN_STOP_WORDS[]; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); void setStemExclusionTable(HashSet exclusions); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with /// {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and {@link BrazilianStemFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from an {@link BrazilianLetterTokenizer} filtered with /// {@link LowerCaseFilter}, {@link StopFilter}, {@link BrazilianNormalizationFilter} and /// {@link BrazilianStemFilter}. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; class LPPCONTRIBAPI BrazilianAnalyzerSavedStreams : public LuceneObject { public: virtual ~BrazilianAnalyzerSavedStreams(); LUCENE_CLASS(BrazilianAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/BrazilianStemFilter.h000066400000000000000000000020301456444476200252600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef BRAZILIANSTEMFILTER_H #define BRAZILIANSTEMFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that applies {@link BrazilianStemmer}. class LPPCONTRIBAPI BrazilianStemFilter : public TokenFilter { public: BrazilianStemFilter(const TokenStreamPtr& input); BrazilianStemFilter(const TokenStreamPtr& input, HashSet exclusiontable); virtual ~BrazilianStemFilter(); LUCENE_CLASS(BrazilianStemFilter); protected: /// {@link BrazilianStemmer} in use by this filter. BrazilianStemmerPtr stemmer; HashSet exclusions; TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/BrazilianStemmer.h000066400000000000000000000074171456444476200246340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef BRAZILIANSTEMMER_H #define BRAZILIANSTEMMER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// A stemmer for Brazilian Portuguese words. class LPPCONTRIBAPI BrazilianStemmer : public LuceneObject { public: virtual ~BrazilianStemmer(); LUCENE_CLASS(BrazilianStemmer); protected: String TERM; String CT; String R1; String R2; String RV; public: /// Stems the given term to a unique discriminator. /// /// @param term The term that should be stemmed. /// @return Discriminator for term. String stem(const String& term); protected: /// Checks a term if it can be processed correctly. /// @return true if, and only if, the given term consists in letters. bool isStemmable(const String& term); /// Checks a term if it can be processed indexed. 
/// @return true if it can be indexed bool isIndexable(const String& term); /// See if string is 'a','e','i','o','u' /// @return true if is vowel bool isVowel(wchar_t value); /// Gets R1. /// R1 - is the region after the first non-vowel following a vowel, or is the null region at the end of the /// word if there is no such non-vowel. /// @return null or a string representing R1 String getR1(const String& value); /// Gets RV. /// RV - if the second letter is a consonant, RV is the region after the next following vowel, /// /// OR if the first two letters are vowels, RV is the region after the next consonant, /// /// AND otherwise (consonant-vowel case) RV is the region after the third letter. /// /// BUT RV is the end of the word if this positions cannot be found. /// @return null or a string representing RV String getRV(const String& value); /// 1) Turn to lowercase /// 2) Remove accents /// 3) ã -> a ; õ -> o /// 4) ç -> c /// @return null or a string transformed String changeTerm(const String& value); /// Check if a string ends with a suffix. /// @return true if the string ends with the specified suffix. bool checkSuffix(const String& value, const String& suffix); /// Replace a string suffix by another /// @return the replaced String String replaceSuffix(const String& value, const String& toReplace, const String& changeTo); /// Remove a string suffix. /// @return the String without the suffix; String removeSuffix(const String& value, const String& toRemove); /// See if a suffix is preceded by a String. /// @return true if the suffix is preceded. bool suffixPreceded(const String& value, const String& suffix, const String& preceded); /// Creates CT (changed term) , substituting * 'ã' and 'õ' for 'a~' and 'o~'. void createCT(const String& term); /// Standard suffix removal. /// @return false if no ending was removed bool step1(); /// Verb suffixes. /// Search for the longest among the following suffixes in RV, and if found, delete. 
/// @return false if no ending was removed bool step2(); /// Delete suffix 'i' if in RV and preceded by 'c' void step3(); /// Residual suffix /// If the word ends with one of the suffixes (os a i o á í ó) in RV, delete it. void step4(); /// If the word ends with one of (e é ê) in RV,delete it, and if preceded by 'gu' (or 'ci') with /// the 'u' (or 'i') in RV, delete the 'u' (or 'i') /// /// Or if the word ends ç remove the cedilha. void step5(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/CJKAnalyzer.h000066400000000000000000000042331456444476200234720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CJKANALYZER_H #define CJKANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// An {@link Analyzer} that tokenizes text with {@link CJKTokenizer} and filters with {@link StopFilter} class LPPCONTRIBAPI CJKAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. CJKAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. CJKAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); virtual ~CJKAnalyzer(); LUCENE_CLASS(CJKAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. HashSet stoptable; LuceneVersion::Version matchVersion; /// List of typical English stopwords. static const wchar_t* _STOP_WORDS[]; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. 
/// /// @return A {@link TokenStream} built from {@link CJKTokenizer}, filtered with {@link StopFilter} virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from {@link CJKTokenizer}, filtered with {@link StopFilter} virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; class LPPCONTRIBAPI CJKAnalyzerSavedStreams : public LuceneObject { public: virtual ~CJKAnalyzerSavedStreams(); LUCENE_CLASS(CJKAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/CJKTokenizer.h000066400000000000000000000061101456444476200236530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CJKTOKENIZER_H #define CJKTOKENIZER_H #include "Tokenizer.h" namespace Lucene { /// CJKTokenizer is designed for Chinese, Japanese, and Korean languages. /// /// The tokens returned are every two adjacent characters with overlap match. /// /// Example: "lucene C1C2C3C4" will be segmented to: "lucene" "C1C2" "C2C3" "C3C4". /// /// Additionally, the following is applied to Latin text (such as English): ///
    ///
  • Text is converted to lowercase. ///
  • Numeric digits, '+', '#', and '_' are tokenized as letters. ///
  • Full-width forms are converted to half-width forms. ///
/// For more info on Asian language (Chinese, Japanese, and Korean) text segmentation: /// please search google class LPPCONTRIBAPI CJKTokenizer : public Tokenizer { public: CJKTokenizer(const ReaderPtr& input); CJKTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); CJKTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); virtual ~CJKTokenizer(); LUCENE_CLASS(CJKTokenizer); public: /// Word token type static const int32_t WORD_TYPE; /// Single byte token type static const int32_t SINGLE_TOKEN_TYPE; /// Double byte token type static const int32_t DOUBLE_TOKEN_TYPE; /// Names for token types static const wchar_t* TOKEN_TYPE_NAMES[]; protected: /// Max word length static const int32_t MAX_WORD_LEN; static const int32_t IO_BUFFER_SIZE; enum UnicodeBlock { NONE, BASIC_LATIN, HALFWIDTH_AND_FULLWIDTH_FORMS }; protected: /// word offset, used to imply which character(in) is parsed int32_t offset; /// the index used only for ioBuffer int32_t bufferIndex; /// data length int32_t dataLen; /// character buffer, store the characters which are used to compose the returned Token CharArray buffer; /// I/O buffer, used to store the content of the input (one of the members of Tokenizer) CharArray ioBuffer; /// word type: single=>ASCII double=>non-ASCII word=>default int32_t tokenType; /// tag: previous character is a cached double-byte character "C1C2C3C4" /// ----(set the C1 isTokened) C1C2 "C2C3C4" ----(set the C2 isTokened) /// C1C2 C2C3 "C3C4" ----(set the C3 isTokened) "C1C2 C2C3 C3C4" bool preIsTokened; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; TypeAttributePtr typeAtt; protected: /// return unicode block for given character (see http://unicode.org/Public/UNIDATA/Blocks.txt) UnicodeBlock unicodeBlock(wchar_t c); public: virtual void initialize(); virtual bool incrementToken(); virtual void end(); virtual void reset(); virtual void reset(const ReaderPtr& input); }; } #endif 
LucenePlusPlus-rel_3.0.9/src/contrib/include/CMakeLists.txt000066400000000000000000000003511456444476200237410ustar00rootroot00000000000000#################################### # install headers #################################### file(GLOB_RECURSE lucene_headers "${CMAKE_CURRENT_SOURCE_DIR}/*.h" ) install( FILES ${lucene_headers} DESTINATION include/lucene++ ) LucenePlusPlus-rel_3.0.9/src/contrib/include/ChineseAnalyzer.h000066400000000000000000000031231456444476200244360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHINESEANALYZER_H #define CHINESEANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// An {@link Analyzer} that tokenizes text with {@link ChineseTokenizer} and filters with {@link ChineseFilter} class LPPCONTRIBAPI ChineseAnalyzer : public Analyzer { public: virtual ~ChineseAnalyzer(); LUCENE_CLASS(ChineseAnalyzer); public: /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from {@link ChineseTokenizer}, filtered with {@link ChineseFilter} virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. 
/// /// @return A {@link TokenStream} built from {@link ChineseTokenizer}, filtered with {@link ChineseFilter} virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; class LPPCONTRIBAPI ChineseAnalyzerSavedStreams : public LuceneObject { public: virtual ~ChineseAnalyzerSavedStreams(); LUCENE_CLASS(ChineseAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/ChineseFilter.h000066400000000000000000000020761456444476200241040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHINESEFILTER_H #define CHINESEFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} with a stop word table. ///
    ///
  • Numeric tokens are removed. ///
  • English tokens must be larger than 1 character. ///
  • One Chinese character as one Chinese word. ///
class LPPCONTRIBAPI ChineseFilter : public TokenFilter { public: ChineseFilter(const TokenStreamPtr& input); virtual ~ChineseFilter(); LUCENE_CLASS(ChineseFilter); public: /// Only English now, Chinese to be added later. static const wchar_t* STOP_WORDS[]; protected: HashSet stopTable; TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/ChineseTokenizer.h000066400000000000000000000044311456444476200246260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CHINESETOKENIZER_H #define CHINESETOKENIZER_H #include "Tokenizer.h" namespace Lucene { /// Tokenize Chinese text as individual Chinese characters. /// /// The difference between ChineseTokenizer and ChineseTokenizer is that they have different /// token parsing logic. /// /// For example, if the Chinese text "C1C2C3C4" is to be indexed: ///
    ///
  • The tokens returned from ChineseTokenizer are C1, C2, C3, C4. ///
  • The tokens returned from the ChineseTokenizer are C1C2, C2C3, C3C4. ///
/// /// Therefore the index created by ChineseTokenizer is much larger. /// /// The problem is that when searching for C1, C1C2, C1C3, C4C2, C1C2C3 ... the /// ChineseTokenizer works, but the ChineseTokenizer will not work. class LPPCONTRIBAPI ChineseTokenizer : public Tokenizer { public: ChineseTokenizer(const ReaderPtr& input); ChineseTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); ChineseTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); virtual ~ChineseTokenizer(); LUCENE_CLASS(ChineseTokenizer); protected: /// Max word length static const int32_t MAX_WORD_LEN; static const int32_t IO_BUFFER_SIZE; protected: /// word offset, used to imply which character(in) is parsed int32_t offset; /// the index used only for ioBuffer int32_t bufferIndex; /// data length int32_t dataLen; /// character buffer, store the characters which are used to compose the returned Token CharArray buffer; /// I/O buffer, used to store the content of the input (one of the members of Tokenizer) CharArray ioBuffer; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; int32_t length; int32_t start; public: virtual void initialize(); virtual bool incrementToken(); virtual void end(); virtual void reset(); virtual void reset(const ReaderPtr& input); protected: void push(wchar_t c); bool flush(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/ContribInc.h000066400000000000000000000010351456444476200234040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifdef _WIN32 #include "targetver.h" #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #ifndef NOMINMAX #define NOMINMAX #endif #include #endif #include "LuceneContrib.h" LucenePlusPlus-rel_3.0.9/src/contrib/include/CzechAnalyzer.h000066400000000000000000000050321456444476200241150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef CZECHANALYZER_H #define CZECHANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for Czech language. /// /// Supports an external list of stopwords (words that will not be indexed at all). /// A default set of stopwords is used unless an alternative list is specified. /// /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. class LPPCONTRIBAPI CzechAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. CzechAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. CzechAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); virtual ~CzechAnalyzer(); LUCENE_CLASS(CzechAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. HashSet stoptable; LuceneVersion::Version matchVersion; /// Default Czech stopwords in UTF-8 format. static const uint8_t _CZECH_STOP_WORDS[]; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. 
/// /// @return A {@link TokenStream} built from {@link StandardTokenizer}, filtered with {@link StandardFilter}, /// {@link LowerCaseFilter}, and {@link StopFilter} virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from {@link StandardTokenizer}, filtered with {@link StandardFilter}, /// {@link LowerCaseFilter}, and {@link StopFilter} virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; class LPPCONTRIBAPI CzechAnalyzerSavedStreams : public LuceneObject { public: virtual ~CzechAnalyzerSavedStreams(); LUCENE_CLASS(CzechAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/DefaultEncoder.h000066400000000000000000000013411456444476200242360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DEFAULTENCODER_H #define DEFAULTENCODER_H #include "Encoder.h" namespace Lucene { /// Simple {@link Encoder} implementation that does not modify the output. class LPPCONTRIBAPI DefaultEncoder : public Encoder, public LuceneObject { public: virtual ~DefaultEncoder(); LUCENE_CLASS(DefaultEncoder); public: virtual String encodeText(const String& originalText); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/DutchAnalyzer.h000066400000000000000000000060651456444476200241370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DUTCHANALYZER_H #define DUTCHANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for Dutch language. /// /// Supports an external list of stopwords (words that will not be indexed at all) and an external list of /// exclusions (words that will not be stemmed, but indexed). A default set of stopwords is used unless an /// alternative list is specified, but the exclusion list is empty by default. /// /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. class LPPCONTRIBAPI DutchAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. DutchAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); /// Builds an analyzer with the given stop words and stemming exclusion words. DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); virtual ~DutchAnalyzer(); LUCENE_CLASS(DutchAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. HashSet stoptable; /// Contains words that should be indexed but not stemmed. HashSet excltable; MapStringString stemdict; LuceneVersion::Version matchVersion; /// List of typical Dutch stopwords. static const wchar_t* _DUTCH_STOP_WORDS[]; public: virtual void initialize(); /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); void setStemExclusionTable(HashSet exclusions); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. 
/// /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with /// {@link StandardFilter}, {@link StopFilter} and {@link DutchStemFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with /// {@link StandardFilter}, {@link StopFilter} and {@link DutchStemFilter}. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; class LPPCONTRIBAPI DutchAnalyzerSavedStreams : public LuceneObject { public: virtual ~DutchAnalyzerSavedStreams(); LUCENE_CLASS(DutchAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/DutchStemFilter.h000066400000000000000000000042651456444476200244300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DUTCHSTEMFILTER_H #define DUTCHSTEMFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that stems Dutch words. /// /// It supports a table of words that should not be stemmed at all. The stemmer used can /// be changed at runtime after the filter object is created (as long as it is a /// {@link DutchStemmer}). /// /// NOTE: This stemmer does not implement the Snowball algorithm correctly, specifically /// doubled consonants. It is recommended that you consider using the "Dutch" stemmer in /// the snowball package instead. This stemmer will likely be deprecated in a future release. 
class LPPCONTRIBAPI DutchStemFilter : public TokenFilter { public: DutchStemFilter(const TokenStreamPtr& input); /// Builds a DutchStemFilter that uses an exclusion table. DutchStemFilter(const TokenStreamPtr& input, HashSet exclusiontable); /// Builds a DutchStemFilter that uses an exclusion table and dictionary of word stem /// pairs, that overrule the algorithm. DutchStemFilter(const TokenStreamPtr& input, HashSet exclusiontable, MapStringString stemdictionary); virtual ~DutchStemFilter(); LUCENE_CLASS(DutchStemFilter); protected: /// {@link DutchStemmer} in use by this filter. DutchStemmerPtr stemmer; HashSet exclusions; TermAttributePtr termAtt; public: virtual bool incrementToken(); /// Set a alternative/custom {@link DutchStemmer} for this filter. void setStemmer(const DutchStemmerPtr& stemmer); /// Set an alternative exclusion list for this filter. void setExclusionSet(HashSet exclusiontable); /// Set dictionary for stemming, this dictionary overrules the algorithm, so you can /// correct for a particular unwanted word-stem pair. void setStemDictionary(MapStringString dict); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/DutchStemmer.h000066400000000000000000000053271456444476200237660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef DUTCHSTEMMER_H #define DUTCHSTEMMER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// A stemmer for Dutch words. /// /// The algorithm is an implementation of the /// dutch stemming /// algorithm in Martin Porter's snowball project. 
class LPPCONTRIBAPI DutchStemmer : public LuceneObject { public: DutchStemmer(); virtual ~DutchStemmer(); LUCENE_CLASS(DutchStemmer); protected: /// Buffer for the terms while stemming them. String buffer; bool removedE; MapStringString stemDict; int32_t R1; int32_t R2; public: /// Stems the given term to a unique discriminator. /// /// @param term The term that should be stemmed. /// @return Discriminator for term. String stem(const String& term); void setStemDictionary(MapStringString dict); protected: bool enEnding(); void step1(); /// Delete suffix e if in R1 and preceded by a non-vowel, and then undouble the ending. void step2(); /// Delete "heid" void step3a(); /// A d-suffix, or derivational suffix, enables a new word, often with a different grammatical /// category, or with a different sense, to be built from another word. Whether a d-suffix can /// be attached is discovered not from the rules of grammar, but by referring to a dictionary. /// So in English, ness can be added to certain adjectives to form corresponding nouns /// (littleness, kindness, foolishness ...) but not to all adjectives (not for example, to big, /// cruel, wise ...) d-suffixes can be used to change meaning, often in rather exotic ways. /// Remove "ing", "end", "ig", "lijk", "baar" and "bar" void step3b(); /// Undouble vowel. If the words ends CVD, where C is a non-vowel, D is a non-vowel other than /// I, and V is double a, e, o or u, remove one of the vowels from V (for example, maan -> man, /// brood -> brod). void step4(); /// Checks if a term could be stemmed. 
bool isStemmable(); /// Substitute ä, ë, ï, ö, ü, á , é, í, ó, ú void substitute(); bool isValidSEnding(int32_t index); bool isValidEnEnding(int32_t index); void unDouble(); void unDouble(int32_t endIndex); int32_t getRIndex(int32_t start); void storeYandI(); void reStoreYandI(); bool isVowel(wchar_t c); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/ElisionFilter.h000066400000000000000000000025131456444476200241240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ELISIONFILTER_H #define ELISIONFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// Removes elisions from a {@link TokenStream}. For example, "l'avion" (the plane) will be /// tokenized as "avion" (plane). /// /// Note that {@link StandardTokenizer} sees " ' " as a space, and cuts it out. /// @see Elision in Wikipedia class LPPCONTRIBAPI ElisionFilter : public TokenFilter { public: /// Constructs an elision filter with standard stop words. ElisionFilter(const TokenStreamPtr& input); /// Constructs an elision filter with a Set of stop words ElisionFilter(const TokenStreamPtr& input, HashSet articles); virtual ~ElisionFilter(); LUCENE_CLASS(ElisionFilter); protected: static const wchar_t apostrophes[]; CharArraySetPtr articles; TermAttributePtr termAtt; public: void setArticles(HashSet articles); virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/Encoder.h000066400000000000000000000013151456444476200227320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef ENCODER_H #define ENCODER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Encodes original text. The Encoder works with the {@link Formatter} to generate output. class LPPCONTRIBAPI Encoder { public: virtual ~Encoder(); LUCENE_INTERFACE(Encoder); public: virtual String encodeText(const String& originalText); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/Formatter.h000066400000000000000000000017471456444476200233270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FORMATTER_H #define FORMATTER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Processes terms found in the original text, typically by applying some form of mark-up to highlight /// terms in HTML search results pages. class LPPCONTRIBAPI Formatter { public: virtual ~Formatter(); LUCENE_INTERFACE(Formatter); public: /// @param originalText The section of text being considered for markup /// @param tokenGroup contains one or several overlapping Tokens along with their scores and positions. virtual String highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/Fragmenter.h000066400000000000000000000026461456444476200234550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FRAGMENTER_H #define FRAGMENTER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Implements the policy for breaking text into multiple fragments for consideration by the /// {@link Highlighter} class. A sophisticated implementation may do this on the basis of /// detecting end of sentences in the text. class LPPCONTRIBAPI Fragmenter { public: virtual ~Fragmenter(); LUCENE_INTERFACE(Fragmenter); public: /// Initializes the Fragmenter. You can grab references to the Attributes you are /// interested in from tokenStream and then access the values in {@link #isNewFragment()}. /// @param originalText the original source text. /// @param tokenStream the {@link TokenStream} to be fragmented. virtual void start(const String& originalText, const TokenStreamPtr& tokenStream); /// Test to see if this token from the stream should be held in a new TextFragment. /// Every time this is called, the TokenStream passed to start(String, TokenStream) /// will have been incremented. virtual bool isNewFragment(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/FrenchAnalyzer.h000066400000000000000000000060721456444476200242730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FRENCHANALYZER_H #define FRENCHANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for French language. 
/// /// Supports an external list of stopwords (words that will not be indexed at all) and an external list of /// exclusions (words that will not be stemmed, but indexed). A default set of stopwords is used unless an /// alternative list is specified, but the exclusion list is empty by default. /// /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. class LPPCONTRIBAPI FrenchAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. FrenchAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); /// Builds an analyzer with the given stop words and stemming exclusion words. FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); virtual ~FrenchAnalyzer(); LUCENE_CLASS(FrenchAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. HashSet stoptable; /// Contains words that should be indexed but not stemmed. HashSet excltable; LuceneVersion::Version matchVersion; /// List of typical French stopwords. static const wchar_t* _FRENCH_STOP_WORDS[]; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); void setStemExclusionTable(HashSet exclusions); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with /// {@link StandardFilter}, {@link StopFilter}, {@link FrenchStemFilter}, and {@link LowerCaseFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. 
/// /// @return A {@link TokenStream} built from an {@link StandardTokenizer} filtered with /// {@link StandardFilter}, {@link StopFilter}, {@link FrenchStemFilter} and {@link LowerCaseFilter}. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; class LPPCONTRIBAPI FrenchAnalyzerSavedStreams : public LuceneObject { public: virtual ~FrenchAnalyzerSavedStreams(); LUCENE_CLASS(FrenchAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/FrenchStemFilter.h000066400000000000000000000034101456444476200245550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FRENCHSTEMFILTER_H #define FRENCHSTEMFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that stems French words. /// /// It supports a table of words that should not be stemmed at all. The stemmer used can /// be changed at runtime after the filter object is created (as long as it is a /// {@link FrenchStemmer}). /// /// NOTE: This stemmer does not implement the Snowball algorithm correctly, especially /// involving case problems. It is recommended that you consider using the "French" stemmer /// in the snowball package instead. This stemmer will likely be deprecated in a future release. class LPPCONTRIBAPI FrenchStemFilter : public TokenFilter { public: FrenchStemFilter(const TokenStreamPtr& input); /// Builds a FrenchStemFilter that uses an exclusion table. 
FrenchStemFilter(const TokenStreamPtr& input, HashSet exclusiontable); virtual ~FrenchStemFilter(); LUCENE_CLASS(FrenchStemFilter); protected: /// {@link FrenchStemmer} in use by this filter. FrenchStemmerPtr stemmer; HashSet exclusions; TermAttributePtr termAtt; public: virtual bool incrementToken(); /// Set a alternative/custom {@link FrenchStemmer} for this filter. void setStemmer(const FrenchStemmerPtr& stemmer); /// Set an alternative exclusion list for this filter. void setExclusionSet(HashSet exclusiontable); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/FrenchStemmer.h000066400000000000000000000172651456444476200241300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef FRENCHSTEMMER_H #define FRENCHSTEMMER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// A stemmer for French words. /// /// The algorithm is based on the work of Dr Martin Porter on his snowball project refer to /// http://snowball.sourceforge.net/french/stemmer.html (French stemming algorithm) for details. class LPPCONTRIBAPI FrenchStemmer : public LuceneObject { public: FrenchStemmer(); virtual ~FrenchStemmer(); LUCENE_CLASS(FrenchStemmer); protected: /// Buffer for the terms while stemming them. String stringBuffer; /// A temporary buffer, used to reconstruct R2. String tempBuffer; /// Region R0 is equal to the whole buffer. String R0; /// Region RV /// /// "If the word begins with two vowels, RV is the region after the third letter, otherwise /// the region after the first vowel not at the beginning of the word, or the end of the /// word if these positions cannot be found." 
String RV; /// Region R1 /// /// "R1 is the region after the first non-vowel following a vowel or is the null region at /// the end of the word if there is no such non-vowel" String R1; /// Region R2 /// /// "R2 is the region after the first non-vowel in R1 following a vowel or is the null region /// at the end of the word if there is no such non-vowel" String R2; /// Set to true if we need to perform step 2 bool suite; /// Set to true if the buffer was modified bool modified; public: /// Stems the given term to a unique discriminator. /// /// @param term The term that should be stemmed. /// @return Discriminator for term. String stem(const String& term); protected: /// Sets the search region Strings it needs to be done each time the buffer was modified. void setStrings(); /// First step of the Porter Algorithm. /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. void step1(); /// Second step (A) of the Porter Algorithm. /// Will be performed if nothing changed from the first step or changed were done in the amment, /// emment, ments or ment suffixes. /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. /// @return true if something changed in the buffer bool step2a(); /// Second step (B) of the Porter Algorithm. /// Will be performed if step 2 A was performed unsuccessfully. /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. void step2b(); /// Third step of the Porter Algorithm. /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. void step3(); /// Fourth step of the Porter Algorithm. /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. void step4(); /// Fifth step of the Porter Algorithm. /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. void step5(); /// Sixth step of the Porter Algorithm. 
/// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. void step6(); /// Delete a suffix searched in zone "source" if zone "from" contains prefix + search string. /// @param source String - the primary source zone for search. /// @param search String[] - the strings to search for suppression. /// @param from String - the secondary source zone for search. /// @param prefix String - the prefix to add to the search string to test. /// @return true if modified bool deleteFromIfPrecededIn(const String& source, Collection search, const String& from, const String& prefix); /// Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel. /// @param source String - the primary source zone for search. /// @param search String[] - the strings to search for suppression. /// @param vowel boolean - true if we need a vowel before the search string. /// @param from String - the secondary source zone for search (where vowel could be). /// @return true if modified bool deleteFromIfTestVowelBeforeIn(const String& source, Collection search, bool vowel, const String& from); /// Delete a suffix searched in zone "source" if preceded by the prefix. /// @param source String - the primary source zone for search. /// @param search String[] - the strings to search for suppression. /// @param prefix String - the prefix to add to the search string to test. /// @param without boolean - true if it will be deleted even without prefix found. void deleteButSuffixFrom(const String& source, Collection search, const String& prefix, bool without); /// Delete a suffix searched in zone "source" if preceded by prefix or replace it with the /// replace string if preceded by the prefix in the zone "from" or delete the suffix if specified. /// @param source String - the primary source zone for search. /// @param search String[] - the strings to search for suppression. /// @param prefix String - the prefix to add to the search string to test. 
/// @param without boolean - true if it will be deleted even without prefix found. void deleteButSuffixFromElseReplace(const String& source, Collection search, const String& prefix, bool without, const String& from, const String& replace); /// Replace a search string with another within the source zone. /// @param source String - the source zone for search. /// @param search String[] - the strings to search for replacement. /// @param replace String - the replacement string. bool replaceFrom(const String& source, Collection search, const String& replace); /// Delete a search string within the source zone. /// @param source the source zone for search. /// @param suffix the strings to search for suppression. void deleteFrom(const String& source, Collection suffix); /// Test if a char is a French vowel, including accentuated ones. /// @param ch the char to test. /// @return true if the char is a vowel bool isVowel(wchar_t ch); /// Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string. /// "R is the region after the first non-vowel following a vowel or is the null region at the /// end of the word if there is no such non-vowel". /// @param buffer the in buffer. /// @return the resulting string. String retrieveR(const String& buffer); /// Retrieve the "RV zone" from a buffer an return the corresponding string. /// "If the word begins with two vowels, RV is the region after the third letter, otherwise the /// region after the first vowel not at the beginning of the word, or the end of the word if /// these positions cannot be found." /// @param buffer the in buffer /// @return the resulting string String retrieveRV(const String& buffer); /// Turns u and i preceded AND followed by a vowel to UpperCase<. /// Turns y preceded OR followed by a vowel to UpperCase. /// Turns u preceded by q to UpperCase. /// @param buffer the buffer to treat void treatVowels(String& buffer); /// Checks a term if it can be processed correctly. 
/// @return boolean - true if, and only if, the given term consists in letters. bool isStemmable(const String& term); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/GermanAnalyzer.h000066400000000000000000000061221456444476200242730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef GERMANANALYZER_H #define GERMANANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for German language. /// /// Supports an external list of stopwords (words that will not be indexed at all) and an external list of /// exclusions (words that will not be stemmed, but indexed). A default set of stopwords is used unless an /// alternative list is specified, but the exclusion list is empty by default. /// /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. class LPPCONTRIBAPI GermanAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. GermanAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); /// Builds an analyzer with the given stop words and stemming exclusion words. GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); virtual ~GermanAnalyzer(); LUCENE_CLASS(GermanAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. HashSet stopSet; /// Contains words that should be indexed but not stemmed. HashSet exclusionSet; LuceneVersion::Version matchVersion; /// List of typical German stopwords. 
static const wchar_t* _GERMAN_STOP_WORDS[]; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); void setStemExclusionTable(HashSet exclusions); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with /// {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and {@link GermanStemFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from an {@link GermanLetterTokenizer} filtered with /// {@link LowerCaseFilter}, {@link StopFilter}, {@link GermanNormalizationFilter} and /// {@link GermanStemFilter}. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; class LPPCONTRIBAPI GermanAnalyzerSavedStreams : public LuceneObject { public: virtual ~GermanAnalyzerSavedStreams(); LUCENE_CLASS(GermanAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/GermanStemFilter.h000066400000000000000000000027561456444476200245750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef GERMANSTEMFILTER_H #define GERMANSTEMFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that stems German words. /// /// It supports a table of words that should not be stemmed at all. 
The stemmer used can /// be changed at runtime after the filter object is created (as long as it is a /// {@link GermanStemmer}). class LPPCONTRIBAPI GermanStemFilter : public TokenFilter { public: GermanStemFilter(const TokenStreamPtr& input); /// Builds a GermanStemFilter that uses an exclusion table. GermanStemFilter(const TokenStreamPtr& input, HashSet exclusionSet); virtual ~GermanStemFilter(); LUCENE_CLASS(GermanStemFilter); protected: /// {@link GermanStemmer} in use by this filter. GermanStemmerPtr stemmer; HashSet exclusionSet; TermAttributePtr termAtt; public: virtual bool incrementToken(); /// Set a alternative/custom {@link GermanStemmer} for this filter. void setStemmer(const GermanStemmerPtr& stemmer); /// Set an alternative exclusion list for this filter. void setExclusionSet(HashSet exclusionSet); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/GermanStemmer.h000066400000000000000000000051061456444476200241230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef GERMANSTEMMER_H #define GERMANSTEMMER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// A stemmer for German words. /// /// The algorithm is based on the report "A Fast and Simple Stemming Algorithm for German Words" by Jörg /// Caumanns (joerg.caumanns at isst.fhg.de). class LPPCONTRIBAPI GermanStemmer : public LuceneObject { public: GermanStemmer(); virtual ~GermanStemmer(); LUCENE_CLASS(GermanStemmer); protected: /// Buffer for the terms while stemming them. String buffer; /// Amount of characters that are removed with substitute() while stemming. int32_t substCount; public: /// Stems the given term to a unique discriminator. 
/// /// @param term The term that should be stemmed. /// @return Discriminator for term. String stem(const String& term); protected: /// Checks if a term could be stemmed. /// @return true if, and only if, the given term consists in letters. bool isStemmable(); /// Suffix stripping (stemming) on the current term. The stripping is reduced to the seven "base" /// suffixes "e", "s", "n", "t", "em", "er" and * "nd", from which all regular suffixes are build /// of. The simplification causes some overstemming, and way more irregular stems, but still /// provides unique. /// Discriminators in the most of those cases. /// The algorithm is context free, except of the length restrictions. void strip(); /// Does some optimizations on the term. This optimisations are contextual. void optimize(); /// Removes a particle denotion ("ge") from a term. void removeParticleDenotion(); /// Do some substitutions for the term to reduce overstemming: /// /// - Substitute Umlauts with their corresponding vowel: äöü -> aou, "ß" is substituted by "ss" /// - Substitute a second char of a pair of equal characters with an asterisk: ?? -> ?* /// - Substitute some common character combinations with a token: sch/ch/ei/ie/ig/st -> $/§/%/&/#/! void substitute(); /// Undoes the changes made by substitute(). That are character pairs and character combinations. /// Umlauts will remain as their corresponding vowel, as "ß" remains as "ss". void resubstitute(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/GradientFormatter.h000066400000000000000000000031541456444476200247770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef GRADIENTFORMATTER_H #define GRADIENTFORMATTER_H #include "Formatter.h" namespace Lucene { /// Formats text with different color intensity depending on the score of the term. class LPPCONTRIBAPI GradientFormatter : public Formatter, public LuceneObject { public: GradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor); virtual ~GradientFormatter(); LUCENE_CLASS(GradientFormatter); protected: double maxScore; bool highlightForeground; bool highlightBackground; public: int32_t fgRMin; int32_t fgGMin; int32_t fgBMin; int32_t fgRMax; int32_t fgGMax; int32_t fgBMax; int32_t bgRMin; int32_t bgGMin; int32_t bgBMin; int32_t bgRMax; int32_t bgGMax; int32_t bgBMax; public: virtual String highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup); protected: String getForegroundColorString(double score); String getBackgroundColorString(double score); int32_t getColorVal(int32_t colorMin, int32_t colorMax, double score); static String intToHex(int32_t i); /// Converts a hex string into an int. static int32_t hexToInt(const String& hex); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/GreekAnalyzer.h000066400000000000000000000047711456444476200241270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef GREEKANALYZER_H #define GREEKANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for Greek language. /// /// Supports an external list of stopwords (words that will not be indexed at all). 
A default set of stopwords /// is used unless an alternative list is specified. /// /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. class LPPCONTRIBAPI GreekAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. GreekAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. GreekAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); virtual ~GreekAnalyzer(); LUCENE_CLASS(GreekAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. HashSet stopSet; LuceneVersion::Version matchVersion; /// Default Greek stopwords in UTF-8 format. static const uint8_t _GREEK_STOP_WORDS[]; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with /// {@link GreekLowerCaseFilter} and {@link StopFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from an {@link GreekLetterTokenizer} filtered with /// {@link GreekLowerCaseFilter} and {@link StopFilter}. 
virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; class LPPCONTRIBAPI GreekAnalyzerSavedStreams : public LuceneObject { public: virtual ~GreekAnalyzerSavedStreams(); LUCENE_CLASS(GreekAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/GreekLowerCaseFilter.h000066400000000000000000000016651456444476200253730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef GREEKLOWERCASEFILTER_H #define GREEKLOWERCASEFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// Normalizes token text to lower case, removes some Greek diacritics, and standardizes /// final sigma to sigma. class LPPCONTRIBAPI GreekLowerCaseFilter : public TokenFilter { public: GreekLowerCaseFilter(const TokenStreamPtr& input); virtual ~GreekLowerCaseFilter(); LUCENE_CLASS(GreekLowerCaseFilter); protected: TermAttributePtr termAtt; public: virtual bool incrementToken(); protected: wchar_t lowerCase(wchar_t codepoint); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/Highlighter.h000066400000000000000000000145211456444476200236140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef HIGHLIGHTER_H #define HIGHLIGHTER_H #include "LuceneContrib.h" #include "PriorityQueue.h" namespace Lucene { /// Class used to markup highlighted terms found in the best sections of a text, using configurable /// {@link Fragmenter}, {@link Scorer}, {@link Formatter}, {@link Encoder} and tokenizers. class LPPCONTRIBAPI Highlighter : public LuceneObject { public: Highlighter(const HighlighterScorerPtr& fragmentScorer); Highlighter(const FormatterPtr& formatter, const HighlighterScorerPtr& fragmentScorer); Highlighter(const FormatterPtr& formatter, const EncoderPtr& encoder, const HighlighterScorerPtr& fragmentScorer); virtual ~Highlighter(); LUCENE_CLASS(Highlighter); public: static const int32_t DEFAULT_MAX_CHARS_TO_ANALYZE; protected: int32_t maxDocCharsToAnalyze; FormatterPtr formatter; EncoderPtr encoder; FragmenterPtr textFragmenter; HighlighterScorerPtr fragmentScorer; public: /// Highlights chosen terms in a text, extracting the most relevant section. This is a convenience /// method that calls {@link #getBestFragment(TokenStreamPtr, const String&)} /// /// @param analyzer The analyzer that will be used to split text into chunks /// @param text Text to highlight terms in /// @param fieldName Name of field used to influence analyzer's tokenization policy /// @return highlighted text fragment or null if no terms found String getBestFragment(const AnalyzerPtr& analyzer, const String& fieldName, const String& text); /// Highlights chosen terms in a text, extracting the most relevant section. The document text is /// analyzed in chunks to record hit statistics across the document. After accumulating stats, the /// fragment with the highest score is returned. /// /// @param tokenStream A stream of tokens identified in the text parameter, including offset /// information. This is typically produced by an analyzer re-parsing a document's text. 
Some /// work may be done on retrieving TokenStreams more efficiently by adding support for storing /// original text position data in the Lucene index but this support is not currently available. /// @param text Text to highlight terms in /// @return highlighted text fragment or null if no terms found String getBestFragment(const TokenStreamPtr& tokenStream, const String& text); /// Highlights chosen terms in a text, extracting the most relevant sections. This is a convenience /// method that calls {@link #getBestFragments(TokenStreamPtr, const String&, int32_t)} /// /// @param analyzer The analyzer that will be used to split text into chunks /// @param fieldName The name of the field being highlighted (used by analyzer) /// @param text Text to highlight terms in /// @param maxNumFragments The maximum number of fragments. /// @return highlighted text fragments (between 0 and maxNumFragments number of fragments) Collection getBestFragments(const AnalyzerPtr& analyzer, const String& fieldName, const String& text, int32_t maxNumFragments); /// Highlights chosen terms in a text, extracting the most relevant sections. The document text is /// analyzed in chunks to record hit statistics across the document. After accumulating stats, the /// fragments with the highest scores are returned as an array of strings in order of score (contiguous /// fragments are merged into one in their original order to improve readability) /// /// @param text Text to highlight terms in /// @param maxNumFragments The maximum number of fragments. /// @return highlighted Text fragments (between 0 and maxNumFragments number of fragments) Collection getBestFragments(const TokenStreamPtr& tokenStream, const String& text, int32_t maxNumFragments); /// Low level api to get the most relevant (formatted) sections of the document. /// This method has been made public to allow visibility of score information held in TextFragment objects. 
Collection getBestTextFragments(const TokenStreamPtr& tokenStream, const String& text, bool merge, int32_t maxNumFragments); /// Improves readability of a score-sorted list of TextFragments by merging any fragments that were /// contiguous in the original text into one larger fragment with the correct order. This will leave /// a "null" in the array entry for the lesser scored fragment. /// /// @param frag An array of document fragments in descending score void mergeContiguousFragments(Collection frag); /// Highlights terms in the text , extracting the most relevant sections and concatenating the chosen /// fragments with a separator (typically "..."). The document text is analyzed in chunks to record /// hit statistics across the document. After accumulating stats, the fragments with the highest scores /// are returned in order as "separator" delimited strings. /// /// @param text Text to highlight terms in /// @param maxNumFragments The maximum number of fragments. /// @param separator The separator used to intersperse the document fragments (typically "...") /// @return highlighted text String getBestFragments(const TokenStreamPtr& tokenStream, const String& text, int32_t maxNumFragments, const String& separator); int32_t getMaxDocCharsToAnalyze(); void setMaxDocCharsToAnalyze(int32_t maxDocCharsToAnalyze); FragmenterPtr getTextFragmenter(); void setTextFragmenter(const FragmenterPtr& fragmenter); /// @return Object used to score each text fragment HighlighterScorerPtr getFragmentScorer(); void setFragmentScorer(const HighlighterScorerPtr& scorer); EncoderPtr getEncoder(); void setEncoder(const EncoderPtr& encoder); }; class LPPCONTRIBAPI FragmentQueue : public PriorityQueue { public: FragmentQueue(int32_t size); virtual ~FragmentQueue(); LUCENE_CLASS(FragmentQueue); protected: virtual bool lessThan(const TextFragmentPtr& first, const TextFragmentPtr& second); }; } #endif 
LucenePlusPlus-rel_3.0.9/src/contrib/include/HighlighterScorer.h000066400000000000000000000042201456444476200247650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef HIGHLIGHTERSCORER_H #define HIGHLIGHTERSCORER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// A HighlighterScorer is responsible for scoring a stream of tokens. These token scores /// can then be used to compute {@link TextFragment} scores. class LPPCONTRIBAPI HighlighterScorer { public: virtual ~HighlighterScorer(); LUCENE_INTERFACE(HighlighterScorer); public: /// Called to init the Scorer with a {@link TokenStream}. You can grab references to the /// attributes you are interested in here and access them from {@link #getTokenScore()}. /// /// @param tokenStream the {@link TokenStream} that will be scored. /// @return either a {@link TokenStream} that the Highlighter should continue using (eg /// if you read the tokenSream in this method) or null to continue using the same {@link /// TokenStream} that was passed in. virtual TokenStreamPtr init(const TokenStreamPtr& tokenStream); /// Called when a new fragment is started for consideration. /// /// @param newFragment the fragment that will be scored next virtual void startFragment(const TextFragmentPtr& newFragment); /// Called for each token in the current fragment. The {@link Highlighter} will increment /// the {@link TokenStream} passed to init on every call. 
/// /// @return a score which is passed to the {@link Highlighter} class to influence the /// mark-up of the text (this return value is NOT used to score the fragment) virtual double getTokenScore(); /// Called when the {@link Highlighter} has no more tokens for the current fragment - the /// Scorer returns the weighting it has derived for the most recent fragment, typically /// based on the results of {@link #getTokenScore()}. virtual double getFragmentScore(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/LuceneContrib.h000066400000000000000000000103351456444476200241110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef LUCENECONTRIB_H #define LUCENECONTRIB_H #include "Lucene.h" namespace Lucene { // analyzers DECLARE_SHARED_PTR(ArabicAnalyzer) DECLARE_SHARED_PTR(ArabicAnalyzerSavedStreams) DECLARE_SHARED_PTR(ArabicLetterTokenizer) DECLARE_SHARED_PTR(ArabicNormalizationFilter) DECLARE_SHARED_PTR(ArabicNormalizer) DECLARE_SHARED_PTR(ArabicStemFilter) DECLARE_SHARED_PTR(ArabicStemmer) DECLARE_SHARED_PTR(BrazilianAnalyzer) DECLARE_SHARED_PTR(BrazilianAnalyzerSavedStreams) DECLARE_SHARED_PTR(BrazilianStemFilter) DECLARE_SHARED_PTR(BrazilianStemmer) DECLARE_SHARED_PTR(CJKAnalyzer) DECLARE_SHARED_PTR(CJKAnalyzerSavedStreams) DECLARE_SHARED_PTR(CJKTokenizer) DECLARE_SHARED_PTR(ChineseAnalyzer) DECLARE_SHARED_PTR(ChineseAnalyzerSavedStreams) DECLARE_SHARED_PTR(ChineseFilter) DECLARE_SHARED_PTR(ChineseTokenizer) DECLARE_SHARED_PTR(CzechAnalyzer) DECLARE_SHARED_PTR(CzechAnalyzerSavedStreams) DECLARE_SHARED_PTR(DutchAnalyzer) DECLARE_SHARED_PTR(DutchAnalyzerSavedStreams) DECLARE_SHARED_PTR(DutchStemFilter) DECLARE_SHARED_PTR(DutchStemmer) 
DECLARE_SHARED_PTR(ElisionFilter) DECLARE_SHARED_PTR(FrenchAnalyzer) DECLARE_SHARED_PTR(FrenchAnalyzerSavedStreams) DECLARE_SHARED_PTR(FrenchStemFilter) DECLARE_SHARED_PTR(FrenchStemmer) DECLARE_SHARED_PTR(GermanAnalyzer) DECLARE_SHARED_PTR(GermanAnalyzerSavedStreams) DECLARE_SHARED_PTR(GermanStemFilter) DECLARE_SHARED_PTR(GermanStemmer) DECLARE_SHARED_PTR(GreekLowerCaseFilter) DECLARE_SHARED_PTR(GreekAnalyzer) DECLARE_SHARED_PTR(GreekAnalyzerSavedStreams) DECLARE_SHARED_PTR(PersianAnalyzer) DECLARE_SHARED_PTR(PersianAnalyzerSavedStreams) DECLARE_SHARED_PTR(PersianNormalizationFilter) DECLARE_SHARED_PTR(PersianNormalizer) DECLARE_SHARED_PTR(ReverseStringFilter) DECLARE_SHARED_PTR(RussianAnalyzer) DECLARE_SHARED_PTR(RussianAnalyzerSavedStreams) DECLARE_SHARED_PTR(RussianLetterTokenizer) DECLARE_SHARED_PTR(RussianLowerCaseFilter) DECLARE_SHARED_PTR(RussianStemFilter) DECLARE_SHARED_PTR(RussianStemmer) DECLARE_SHARED_PTR(SnowballFilter) DECLARE_SHARED_PTR(SnowballAnalyzer) DECLARE_SHARED_PTR(SnowballAnalyzerSavedStreams) // highlighter DECLARE_SHARED_PTR(DefaultEncoder) DECLARE_SHARED_PTR(Encoder) DECLARE_SHARED_PTR(FakeReader) DECLARE_SHARED_PTR(Formatter) DECLARE_SHARED_PTR(Fragmenter) DECLARE_SHARED_PTR(FragmentQueue) DECLARE_SHARED_PTR(GradientFormatter) DECLARE_SHARED_PTR(Highlighter) DECLARE_SHARED_PTR(HighlighterScorer) DECLARE_SHARED_PTR(MapWeightedSpanTerm) DECLARE_SHARED_PTR(NullFragmenter) DECLARE_SHARED_PTR(PositionCheckingMap) DECLARE_SHARED_PTR(PositionSpan) DECLARE_SHARED_PTR(QueryScorer) DECLARE_SHARED_PTR(QueryTermExtractor) DECLARE_SHARED_PTR(QueryTermScorer) DECLARE_SHARED_PTR(SimpleFragmenter) DECLARE_SHARED_PTR(SimpleHTMLEncoder) DECLARE_SHARED_PTR(SimpleHTMLFormatter) DECLARE_SHARED_PTR(SimpleSpanFragmenter) DECLARE_SHARED_PTR(SpanGradientFormatter) DECLARE_SHARED_PTR(StringBuffer) DECLARE_SHARED_PTR(TextFragment) DECLARE_SHARED_PTR(TokenGroup) DECLARE_SHARED_PTR(TokenSources) DECLARE_SHARED_PTR(WeightedSpanTerm) 
DECLARE_SHARED_PTR(WeightedSpanTermExtractor) DECLARE_SHARED_PTR(WeightedTerm) // memory DECLARE_SHARED_PTR(MemoryIndex) DECLARE_SHARED_PTR(MemoryIndexInfo) DECLARE_SHARED_PTR(MemoryIndexReader) typedef HashMap< String, WeightedSpanTermPtr > MapStringWeightedSpanTerm; typedef HashMap< String, WeightedTermPtr > MapStringWeightedTerm; typedef HashMap< String, SpanQueryPtr > MapStringSpanQuery; typedef HashMap< String, Collection > MapStringIntCollection; typedef HashMap< String, MemoryIndexInfoPtr > MapStringMemoryIndexInfo; typedef std::pair< String, Collection > PairStringIntCollection; typedef Collection< PairStringIntCollection > CollectionStringIntCollection; typedef std::pair< String, MemoryIndexInfoPtr > PairStringMemoryIndexInfo; typedef Collection< PairStringMemoryIndexInfo > CollectionStringMemoryIndexInfo; typedef HashSet< WeightedTermPtr, luceneHash, luceneEquals > SetWeightedTerm; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/MapWeightedSpanTerm.h000066400000000000000000000020711456444476200252230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MAPWEIGHTEDSPANTERM_H #define MAPWEIGHTEDSPANTERM_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Utility class that encapsulates a StringWeightedSpanTerm map that can be overridden. 
class LPPCONTRIBAPI MapWeightedSpanTerm : public LuceneObject { public: MapWeightedSpanTerm(); virtual ~MapWeightedSpanTerm(); LUCENE_CLASS(MapWeightedSpanTerm); protected: MapStringWeightedSpanTerm map; public: virtual MapStringWeightedSpanTerm::iterator begin(); virtual MapStringWeightedSpanTerm::iterator end(); virtual void put(const String& key, const WeightedSpanTermPtr& val); virtual WeightedSpanTermPtr get(const String& key) const; virtual void clear(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/MemoryIndex.h000066400000000000000000000341641456444476200236230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef MEMORYINDEX_H #define MEMORYINDEX_H #include "LuceneContrib.h" #include "IndexReader.h" #include "TermEnum.h" #include "Collector.h" #include "TermPositions.h" #include "TermPositionVector.h" namespace Lucene { /// High-performance single-document main memory Lucene fulltext search index. /// /// Overview /// /// This class is a replacement/substitute for a large subset of {@link RAMDirectory} functionality. /// It is designed to enable maximum efficiency for on-the-fly matchmaking combining structured and /// fuzzy fulltext search in realtime streaming applications such as Nux XQuery based XML message /// queues, publish-subscribe systems for Blogs/newsfeeds, text chat, data acquisition and /// distribution systems, application level routers, firewalls, classifiers, etc. 
Rather than /// targeting fulltext search of infrequent queries over huge persistent data archives (historic /// search), this class targets fulltext search of huge numbers of queries over comparatively small /// transient realtime data (prospective search). /// /// For example as in ///
/// double score = search(const String& text, const QueryPtr& query)
/// 
/// /// Each instance can hold at most one Lucene "document", with a document containing zero or more /// "fields", each field having a name and a fulltext value. The fulltext value is tokenized /// (split and transformed) into zero or more index terms (aka words) on addField(), according to /// the policy implemented by an Analyzer. For example, Lucene analyzers can split on whitespace, /// normalize to lower case for case insensitivity, ignore common terms with little discriminatory /// value such as "he", "in", "and" (stop words), reduce the terms to their natural linguistic root /// form such as "fishing" being reduced to "fish" (stemming), resolve synonyms/inflexions/thesauri /// (upon indexing and/or querying), etc. /// /// Note that a Lucene query selects on the field names and associated (indexed) tokenized terms, /// not on the original fulltext(s) - the latter are not stored but rather thrown away immediately /// after tokenization. /// /// For some interesting background information on search technology, see Bob Wyman's Prospective Search, /// Jim Gray's /// A Call to Arms - Custom subscriptions, and Tim Bray's On Search, the Series. /// /// /// Example Usage ///
/// AnalyzerPtr analyzer = newLucene();
/// MemoryIndexPtr index = newLucene();
/// index->addField(L"content", L"Readings about Salmons and other select Alaska fishing Manuals", analyzer);
/// index->addField(L"author", L"Tales of James", analyzer);
/// QueryParserPtr parser = newLucene(L"content", analyzer);
/// double score = index->search(parser->parse(L"+author:james +salmon~ +fish* manual~"));
/// if (score > 0.0)
/// {
///     // it's a match
/// }
/// else
/// {
///     // no match found
/// }
/// 
/// /// /// Performance Notes /// /// Internally there's a new data structure geared towards efficient indexing and searching, plus /// the necessary support code to seamlessly plug into the Lucene framework. /// /// This class performs very well for very small texts (eg. 10 chars) as well as for large texts /// (eg. 10 MB) and everything in between. Typically, it is about 10-100 times faster than /// RAMDirectory. Note that RAMDirectory has particularly large efficiency overheads for small to /// medium sized texts, both in time and space. Indexing a field with N tokens takes O(N) in the /// best case, and O(N logN) in the worst case. Memory consumption is probably larger than for /// RAMDirectory. /// class LPPCONTRIBAPI MemoryIndex : public LuceneObject { public: /// Constructs an empty instance that can optionally store the start and end character offset /// of each token term in the text. This can be useful for highlighting of hit locations with /// the Lucene highlighter package. Private until the highlighter package matures, so that /// this can actually be meaningfully integrated. /// @param storeOffsets Whether or not to store the start and end character offset of each /// token term in the text. 
MemoryIndex(bool storeOffsets = false); virtual ~MemoryIndex(); LUCENE_CLASS(MemoryIndex); protected: /// info for each field MapStringMemoryIndexInfo fields; /// fields sorted ascending by fieldName; lazily computed on demand CollectionStringMemoryIndexInfo sortedFields; /// pos: positions[3 * i], startOffset: positions[3 * i + 1], endOffset: positions[3 * i + 2] int32_t stride; static const double docBoost; public: /// Convenience method; Tokenizes the given field text and adds the resulting terms to the /// index; Equivalent to adding an indexed non-keyword Lucene {@link Field} that is {@link /// Field::INDEX_ANALYZED tokenized}, {@link Field::STORE_NO not stored}, {@link /// Field::TERM_VECTOR_WITH_POSITIONS termVectorStored with positions} (or {@link /// Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS termVectorStored with positions and offsets}) /// @param fieldName A name to be associated with the text /// @param text The text to tokenize and index. /// @param analyzer The analyzer to use for tokenization void addField(const String& fieldName, const String& text, const AnalyzerPtr& analyzer); /// Iterates over the given token stream and adds the resulting terms to the index; /// Equivalent to adding a tokenized, indexed, termVectorStored, unstored, Lucene {@link /// Field}. Finally closes the token stream. Note that untokenized keywords can be added /// with this method via {@link #keywordTokenStream(Collection)}, the Lucene contrib /// KeywordTokenizer or similar utilities. /// @param fieldName A name to be associated with the text. /// @param stream The token stream to retrieve tokens from. /// @param boost The boost factor for hits for this field. /// @see Field#setBoost(double) void addField(const String& fieldName, const TokenStreamPtr& stream, double boost = 1.0); /// Creates and returns a searcher that can be used to execute arbitrary Lucene queries /// and to collect the resulting query results as hits. 
/// @return a searcher IndexSearcherPtr createSearcher(); /// Convenience method that efficiently returns the relevance score by matching this index /// against the given Lucene query expression. /// @param query An arbitrary Lucene query to run against this index /// @return the relevance score of the matchmaking; A number in the range [0.0 .. 1.0], /// with 0.0 indicating no match. The higher the number the better the match. double search(const QueryPtr& query); protected: int32_t numPositions(Collection positions); /// sorts into ascending order (on demand), reusing memory along the way void sortFields(); friend class MemoryIndexReader; friend class MemoryIndexInfo; friend class MemoryIndexTermEnum; friend class MemoryIndexTermPositions; friend class MemoryIndexTermPositionVector; }; /// Index data structure for a field; Contains the tokenized term texts and their positions. class LPPCONTRIBAPI MemoryIndexInfo : public LuceneObject { public: MemoryIndexInfo(MapStringIntCollection terms, int32_t numTokens, int32_t numOverlapTokens, double boost); virtual ~MemoryIndexInfo(); LUCENE_CLASS(MemoryIndexInfo); protected: /// Term strings and their positions for this field MapStringIntCollection terms; /// Terms sorted ascending by term text; computed on demand CollectionStringIntCollection sortedTerms; /// Number of added tokens for this field int32_t numTokens; /// Number of overlapping tokens for this field int32_t numOverlapTokens; /// Boost factor for hits for this field double boost; /// Term for this field's fieldName, lazily computed on demand TermPtr _template; public: /// Sorts hashed terms into ascending order, reusing memory along the way. Note that /// sorting is lazily delayed until required (often it's not required at all). 
void sortTerms(); /// Note that the frequency can be calculated as numPosition(getPositions(x)) Collection getPositions(const String& term); /// Note that the frequency can be calculated as numPosition(getPositions(x)) Collection getPositions(int32_t pos); double getBoost(); friend class MemoryIndexReader; friend class MemoryIndexTermEnum; friend class MemoryIndexTermPositions; friend class MemoryIndexTermPositionVector; }; /// Search support for Lucene framework integration; implements all methods required by the /// Lucene IndexReader contracts. class LPPCONTRIBAPI MemoryIndexReader : public IndexReader { public: MemoryIndexReader(const MemoryIndexPtr& memoryIndex); virtual ~MemoryIndexReader(); LUCENE_CLASS(MemoryIndexReader); public: static TermPtr MATCH_ALL_TERM(); protected: MemoryIndexPtr memoryIndex; SearcherWeakPtr _searcher; // needed to find searcher.getSimilarity() /// cache norms to avoid repeated expensive calculations ByteArray cachedNorms; String cachedFieldName; SimilarityPtr cachedSimilarity; protected: MemoryIndexInfoPtr getInfo(const String& fieldName); MemoryIndexInfoPtr getInfo(int32_t pos); SimilarityPtr getSimilarity(); void setSearcher(const SearcherPtr& searcher); public: virtual int32_t docFreq(const TermPtr& t); virtual TermEnumPtr terms(); virtual TermEnumPtr terms(const TermPtr& t); virtual TermPositionsPtr termPositions(); virtual TermDocsPtr termDocs(); virtual Collection getTermFreqVectors(int32_t docNumber); virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper); virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper); virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); virtual ByteArray norms(const String& field); virtual void norms(const String& field, ByteArray norms, int32_t offset); virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); virtual int32_t numDocs(); virtual int32_t maxDoc(); virtual 
DocumentPtr document(int32_t n); virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector); virtual bool isDeleted(int32_t n); virtual bool hasDeletions(); virtual void doDelete(int32_t docNum); virtual void doUndeleteAll(); virtual void doCommit(MapStringString commitUserData); virtual void doClose(); virtual HashSet getFieldNames(FieldOption fieldOption); friend class MemoryIndex; friend class MemoryIndexTermEnum; friend class MemoryIndexTermPositions; friend class MemoryIndexTermPositionVector; }; class LPPCONTRIBAPI MemoryIndexTermEnum : public TermEnum { public: MemoryIndexTermEnum(const MemoryIndexReaderPtr& reader, int32_t ix, int32_t jx); virtual ~MemoryIndexTermEnum(); LUCENE_CLASS(MemoryIndexTermEnum); protected: MemoryIndexReaderWeakPtr _reader; int32_t i; int32_t j; public: virtual bool next(); virtual TermPtr term(); virtual int32_t docFreq(); virtual void close(); protected: TermPtr createTerm(const MemoryIndexInfoPtr& info, int32_t pos, const String& text); }; class LPPCONTRIBAPI MemoryIndexCollector : public Collector { public: MemoryIndexCollector(Collection scores); virtual ~MemoryIndexCollector(); LUCENE_CLASS(MemoryIndexCollector); protected: Collection scores; ScorerPtr scorer; public: virtual void collect(int32_t doc); virtual void setScorer(const ScorerPtr& scorer); virtual bool acceptsDocsOutOfOrder(); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); }; class LPPCONTRIBAPI MemoryIndexTermPositions : public TermPositions, public LuceneObject { public: MemoryIndexTermPositions(const MemoryIndexReaderPtr& reader); virtual ~MemoryIndexTermPositions(); LUCENE_CLASS(MemoryIndexTermPositions); protected: MemoryIndexReaderWeakPtr _reader; bool hasNext; int32_t cursor; Collection current; TermPtr term; public: virtual void seek(const TermPtr& term); virtual void seek(const TermEnumPtr& termEnum); virtual int32_t doc(); virtual int32_t freq(); virtual bool next(); virtual int32_t read(Collection& docs, 
Collection& freqs); virtual bool skipTo(int32_t target); virtual void close(); virtual int32_t nextPosition(); virtual int32_t getPayloadLength(); virtual ByteArray getPayload(ByteArray data, int32_t offset); virtual bool isPayloadAvailable(); }; class MemoryIndexTermPositionVector : public TermPositionVector, public LuceneObject { public: MemoryIndexTermPositionVector(const MemoryIndexReaderPtr& reader, const MemoryIndexInfoPtr& info, const String& fieldName); virtual ~MemoryIndexTermPositionVector(); LUCENE_CLASS(MemoryIndexTermPositionVector); protected: MemoryIndexReaderWeakPtr _reader; CollectionStringIntCollection sortedTerms; String fieldName; public: virtual String getField(); virtual int32_t size(); virtual Collection getTerms(); virtual Collection getTermFrequencies(); virtual int32_t indexOf(const String& term); virtual Collection indexesOf(Collection terms, int32_t start, int32_t length); virtual Collection getTermPositions(int32_t index); virtual Collection getOffsets(int32_t index); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/NullFragmenter.h000066400000000000000000000015621456444476200243040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef NULLFRAGMENTER_H #define NULLFRAGMENTER_H #include "Fragmenter.h" namespace Lucene { /// {@link Fragmenter} implementation which does not fragment the text. This is useful for /// highlighting the entire content of a document or field. 
class LPPCONTRIBAPI NullFragmenter : public Fragmenter, public LuceneObject { public: virtual ~NullFragmenter(); LUCENE_CLASS(NullFragmenter); public: virtual void start(const String& originalText, const TokenStreamPtr& tokenStream); virtual bool isNewFragment(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/PersianAnalyzer.h000066400000000000000000000054371456444476200244730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PERSIANANALYZER_H #define PERSIANANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for Persian. /// /// This Analyzer uses {@link ArabicLetterTokenizer} which implies tokenizing around /// zero-width non-joiner in addition to whitespace. Some persian-specific variant /// forms (such as farsi yeh and keheh) are standardized. "Stemming" is accomplished /// via stopwords. class LPPCONTRIBAPI PersianAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. PersianAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. PersianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); virtual ~PersianAnalyzer(); LUCENE_CLASS(PersianAnalyzer); public: /// Default Persian stopwords in UTF-8 format. /// /// Generated from http://members.unine.ch/jacques.savoy/clef/index.html /// The stopword list is BSD-Licensed. static const uint8_t DEFAULT_STOPWORD_FILE[]; protected: /// Contains the stopwords used with the StopFilter. HashSet stoptable; LuceneVersion::Version matchVersion; public: /// Returns an unmodifiable instance of the default stop-words set. 
static const HashSet getDefaultStopSet(); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with /// {@link LowerCaseFilter}, {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter} /// and Persian Stop words. virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with /// {@link LowerCaseFilter}, {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter} /// and Persian Stop words. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; class LPPCONTRIBAPI PersianAnalyzerSavedStreams : public LuceneObject { public: virtual ~PersianAnalyzerSavedStreams(); LUCENE_CLASS(PersianAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/PersianNormalizationFilter.h000066400000000000000000000016641456444476200267000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PERSIANNORMALIZATIONFILTER_H #define PERSIANNORMALIZATIONFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that applies {@link PersianNormalizer} to normalize the orthography. 
class LPPCONTRIBAPI PersianNormalizationFilter : public TokenFilter { public: PersianNormalizationFilter(const TokenStreamPtr& input); virtual ~PersianNormalizationFilter(); LUCENE_CLASS(PersianNormalizationFilter); protected: PersianNormalizerPtr normalizer; TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/PersianNormalizer.h000066400000000000000000000034231456444476200250210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef PERSIANNORMALIZER_H #define PERSIANNORMALIZER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Normalizer for Persian. /// /// Normalization is done in-place for efficiency, operating on a termbuffer. /// /// Normalization is defined as: ///
    ///
  • Normalization of various heh + hamza forms and heh goal to heh. ///
  • Normalization of farsi yeh and yeh barree to arabic yeh. ///
  • Normalization of persian keheh to arabic kaf. ///
class LPPCONTRIBAPI PersianNormalizer : public LuceneObject { public: virtual ~PersianNormalizer(); LUCENE_CLASS(PersianNormalizer); public: static const wchar_t YEH; static const wchar_t FARSI_YEH; static const wchar_t YEH_BARREE; static const wchar_t KEHEH; static const wchar_t KAF; static const wchar_t HAMZA_ABOVE; static const wchar_t HEH_YEH; static const wchar_t HEH_GOAL; static const wchar_t HEH; public: /// Normalize an input buffer of Persian text /// @param s input buffer /// @param len length of input buffer /// @return length of input buffer after normalization int32_t normalize(wchar_t* s, int32_t len); /// Delete a character in-place /// @param s Input Buffer /// @param pos Position of character to delete /// @param len length of input buffer /// @return length of input buffer after deletion int32_t deleteChar(wchar_t* s, int32_t pos, int32_t len); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/QueryScorer.h000066400000000000000000000077121456444476200236450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYSCORER_H #define QUERYSCORER_H #include "LuceneContrib.h" #include "HighlighterScorer.h" namespace Lucene { /// {@link HighlighterScorer} implementation which scores text fragments by the number of unique query terms found. /// This class converts appropriate {@link Query}s to {@link SpanQuery}s and attempts to score only /// those terms that participated in generating the 'hit' on the document. 
class LPPCONTRIBAPI QueryScorer : public HighlighterScorer, public LuceneObject { public: /// @param query Query to use for highlighting QueryScorer(const QueryPtr& query); /// @param query Query to use for highlighting /// @param field Field to highlight - pass empty string to ignore fields QueryScorer(const QueryPtr& query, const String& field); /// @param query Query to use for highlighting /// @param reader {@link IndexReader} to use for quasi tf/idf scoring /// @param field Field to highlight - pass empty string to ignore fields QueryScorer(const QueryPtr& query, const IndexReaderPtr& reader, const String& field); /// @param query Query to use for highlighting /// @param reader {@link IndexReader} to use for quasi tf/idf scoring /// @param field Field to highlight - pass empty string to ignore fields /// @param defaultField QueryScorer(const QueryPtr& query, const IndexReaderPtr& reader, const String& field, const String& defaultField); /// @param query Query to use for highlighting /// @param field Field to highlight - pass empty string to ignore fields /// @param defaultField QueryScorer(const QueryPtr& query, const String& field, const String& defaultField); /// @param weightedTerms an array of pre-created {@link WeightedSpanTerm}s QueryScorer(Collection weightedTerms); virtual ~QueryScorer(); LUCENE_CLASS(QueryScorer); protected: double totalScore; HashSet foundTerms; MapWeightedSpanTermPtr fieldWeightedSpanTerms; double maxTermWeight; int32_t position; String defaultField; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncAtt; bool expandMultiTermQuery; QueryPtr query; String field; IndexReaderPtr reader; bool skipInitExtractor; bool wrapToCaching; protected: void init(const QueryPtr& query, const String& field, const IndexReaderPtr& reader, bool expandMultiTermQuery); TokenStreamPtr initExtractor(const TokenStreamPtr& tokenStream); public: virtual double getFragmentScore(); /// @return The highest weighted term (useful for passing to 
GradientFormatter to set top end of coloring scale). virtual double getMaxTermWeight(); virtual double getTokenScore(); virtual TokenStreamPtr init(const TokenStreamPtr& tokenStream); virtual WeightedSpanTermPtr getWeightedSpanTerm(const String& token); virtual void startFragment(const TextFragmentPtr& newFragment); /// @return true if multi-term queries should be expanded virtual bool isExpandMultiTermQuery(); /// Controls whether or not multi-term queries are expanded against a {@link MemoryIndex} {@link IndexReader}. /// @param expandMultiTermQuery true if multi-term queries should be expanded virtual void setExpandMultiTermQuery(bool expandMultiTermQuery); /// By default, {@link TokenStream}s that are not of the type {@link CachingTokenFilter} are wrapped in a {@link /// CachingTokenFilter} to ensure an efficient reset - if you are already using a different caching {@link /// TokenStream} impl and you don't want it to be wrapped, set this to false. virtual void setWrapIfNotCachingTokenFilter(bool wrap); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/QueryTermExtractor.h000066400000000000000000000065611456444476200252140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYTERMEXTRACTOR_H #define QUERYTERMEXTRACTOR_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Utility class used to extract the terms used in a query, plus any weights. This class will not /// find terms for MultiTermQuery, TermRangeQuery and PrefixQuery classes so the caller must pass a /// rewritten query (see Query.rewrite) to obtain a list of expanded terms. 
class LPPCONTRIBAPI QueryTermExtractor : public LuceneObject { public: virtual ~QueryTermExtractor(); LUCENE_CLASS(QueryTermExtractor); public: /// Extracts all terms texts of a given Query into an array of WeightedTerms /// /// @param query Query to extract term texts from. /// @return an array of the terms used in a query, plus their weights. static Collection getTerms(const QueryPtr& query); /// Extracts all terms texts of a given Query into an array of WeightedTerms /// /// @param query Query to extract term texts from. /// @param reader used to compute IDF which can be used to /// a) score selected fragments better /// b) use graded highlights eg changing intensity of font color /// @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based. /// @return an array of the terms used in a query, plus their weights. static Collection getIdfWeightedTerms(const QueryPtr& query, const IndexReaderPtr& reader, const String& fieldName); /// Extracts all terms texts of a given Query into an array of WeightedTerms /// /// @param query Query to extract term texts from. /// @param prohibited true to extract "prohibited" terms, too. /// @param fieldName The fieldName used to filter query terms. /// @return an array of the terms used in a query, plus their weights. static Collection getTerms(const QueryPtr& query, bool prohibited, const String& fieldName); /// Extracts all terms texts of a given Query into an array of WeightedTerms /// /// @param query Query to extract term texts from. /// @param prohibited true to extract "prohibited" terms, too. /// @return an array of the terms used in a query, plus their weights. 
static Collection getTerms(const QueryPtr& query, bool prohibited); static void getTerms(const QueryPtr& query, SetWeightedTerm terms, bool prohibited, const String& fieldName); protected: /// extractTerms is currently the only query-independent means of introspecting queries but it only reveals /// a list of terms for that query - not the boosts each individual term in that query may or may not have. /// "Container" queries such as BooleanQuery should be unwrapped to get at the boost info held in each child /// element. static void getTermsFromBooleanQuery(const BooleanQueryPtr& query, SetWeightedTerm terms, bool prohibited, const String& fieldName); static void getTermsFromFilteredQuery(const FilteredQueryPtr& query, SetWeightedTerm terms, bool prohibited, const String& fieldName); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/QueryTermScorer.h000066400000000000000000000052451456444476200244740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef QUERYTERMSCORER_H #define QUERYTERMSCORER_H #include "LuceneContrib.h" #include "HighlighterScorer.h" namespace Lucene { /// {@link HighlighterScorer} implementation which scores text fragments by the number of unique query terms found. /// This class uses the {@link QueryTermExtractor} class to process determine the query terms and their /// boosts to be used. 
class LPPCONTRIBAPI QueryTermScorer : public HighlighterScorer, public LuceneObject { public: /// @param query a Lucene query (ideally rewritten using query.rewrite before being passed to this class /// and the searcher) QueryTermScorer(const QueryPtr& query); /// @param query a Lucene query (ideally rewritten using query.rewrite before being passed to this class /// and the searcher) /// @param fieldName the Field name which is used to match Query terms QueryTermScorer(const QueryPtr& query, const String& fieldName); /// @param query a Lucene query (ideally rewritten using query.rewrite before being passed to this class /// and the searcher) /// @param reader used to compute IDF which can be used to /// a) score selected fragments better /// b) use graded highlights eg set font color intensity /// @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based QueryTermScorer(const QueryPtr& query, const IndexReaderPtr& reader, const String& fieldName); /// @param weightedTerms an array of pre-created {@link WeightedTerm}s QueryTermScorer(Collection weightedTerms); virtual ~QueryTermScorer(); LUCENE_CLASS(QueryTermScorer); public: TextFragmentPtr currentTextFragment; HashSet uniqueTermsInFragment; double totalScore; double maxTermWeight; protected: MapStringWeightedTerm termsToFind; TermAttributePtr termAtt; protected: void ConstructQueryTermScorer(Collection weightedTerms); public: virtual TokenStreamPtr init(const TokenStreamPtr& tokenStream); virtual void startFragment(const TextFragmentPtr& newFragment); virtual double getTokenScore(); virtual double getFragmentScore(); virtual void allFragmentsProcessed(); /// @return The highest weighted term (useful for passing to GradientFormatter to set top end of coloring scale. 
virtual double getMaxTermWeight(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/ReverseStringFilter.h000066400000000000000000000037301456444476200253260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef REVERSESTRINGFILTER_H #define REVERSESTRINGFILTER_H #include "TokenFilter.h" namespace Lucene { /// Reverse token string, for example "country" => "yrtnuoc". /// /// If marker is supplied, then tokens will be also prepended by that character. For example, with a /// marker of \u0001, "country" => "\u0001yrtnuoc". This is useful when implementing efficient /// leading wildcards search. class LPPCONTRIBAPI ReverseStringFilter : public TokenFilter { public: /// Create a new ReverseStringFilter that reverses all tokens in the supplied {@link TokenStream}. /// /// The reversed tokens will not be marked. ReverseStringFilter(const TokenStreamPtr& input); /// Create a new ReverseStringFilter that reverses and marks all tokens in the supplied {@link /// TokenStream}. /// /// The reversed tokens will be prepended (marked) by the marker character. 
ReverseStringFilter(const TokenStreamPtr& input, wchar_t marker); virtual ~ReverseStringFilter(); LUCENE_CLASS(ReverseStringFilter); protected: TermAttributePtr termAtt; wchar_t marker; static const wchar_t NOMARKER; public: /// Example marker character: U+0001 (START OF HEADING) static const wchar_t START_OF_HEADING_MARKER; /// Example marker character: U+001F (INFORMATION SEPARATOR ONE) static const wchar_t INFORMATION_SEPARATOR_MARKER; /// Example marker character: U+EC00 (PRIVATE USE AREA: EC00) static const wchar_t PUA_EC00_MARKER; /// Example marker character: U+200F (RIGHT-TO-LEFT MARK) static const wchar_t RTL_DIRECTION_MARKER; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/RussianAnalyzer.h000066400000000000000000000047271456444476200245170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RUSSIANANALYZER_H #define RUSSIANANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// {@link Analyzer} for Russian language. /// /// Supports an external list of stopwords (words that will not be indexed at all). /// A default set of stopwords is used unless an alternative list is specified. class LPPCONTRIBAPI RussianAnalyzer : public Analyzer { public: /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. RussianAnalyzer(LuceneVersion::Version matchVersion); /// Builds an analyzer with the given stop words. RussianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); virtual ~RussianAnalyzer(); LUCENE_CLASS(RussianAnalyzer); protected: /// Contains the stopwords used with the {@link StopFilter}. 
HashSet stopSet; LuceneVersion::Version matchVersion; /// List of typical Russian stopwords. static const uint8_t DEFAULT_STOPWORD_FILE[]; public: /// Returns an unmodifiable instance of the default stop-words set. static const HashSet getDefaultStopSet(); /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// /// @return A {@link TokenStream} built from a {@link RussianLetterTokenizer} filtered with /// {@link RussianLowerCaseFilter}, {@link StopFilter} and {@link RussianStemFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the /// provided {@link Reader}. /// /// @return A {@link TokenStream} built from a {@link RussianLetterTokenizer} filtered with /// {@link RussianLowerCaseFilter}, {@link StopFilter} and {@link RussianStemFilter}. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; class LPPCONTRIBAPI RussianAnalyzerSavedStreams : public LuceneObject { public: virtual ~RussianAnalyzerSavedStreams(); LUCENE_CLASS(RussianAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/RussianLetterTokenizer.h000066400000000000000000000024701456444476200260550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RUSSIANLETTERTOKENIZER_H #define RUSSIANLETTERTOKENIZER_H #include "CharTokenizer.h" namespace Lucene { /// A RussianLetterTokenizer is a {@link Tokenizer} that extends {@link LetterTokenizer} by also /// allowing the basic Latin digits 0-9. 
class LPPCONTRIBAPI RussianLetterTokenizer : public CharTokenizer { public: /// Construct a new RussianLetterTokenizer. RussianLetterTokenizer(const ReaderPtr& input); /// Construct a new RussianLetterTokenizer using a given {@link AttributeSource}. RussianLetterTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); /// Construct a new RussianLetterTokenizer using a given {@link AttributeFactory}. RussianLetterTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); virtual ~RussianLetterTokenizer(); LUCENE_CLASS(RussianLetterTokenizer); public: /// Collects only characters which satisfy UnicodeUtil::isAlpha(c). virtual bool isTokenChar(wchar_t c); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/RussianLowerCaseFilter.h000066400000000000000000000015021456444476200257500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RUSSIANLOWERCASEFILTER_H #define RUSSIANLOWERCASEFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// Normalizes token text to lower case. class LPPCONTRIBAPI RussianLowerCaseFilter : public TokenFilter { public: RussianLowerCaseFilter(const TokenStreamPtr& input); virtual ~RussianLowerCaseFilter(); LUCENE_CLASS(RussianLowerCaseFilter); protected: TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/RussianStemFilter.h000066400000000000000000000023571456444476200250050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RUSSIANSTEMFILTER_H #define RUSSIANSTEMFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" namespace Lucene { /// A {@link TokenFilter} that stems Russian words. /// /// The implementation was inspired by GermanStemFilter. /// /// The input should be filtered by {@link LowerCaseFilter} before passing it to RussianStemFilter, /// because RussianStemFilter only works with lowercase characters. class LPPCONTRIBAPI RussianStemFilter : public TokenFilter { public: RussianStemFilter(const TokenStreamPtr& input); virtual ~RussianStemFilter(); LUCENE_CLASS(RussianStemFilter); protected: /// {@link RussianStemmer} in use by this filter. RussianStemmerPtr stemmer; TermAttributePtr termAtt; public: virtual bool incrementToken(); /// Set a alternative/custom {@link RussianStemmer} for this filter. void setStemmer(const RussianStemmerPtr& stemmer); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/RussianStemmer.h000066400000000000000000000076701456444476200243460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef RUSSIANSTEMMER_H #define RUSSIANSTEMMER_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Russian stemming algorithm implementation (see http://snowball.sourceforge.net for /// detailed description). 
class LPPCONTRIBAPI RussianStemmer : public LuceneObject { public: RussianStemmer(); virtual ~RussianStemmer(); LUCENE_CLASS(RussianStemmer); protected: /// positions of RV, R1 and R2 respectively int32_t RV; int32_t R1; int32_t R2; static const wchar_t A; static const wchar_t V; static const wchar_t G; static const wchar_t E; static const wchar_t I; static const wchar_t I_; static const wchar_t L; static const wchar_t M; static const wchar_t N; static const wchar_t O; static const wchar_t S; static const wchar_t T; static const wchar_t U; static const wchar_t X; static const wchar_t SH; static const wchar_t SHCH; static const wchar_t Y; static const wchar_t SOFT; static const wchar_t AE; static const wchar_t IU; static const wchar_t IA; /// stem definitions static const wchar_t vowels[]; Collection perfectiveGerundEndings1(); Collection perfectiveGerund1Predessors(); Collection perfectiveGerundEndings2(); Collection adjectiveEndings(); Collection participleEndings1(); Collection participleEndings2(); Collection participle1Predessors(); Collection reflexiveEndings(); Collection verbEndings1(); Collection verbEndings2(); Collection verb1Predessors(); Collection nounEndings(); Collection superlativeEndings(); Collection derivationalEndings(); Collection doubleN(); public: /// Finds the stem for given Russian word. String stem(const String& input); /// Static method for stemming. static String stemWord(const String& word); protected: /// Adjectival ending is an adjective ending, optionally preceded by participle ending. bool adjectival(String& stemmingZone); /// Derivational endings bool derivational(String& stemmingZone); /// Finds ending among given ending class and returns the length of ending found(0, if not found). int32_t findEnding(String& stemmingZone, int32_t startIndex, Collection theEndingClass); int32_t findEnding(String& stemmingZone, Collection theEndingClass); /// Finds the ending among the given class of endings and removes it from stemming zone. 
bool findAndRemoveEnding(String& stemmingZone, Collection theEndingClass); /// Finds the ending among the given class of endings, then checks if this ending was /// preceded by any of given predecessors, and if so, removes it from stemming zone. bool findAndRemoveEnding(String& stemmingZone, Collection theEndingClass, Collection thePredessors); /// Marks positions of RV, R1 and R2 in a given word. void markPositions(const String& word); /// Checks if character is a vowel. bool isVowel(wchar_t letter); /// Noun endings. bool noun(String& stemmingZone); /// Perfective gerund endings. bool perfectiveGerund(String& stemmingZone); /// Reflexive endings. bool reflexive(String& stemmingZone); bool removeI(String& stemmingZone); bool removeSoft(String& stemmingZone); /// Superlative endings. bool superlative(String& stemmingZone); /// Undoubles N. bool undoubleN(String& stemmingZone); /// Verb endings. bool verb(String& stemmingZone); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/SimpleFragmenter.h000066400000000000000000000024121456444476200246160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLEFRAGMENTER_H #define SIMPLEFRAGMENTER_H #include "Fragmenter.h" namespace Lucene { /// {@link Fragmenter} implementation which breaks text up into same-size fragments with /// no concerns over spotting sentence boundaries. 
class LPPCONTRIBAPI SimpleFragmenter : public Fragmenter, public LuceneObject { public: SimpleFragmenter(); SimpleFragmenter(int32_t fragmentSize); virtual ~SimpleFragmenter(); LUCENE_CLASS(SimpleFragmenter); protected: static const int32_t DEFAULT_FRAGMENT_SIZE; int32_t currentNumFrags; int32_t fragmentSize; OffsetAttributePtr offsetAtt; public: virtual void start(const String& originalText, const TokenStreamPtr& tokenStream); virtual bool isNewFragment(); /// @return size in number of characters of each fragment int32_t getFragmentSize(); /// @param size size in characters of each fragment void setFragmentSize(int32_t size); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/SimpleHTMLEncoder.h000066400000000000000000000015071456444476200245740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLEHTMLENCODER_H #define SIMPLEHTMLENCODER_H #include "Encoder.h" namespace Lucene { /// Simple {@link Encoder} implementation to escape text for HTML output. class LPPCONTRIBAPI SimpleHTMLEncoder : public Encoder, public LuceneObject { public: virtual ~SimpleHTMLEncoder(); LUCENE_CLASS(SimpleHTMLEncoder); public: virtual String encodeText(const String& originalText); /// Encode string into HTML static String htmlEncode(const String& plainText); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/SimpleHTMLFormatter.h000066400000000000000000000021411456444476200251530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLEHTMLFORMATTER_H #define SIMPLEHTMLFORMATTER_H #include "Formatter.h" namespace Lucene { /// Simple {@link Formatter} implementation to highlight terms with a pre and post tag. class LPPCONTRIBAPI SimpleHTMLFormatter : public Formatter, public LuceneObject { public: /// Default constructor uses HTML: <B> tags to markup terms. SimpleHTMLFormatter(); SimpleHTMLFormatter(const String& preTag, const String& postTag); virtual ~SimpleHTMLFormatter(); LUCENE_CLASS(SimpleHTMLFormatter); protected: static const String DEFAULT_PRE_TAG; static const String DEFAULT_POST_TAG; String preTag; String postTag; public: virtual String highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/SimpleSpanFragmenter.h000066400000000000000000000030561456444476200254450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SIMPLESPANFRAGMENTER_H #define SIMPLESPANFRAGMENTER_H #include "Fragmenter.h" namespace Lucene { /// {@link Fragmenter} implementation which breaks text up into same-size fragments but /// does not split up {@link Spans}. This is a simple sample class. 
class LPPCONTRIBAPI SimpleSpanFragmenter : public Fragmenter, public LuceneObject { public: /// @param queryScorer QueryScorer that was used to score hits SimpleSpanFragmenter(const QueryScorerPtr& queryScorer); /// @param queryScorer QueryScorer that was used to score hits /// @param fragmentSize size in bytes of each fragment SimpleSpanFragmenter(const QueryScorerPtr& queryScorer, int32_t fragmentSize); virtual ~SimpleSpanFragmenter(); LUCENE_CLASS(SimpleSpanFragmenter); protected: static const int32_t DEFAULT_FRAGMENT_SIZE; int32_t fragmentSize; int32_t currentNumFrags; int32_t position; QueryScorerPtr queryScorer; int32_t waitForPos; int32_t textSize; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncAtt; OffsetAttributePtr offsetAtt; public: virtual bool isNewFragment(); virtual void start(const String& originalText, const TokenStreamPtr& tokenStream); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/SnowballAnalyzer.h000066400000000000000000000041101456444476200246360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SNOWBALLANALYZER_H #define SNOWBALLANALYZER_H #include "LuceneContrib.h" #include "Analyzer.h" namespace Lucene { /// Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} /// and {@link SnowballFilter}. /// /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. class LPPCONTRIBAPI SnowballAnalyzer : public Analyzer { public: /// Builds the named analyzer with no stop words. SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name); /// Builds an analyzer with the given stop words. 
SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name, HashSet stopwords); virtual ~SnowballAnalyzer(); LUCENE_CLASS(SnowballAnalyzer); protected: /// Contains the stopwords used with the StopFilter. HashSet stopSet; String name; LuceneVersion::Version matchVersion; public: /// Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, /// a {@link StopFilter} and a {@link SnowballFilter}. virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); /// Returns a (possibly reused) {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link /// LowerCaseFilter}, a {@link StopFilter} and a {@link SnowballFilter}. virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); }; class LPPCONTRIBAPI SnowballAnalyzerSavedStreams : public LuceneObject { public: virtual ~SnowballAnalyzerSavedStreams(); LUCENE_CLASS(SnowballAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/SnowballFilter.h000066400000000000000000000016201456444476200243010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SNOWBALLFILTER_H #define SNOWBALLFILTER_H #include "LuceneContrib.h" #include "TokenFilter.h" struct sb_stemmer; namespace Lucene { /// A filter that stems words using a Snowball-generated stemmer. 
class LPPCONTRIBAPI SnowballFilter : public TokenFilter { public: SnowballFilter(const TokenStreamPtr& input, const String& name); virtual ~SnowballFilter(); LUCENE_CLASS(SnowballFilter); protected: struct sb_stemmer* stemmer; UTF8ResultPtr utf8Result; TermAttributePtr termAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/SpanGradientFormatter.h000066400000000000000000000022061456444476200256160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef SPANGRADIENTFORMATTER_H #define SPANGRADIENTFORMATTER_H #include "GradientFormatter.h" namespace Lucene { /// Formats text with different color intensity depending on the score of the term using the /// span tag. GradientFormatter uses a bgcolor argument to the font tag which doesn't work /// in Mozilla, thus this class. /// @see GradientFormatter class LPPCONTRIBAPI SpanGradientFormatter : public GradientFormatter { public: SpanGradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor); virtual ~SpanGradientFormatter(); LUCENE_CLASS(SpanGradientFormatter); public: virtual String highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/TextFragment.h000066400000000000000000000033141456444476200237640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef TEXTFRAGMENT_H #define TEXTFRAGMENT_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Low-level class used to record information about a section of a document with a score. class LPPCONTRIBAPI TextFragment : public LuceneObject { public: TextFragment(const StringBufferPtr& markedUpText, int32_t textStartPos, int32_t fragNum); virtual ~TextFragment(); LUCENE_CLASS(TextFragment); public: StringBufferPtr markedUpText; int32_t fragNum; int32_t textStartPos; int32_t textEndPos; double score; public: void setScore(double score); double getScore(); /// @param frag2 Fragment to be merged into this one void merge(const TextFragmentPtr& frag2); /// @return true if this fragment follows the one passed bool follows(const TextFragmentPtr& fragment); /// @return the fragment sequence number int32_t getFragNum(); /// Returns the marked-up text for this text fragment virtual String toString(); }; /// Utility class to store a string buffer that contains text fragment class LPPCONTRIBAPI StringBuffer : public LuceneObject { public: virtual ~StringBuffer(); LUCENE_CLASS(StringBuffer); protected: StringStream buffer; public: virtual String toString(); virtual int32_t length(); virtual void append(const String& str); virtual void clear(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/TokenGroup.h000066400000000000000000000033231456444476200234510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TOKENGROUP_H #define TOKENGROUP_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// One, or several overlapping tokens, along with the score(s) and the scope of the original text class LPPCONTRIBAPI TokenGroup : public LuceneObject { public: TokenGroup(const TokenStreamPtr& tokenStream); virtual ~TokenGroup(); LUCENE_CLASS(TokenGroup); protected: static const int32_t MAX_NUM_TOKENS_PER_GROUP; OffsetAttributePtr offsetAtt; TermAttributePtr termAtt; public: Collection tokens; Collection scores; int32_t numTokens; int32_t startOffset; int32_t endOffset; double tot; int32_t matchStartOffset; int32_t matchEndOffset; public: void addToken(double score); bool isDistinct(); void clear(); /// @param index a value between 0 and numTokens -1 /// @return the "n"th token TokenPtr getToken(int32_t index); /// @param index a value between 0 and numTokens -1 /// @return the "n"th score double getScore(int32_t index); /// @return the end position in the original text int32_t getEndOffset(); /// @return the number of tokens in this group int32_t getNumTokens(); /// @return the start position in the original text int32_t getStartOffset(); /// @return all tokens' scores summed up double getTotalScore(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/TokenSources.h000066400000000000000000000112021456444476200237730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef TOKENSOURCES_H #define TOKENSOURCES_H #include "LuceneContrib.h" #include "TokenStream.h" namespace Lucene { /// Hides implementation issues associated with obtaining a TokenStream for use with the highlighter - can obtain /// from TermFreqVectors with offsets and (optionally) positions or from Analyzer class re-parsing the stored content. class LPPCONTRIBAPI TokenSources : public LuceneObject { public: virtual ~TokenSources(); LUCENE_CLASS(TokenSources); public: /// A convenience method that tries to first get a TermPositionVector for the specified docId, then, falls back to /// using the passed in {@link Document} to retrieve the TokenStream. This is useful when you already have the /// document, but would prefer to use the vector first. /// @param reader The {@link IndexReader} to use to try and get the vector from. /// @param docId The docId to retrieve. /// @param field The field to retrieve on the document. /// @param doc The document to fall back on. /// @param analyzer The analyzer to use for creating the TokenStream if the vector doesn't exist. /// @return The {@link TokenStream} for the {@link Fieldable} on the {@link Document} static TokenStreamPtr getAnyTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field, const DocumentPtr& doc, const AnalyzerPtr& analyzer); /// A convenience method that tries a number of approaches to getting a token stream. The cost of finding there /// are no termVectors in the index is minimal (1000 invocations still registers 0 ms). So this "lazy" (flexible?) /// approach to coding is probably acceptable static TokenStreamPtr getAnyTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field, const AnalyzerPtr& analyzer); static TokenStreamPtr getTokenStream(const TermPositionVectorPtr& tpv); /// Low level api. /// Returns a token stream or null if no offset info available in index. 
This can be used to feed the highlighter /// with a pre-parsed token stream. /// /// In my tests the speeds to recreate 1000 token streams using this method are: /// - with TermVector offset only data stored - 420 milliseconds /// - with TermVector offset AND position data stored - 271 milliseconds /// (nb timings for TermVector with position data are based on a tokenizer with contiguous positions - no overlaps /// or gaps) The cost of not using TermPositionVector to store pre-parsed content and using an analyzer to re-parse /// the original content: /// - reanalyzing the original content - 980 milliseconds /// /// The re-analyze timings will typically vary depending on - /// 1) The complexity of the analyzer code (timings above were using a stemmer/lowercaser/stopword combo) /// 2) The number of other fields (Lucene reads ALL fields off the disk when accessing just one document field - /// can cost dear!) /// 3) Use of compression on field storage - could be faster due to compression (less disk IO) or slower (more CPU /// burn) depending on the content. /// /// @param tpv /// @param tokenPositionsGuaranteedContiguous true if the token position numbers have no overlaps or gaps. If looking /// to eek out the last drops of performance, set to true. If in doubt, set to false. 
static TokenStreamPtr getTokenStream(const TermPositionVectorPtr& tpv, bool tokenPositionsGuaranteedContiguous); static TokenStreamPtr getTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field); static TokenStreamPtr getTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field, const AnalyzerPtr& analyzer); static TokenStreamPtr getTokenStream(const DocumentPtr& doc, const String& field, const AnalyzerPtr& analyzer); static TokenStreamPtr getTokenStream(const String& field, const String& contents, const AnalyzerPtr& analyzer); }; /// an object used to iterate across an array of tokens class LPPCONTRIBAPI StoredTokenStream : public TokenStream { public: StoredTokenStream(Collection tokens); virtual ~StoredTokenStream(); LUCENE_CLASS(StoredTokenStream); public: Collection tokens; int32_t currentToken; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken(); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/WeightedSpanTerm.h000066400000000000000000000030331456444476200245640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WEIGHTEDSPANTERM_H #define WEIGHTEDSPANTERM_H #include "WeightedTerm.h" namespace Lucene { /// Lightweight class to hold term, weight, and positions used for scoring this term. class LPPCONTRIBAPI WeightedSpanTerm : public WeightedTerm { public: WeightedSpanTerm(double weight, const String& term, bool positionSensitive = false); virtual ~WeightedSpanTerm(); LUCENE_CLASS(WeightedSpanTerm); public: bool positionSensitive; protected: Collection positionSpans; public: /// Checks to see if this term is valid at position. 
/// @param position To check against valid term positions. /// @return true if this term is a hit at this position. bool checkPosition(int32_t position); void addPositionSpans(Collection positionSpans); bool isPositionSensitive(); void setPositionSensitive(bool positionSensitive); Collection getPositionSpans(); }; /// Utility class to store a Span class LPPCONTRIBAPI PositionSpan : public LuceneObject { public: PositionSpan(int32_t start, int32_t end); virtual ~PositionSpan(); LUCENE_CLASS(PositionSpan); public: int32_t start; int32_t end; }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/WeightedSpanTermExtractor.h000066400000000000000000000120111456444476200264540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WEIGHTEDSPANTERMEXTRACTOR_H #define WEIGHTEDSPANTERMEXTRACTOR_H #include "LuceneContrib.h" #include "FilterIndexReader.h" #include "MapWeightedSpanTerm.h" namespace Lucene { /// Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether {@link Term}s /// from the {@link Query} are contained in a supplied {@link TokenStream}. class LPPCONTRIBAPI WeightedSpanTermExtractor : public LuceneObject { public: WeightedSpanTermExtractor(const String& defaultField = L""); virtual ~WeightedSpanTermExtractor(); LUCENE_CLASS(WeightedSpanTermExtractor); protected: String fieldName; TokenStreamPtr tokenStream; MapStringIndexReader readers; String defaultField; bool expandMultiTermQuery; bool cachedTokenStream; bool wrapToCaching; protected: void closeReaders(); /// Fills a Map with {@link WeightedSpanTerm}s using the terms from the supplied Query. 
/// /// @param query Query to extract Terms from /// @param terms Map to place created WeightedSpanTerms in void extract(const QueryPtr& query, const MapWeightedSpanTermPtr& terms); /// Fills a Map with {@link WeightedSpanTerm}s using the terms from the supplied SpanQuery. /// /// @param terms Map to place created WeightedSpanTerms in. /// @param spanQuery SpanQuery to extract Terms from void extractWeightedSpanTerms(const MapWeightedSpanTermPtr& terms, const SpanQueryPtr& spanQuery); /// Fills a Map with {@link WeightedSpanTerm}s using the terms from the supplied Query. /// @param terms Map to place created WeightedSpanTerms in /// @param query Query to extract Terms from void extractWeightedTerms(const MapWeightedSpanTermPtr& terms, const QueryPtr& query); /// Necessary to implement matches for queries against defaultField bool fieldNameComparator(const String& fieldNameToCheck); IndexReaderPtr getReaderForField(const String& field); void collectSpanQueryFields(const SpanQueryPtr& spanQuery, HashSet fieldNames); bool mustRewriteQuery(const SpanQueryPtr& spanQuery); public: /// Creates a Map of WeightedSpanTerms from the given Query and TokenStream. /// /// @param query That caused hit /// @param tokenStream Of text to be highlighted /// @return Map containing WeightedSpanTerms MapWeightedSpanTermPtr getWeightedSpanTerms(const QueryPtr& query, const TokenStreamPtr& tokenStream); /// Creates a Map of WeightedSpanTerms from the given Query and TokenStream. /// /// @param query That caused hit /// @param tokenStream Of text to be highlighted /// @param fieldName Restricts Term's used based on field name /// @return Map containing WeightedSpanTerms MapWeightedSpanTermPtr getWeightedSpanTerms(const QueryPtr& query, const TokenStreamPtr& tokenStream, const String& fieldName); /// Creates a Map of WeightedSpanTerms from the given Query and TokenStream. Uses a supplied /// IndexReader to properly weight terms (for gradient highlighting). 
/// /// @param query That caused hit /// @param tokenStream Of text to be highlighted /// @param fieldName Restricts Term's used based on field name /// @param reader To use for scoring /// @return Map containing WeightedSpanTerms MapWeightedSpanTermPtr getWeightedSpanTermsWithScores(const QueryPtr& query, const TokenStreamPtr& tokenStream, const String& fieldName, const IndexReaderPtr& reader); bool getExpandMultiTermQuery(); void setExpandMultiTermQuery(bool expandMultiTermQuery); bool isCachedTokenStream(); TokenStreamPtr getTokenStream(); /// By default, {@link TokenStream}s that are not of the type {@link CachingTokenFilter} /// are wrapped in a {@link CachingTokenFilter} to ensure an efficient reset - if you /// are already using a different caching {@link TokenStream} impl and you don't want /// it to be wrapped, set this to false. void setWrapIfNotCachingTokenFilter(bool wrap); }; /// This class makes sure that if both position sensitive and insensitive versions of the same /// term are added, the position insensitive one wins. class LPPCONTRIBAPI PositionCheckingMap : public MapWeightedSpanTerm { public: virtual ~PositionCheckingMap(); LUCENE_CLASS(PositionCheckingMap); public: virtual void put(const String& key, const WeightedSpanTermPtr& val); }; /// A fake IndexReader class to extract the field from a MultiTermQuery class LPPCONTRIBAPI FakeReader : public FilterIndexReader { public: FakeReader(); virtual ~FakeReader(); LUCENE_CLASS(FakeReader); public: String field; protected: static IndexReaderPtr EMPTY_MEMORY_INDEX_READER(); public: virtual TermEnumPtr terms(const TermPtr& t); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/include/WeightedTerm.h000066400000000000000000000021631456444476200237450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WEIGHTEDTERM_H #define WEIGHTEDTERM_H #include "LuceneContrib.h" #include "LuceneObject.h" namespace Lucene { /// Lightweight class to hold term and a weight value used for scoring this term class LPPCONTRIBAPI WeightedTerm : public LuceneObject { public: WeightedTerm(double weight, const String& term); virtual ~WeightedTerm(); LUCENE_CLASS(WeightedTerm); public: double weight; // multiplier String term; // stemmed form public: /// @return the term value (stemmed) String getTerm(); /// @return the weight associated with this term double getWeight(); /// @param term the term value (stemmed) void setTerm(const String& term); /// @param weight the weight associated with this term void setWeight(double weight); }; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/memory/000077500000000000000000000000001456444476200210675ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/memory/MemoryIndex.cpp000066400000000000000000000535271456444476200240470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "MemoryIndex.h" #include "TokenStream.h" #include "Analyzer.h" #include "StringReader.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" #include "OffsetAttribute.h" #include "IndexSearcher.h" #include "Term.h" #include "Scorer.h" #include "TermFreqVector.h" #include "TermVectorOffsetInfo.h" #include "TermVectorMapper.h" #include "Similarity.h" #include "FieldInvertState.h" #include "Document.h" #include "MiscUtils.h" namespace Lucene { const double MemoryIndex::docBoost = 1.0; MemoryIndex::MemoryIndex(bool storeOffsets) { stride = storeOffsets ? 3 : 1; fields = MapStringMemoryIndexInfo::newInstance(); } MemoryIndex::~MemoryIndex() { } void MemoryIndex::addField(const String& fieldName, const String& text, const AnalyzerPtr& analyzer) { if (fieldName.empty()) { boost::throw_exception(IllegalArgumentException(L"fieldName must not be empty")); } if (text.empty()) { boost::throw_exception(IllegalArgumentException(L"text must not be empty")); } if (!analyzer) { boost::throw_exception(IllegalArgumentException(L"analyzer must not be null")); } TokenStreamPtr stream(analyzer->tokenStream(fieldName, newLucene(text))); addField(fieldName, stream); } void MemoryIndex::addField(const String& fieldName, const TokenStreamPtr& stream, double boost) { LuceneException finally; try { if (fieldName.empty()) { boost::throw_exception(IllegalArgumentException(L"fieldName must not be empty")); } if (!stream) { boost::throw_exception(IllegalArgumentException(L"token stream must not be null")); } if (boost <= 0.0) { boost::throw_exception(IllegalArgumentException(L"boost factor must be greater than 0.0")); } if (fields.contains(fieldName)) { boost::throw_exception(IllegalArgumentException(L"field must not be added more than once")); } MapStringIntCollection terms(MapStringIntCollection::newInstance()); int32_t numTokens = 0; int32_t numOverlapTokens = 0; 
int32_t pos = -1; TermAttributePtr termAtt(stream->addAttribute()); PositionIncrementAttributePtr posIncrAttribute(stream->addAttribute()); OffsetAttributePtr offsetAtt(stream->addAttribute()); stream->reset(); while (stream->incrementToken()) { String term(termAtt->term()); if (term.empty()) { continue; // nothing to do } ++numTokens; int32_t posIncr = posIncrAttribute->getPositionIncrement(); if (posIncr == 0) { ++numOverlapTokens; } pos += posIncr; Collection positions(terms.get(term)); if (!positions) { // term not seen before positions = Collection::newInstance(); terms.put(term, positions); } positions.add(pos); if (stride != 1) { positions.add(offsetAtt->startOffset()); positions.add(offsetAtt->endOffset()); } } stream->end(); // ensure infos.numTokens > 0 invariant; needed for correct operation of terms() if (numTokens > 0) { boost = boost * docBoost; // see DocumentWriter.addDocument(...) fields.put(fieldName, newLucene(terms, numTokens, numOverlapTokens, boost)); sortedFields.reset(); // invalidate sorted view, if any } } catch (IOException& e) { // can never happen boost::throw_exception(RuntimeException(e.getError())); } catch (LuceneException& e) { finally = e; } try { if (stream) { stream->close(); } } catch (IOException& e) { boost::throw_exception(RuntimeException(e.getError())); } finally.throwException(); } IndexSearcherPtr MemoryIndex::createSearcher() { MemoryIndexReaderPtr reader(newLucene(shared_from_this())); IndexSearcherPtr searcher(newLucene(reader)); // ensures no auto-close reader->setSearcher(searcher); // to later get hold of searcher.getSimilarity() return searcher; } double MemoryIndex::search(const QueryPtr& query) { if (!query) { boost::throw_exception(IllegalArgumentException(L"query must not be null")); } SearcherPtr searcher(createSearcher()); LuceneException finally; try { Collection scores = Collection::newInstance(1); scores[0] = 0.0; // inits to 0.0 (no match) searcher->search(query, newLucene(scores)); return scores[0]; } 
catch (IOException& e) { // can never happen boost::throw_exception(RuntimeException(e.getError())); } catch (LuceneException& e) { finally = e; } finally.throwException(); return 0; // silence static analyzers } int32_t MemoryIndex::numPositions(Collection positions) { return (positions.size() / stride); } struct lessField { inline bool operator()(const PairStringMemoryIndexInfo& first, const PairStringMemoryIndexInfo& second) const { return (first.first < second.first); } }; void MemoryIndex::sortFields() { if (!sortedFields) { sortedFields = CollectionStringMemoryIndexInfo::newInstance(fields.begin(), fields.end()); std::sort(sortedFields.begin(), sortedFields.end(), lessField()); } } MemoryIndexInfo::MemoryIndexInfo(MapStringIntCollection terms, int32_t numTokens, int32_t numOverlapTokens, double boost) { this->terms = terms; this->numTokens = numTokens; this->numOverlapTokens = numOverlapTokens; this->boost = boost; } MemoryIndexInfo::~MemoryIndexInfo() { } struct lessTerm { inline bool operator()(const PairStringIntCollection& first, const PairStringIntCollection& second) const { return (first.first < second.first); } }; void MemoryIndexInfo::sortTerms() { if (!sortedTerms) { sortedTerms = CollectionStringIntCollection::newInstance(terms.begin(), terms.end()); std::sort(sortedTerms.begin(), sortedTerms.end(), lessTerm()); } } Collection MemoryIndexInfo::getPositions(const String& term) { return terms.get(term); } Collection MemoryIndexInfo::getPositions(int32_t pos) { return sortedTerms[pos].second; } double MemoryIndexInfo::getBoost() { return boost; } MemoryIndexReader::MemoryIndexReader(const MemoryIndexPtr& memoryIndex) { this->memoryIndex = memoryIndex; } MemoryIndexReader::~MemoryIndexReader() { } TermPtr MemoryIndexReader::MATCH_ALL_TERM() { static TermPtr _MATCH_ALL_TERM; LUCENE_RUN_ONCE( _MATCH_ALL_TERM = newLucene(L""); CycleCheck::addStatic(_MATCH_ALL_TERM); ); return _MATCH_ALL_TERM; } MemoryIndexInfoPtr MemoryIndexReader::getInfo(const String& 
fieldName) { return memoryIndex->fields.get(fieldName); } MemoryIndexInfoPtr MemoryIndexReader::getInfo(int32_t pos) { return memoryIndex->sortedFields[pos].second; } int32_t MemoryIndexReader::docFreq(const TermPtr& t) { MemoryIndexInfoPtr info(getInfo(t->field())); int32_t freq = 0; if (info) { freq = info->getPositions(t->text()) ? 1 : 0; } return freq; } TermEnumPtr MemoryIndexReader::terms() { return terms(MATCH_ALL_TERM()); } TermEnumPtr MemoryIndexReader::terms(const TermPtr& t) { int32_t i = 0; // index into info.sortedTerms int32_t j = 0; // index into sortedFields memoryIndex->sortFields(); if (memoryIndex->sortedFields.size() == 1 && memoryIndex->sortedFields[0].first == t->field()) { j = 0; // fast path } else { CollectionStringMemoryIndexInfo::iterator search = std::lower_bound(memoryIndex->sortedFields.begin(), memoryIndex->sortedFields.end(), std::make_pair(t->field(), MemoryIndexInfoPtr()), lessField()); int32_t keyPos = std::distance(memoryIndex->sortedFields.begin(), search); j = (search == memoryIndex->sortedFields.end() || t->field() < search->first) ? -(keyPos + 1) : keyPos; } if (j < 0) { // not found; choose successor j = -j - 1; i = 0; if (j < memoryIndex->sortedFields.size()) { getInfo(j)->sortTerms(); } } else { // found MemoryIndexInfoPtr info(getInfo(j)); info->sortTerms(); CollectionStringIntCollection::iterator search = std::lower_bound(info->sortedTerms.begin(), info->sortedTerms.end(), std::make_pair(t->text(), Collection()), lessTerm()); int32_t keyPos = std::distance(info->sortedTerms.begin(), search); i = (search == info->sortedTerms.end() || t->text() < search->first) ? 
-(keyPos + 1) : keyPos; if (i < 0) { // not found; choose successor i = -i - 1; if (i >= info->sortedTerms.size()) { // move to next successor ++j; i = 0; if (j < memoryIndex->sortedFields.size()) { getInfo(j)->sortTerms(); } } } } return newLucene(shared_from_this(), i, j); } TermPositionsPtr MemoryIndexReader::termPositions() { return newLucene(shared_from_this()); } TermDocsPtr MemoryIndexReader::termDocs() { return termPositions(); } Collection MemoryIndexReader::getTermFreqVectors(int32_t docNumber) { Collection vectors(Collection::newInstance()); for (MapStringMemoryIndexInfo::iterator fieldName = memoryIndex->fields.begin(); fieldName != memoryIndex->fields.end(); ++fieldName) { vectors.add(getTermFreqVector(docNumber, fieldName->first)); } return vectors; } void MemoryIndexReader::getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) { for (MapStringMemoryIndexInfo::iterator fieldName = memoryIndex->fields.begin(); fieldName != memoryIndex->fields.end(); ++fieldName) { getTermFreqVector(docNumber, fieldName->first, mapper); } } void MemoryIndexReader::getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) { MemoryIndexInfoPtr info(getInfo(field)); if (!info) { return; } info->sortTerms(); mapper->setExpectations(field, info->sortedTerms.size(), memoryIndex->stride != 1, true); for (int32_t i = info->sortedTerms.size(); --i >=0;) { Collection positions(info->sortedTerms[i].second); int32_t size = positions.size(); Collection offsets(Collection::newInstance(size / memoryIndex->stride)); for (int32_t k = 0, j = 1; j < size; ++k, j += memoryIndex->stride) { int32_t start = positions[j]; int32_t end = positions[j + 1]; offsets[k] = newLucene(start, end); } mapper->map(info->sortedTerms[i].first, memoryIndex->numPositions(info->sortedTerms[i].second), offsets, info->sortedTerms[i].second); } } TermFreqVectorPtr MemoryIndexReader::getTermFreqVector(int32_t docNumber, const String& field) { MemoryIndexInfoPtr 
info(getInfo(field)); if (!info) { return TermFreqVectorPtr(); } info->sortTerms(); return newLucene(shared_from_this(), info, field); } SimilarityPtr MemoryIndexReader::getSimilarity() { SearcherPtr searcher(_searcher.lock()); if (searcher) { return searcher->getSimilarity(); } return Similarity::getDefault(); } void MemoryIndexReader::setSearcher(const SearcherPtr& searcher) { _searcher = searcher; } ByteArray MemoryIndexReader::norms(const String& field) { ByteArray norms(cachedNorms); SimilarityPtr sim(getSimilarity()); if (field != cachedFieldName || sim != cachedSimilarity) { // not cached? MemoryIndexInfoPtr info(getInfo(field)); int32_t numTokens = info ? info->numTokens : 0; int32_t numOverlapTokens = info ? info->numOverlapTokens : 0; double boost = info ? info->getBoost() : 1.0; FieldInvertStatePtr invertState(newLucene(0, numTokens, numOverlapTokens, 0, boost)); double n = sim->computeNorm(field, invertState); uint8_t norm = Similarity::encodeNorm(n); norms = ByteArray::newInstance(1); norms[0] = norm; // cache it for future reuse cachedNorms = norms; cachedFieldName = field; cachedSimilarity = sim; } return norms; } void MemoryIndexReader::norms(const String& field, ByteArray norms, int32_t offset) { ByteArray _norms(this->norms(field)); MiscUtils::arrayCopy(_norms.get(), 0, norms.get(), offset, _norms.size()); } void MemoryIndexReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { boost::throw_exception(UnsupportedOperationException()); } int32_t MemoryIndexReader::numDocs() { return memoryIndex->fields.empty() ? 
0 : 1; } int32_t MemoryIndexReader::maxDoc() { return 1; } DocumentPtr MemoryIndexReader::document(int32_t n) { return newLucene(); // there are no stored fields } DocumentPtr MemoryIndexReader::document(int32_t n, const FieldSelectorPtr& fieldSelector) { return newLucene(); // there are no stored fields } bool MemoryIndexReader::isDeleted(int32_t n) { return false; } bool MemoryIndexReader::hasDeletions() { return false; } void MemoryIndexReader::doDelete(int32_t docNum) { boost::throw_exception(UnsupportedOperationException()); } void MemoryIndexReader::doUndeleteAll() { boost::throw_exception(UnsupportedOperationException()); } void MemoryIndexReader::doCommit(MapStringString commitUserData) { } void MemoryIndexReader::doClose() { } HashSet MemoryIndexReader::getFieldNames(FieldOption fieldOption) { static HashSet emptySet; LUCENE_RUN_ONCE( emptySet = HashSet::newInstance(); ); if (fieldOption == FIELD_OPTION_UNINDEXED) { return emptySet; } if (fieldOption == FIELD_OPTION_INDEXED_NO_TERMVECTOR) { return emptySet; } if (fieldOption == FIELD_OPTION_TERMVECTOR_WITH_OFFSET && memoryIndex->stride == 1) { return emptySet; } if (fieldOption == FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET && memoryIndex->stride == 1) { return emptySet; } HashSet fieldSet(HashSet::newInstance()); for (MapStringMemoryIndexInfo::iterator field = memoryIndex->fields.begin(); field != memoryIndex->fields.end(); ++field) { fieldSet.add(field->first); } return fieldSet; } MemoryIndexTermEnum::MemoryIndexTermEnum(const MemoryIndexReaderPtr& reader, int32_t ix, int32_t jx) { _reader = reader; i = ix; j = jx; } MemoryIndexTermEnum::~MemoryIndexTermEnum() { } bool MemoryIndexTermEnum::next() { MemoryIndexReaderPtr reader(_reader); if (j >= reader->memoryIndex->sortedFields.size()) { return false; } MemoryIndexInfoPtr info(reader->getInfo(j)); if (++i < info->sortedTerms.size()) { return true; } // move to successor ++j; i = 0; if (j >= reader->memoryIndex->sortedFields.size()) { return false; } 
reader->getInfo(j)->sortTerms(); return true; } TermPtr MemoryIndexTermEnum::term() { MemoryIndexReaderPtr reader(_reader); if (j >= reader->memoryIndex->sortedFields.size()) { return TermPtr(); } MemoryIndexInfoPtr info(reader->getInfo(j)); if (i >= info->sortedTerms.size()) { return TermPtr(); } return createTerm(info, j, info->sortedTerms[i].first); } int32_t MemoryIndexTermEnum::docFreq() { MemoryIndexReaderPtr reader(_reader); if (j >= reader->memoryIndex->sortedFields.size()) { return 0; } MemoryIndexInfoPtr info(reader->getInfo(j)); if (i >= info->sortedTerms.size()) { return 0; } return reader->memoryIndex->numPositions(info->getPositions(i)); } void MemoryIndexTermEnum::close() { } TermPtr MemoryIndexTermEnum::createTerm(const MemoryIndexInfoPtr& info, int32_t pos, const String& text) { TermPtr _template(info->_template); if (!_template) { // not yet cached? MemoryIndexReaderPtr reader(_reader); String fieldName(reader->memoryIndex->sortedFields[pos].first); _template = newLucene(fieldName); info->_template = _template; } return _template->createTerm(text); } MemoryIndexCollector::MemoryIndexCollector(Collection scores) { this->scores = scores; } MemoryIndexCollector::~MemoryIndexCollector() { } void MemoryIndexCollector::collect(int32_t doc) { scores[0] = scorer->score(); } void MemoryIndexCollector::setScorer(const ScorerPtr& scorer) { this->scorer = scorer; } bool MemoryIndexCollector::acceptsDocsOutOfOrder() { return true; } void MemoryIndexCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { } MemoryIndexTermPositions::MemoryIndexTermPositions(const MemoryIndexReaderPtr& reader) { _reader = reader; hasNext = false; cursor = 0; } MemoryIndexTermPositions::~MemoryIndexTermPositions() { } void MemoryIndexTermPositions::seek(const TermPtr& term) { this->term = term; if (!term) { hasNext = true; // term == null means match all docs } else { MemoryIndexReaderPtr reader(_reader); MemoryIndexInfoPtr info(reader->getInfo(term->field())); 
current = info ? info->getPositions(term->text()) : Collection(); hasNext = current; cursor = 0; } } void MemoryIndexTermPositions::seek(const TermEnumPtr& termEnum) { seek(termEnum->term()); } int32_t MemoryIndexTermPositions::doc() { return 0; } int32_t MemoryIndexTermPositions::freq() { MemoryIndexReaderPtr reader(_reader); int32_t freq = current ? reader->memoryIndex->numPositions(current) : (term ? 0 : 1); return freq; } bool MemoryIndexTermPositions::next() { bool _next = hasNext; hasNext = false; return _next; } int32_t MemoryIndexTermPositions::read(Collection& docs, Collection& freqs) { if (!hasNext) { return 0; } hasNext = false; docs[0] = 0; freqs[0] = freq(); return 1; } bool MemoryIndexTermPositions::skipTo(int32_t target) { return next(); } void MemoryIndexTermPositions::close() { } int32_t MemoryIndexTermPositions::nextPosition() { // implements TermPositions MemoryIndexReaderPtr reader(_reader); int32_t pos = current[cursor]; cursor += reader->memoryIndex->stride; return pos; } int32_t MemoryIndexTermPositions::getPayloadLength() { boost::throw_exception(UnsupportedOperationException()); } ByteArray MemoryIndexTermPositions::getPayload(ByteArray data, int32_t offset) { boost::throw_exception(UnsupportedOperationException()); return ByteArray(); } bool MemoryIndexTermPositions::isPayloadAvailable() { return false; // unsupported } MemoryIndexTermPositionVector::MemoryIndexTermPositionVector(const MemoryIndexReaderPtr& reader, const MemoryIndexInfoPtr& info, const String& fieldName) { this->_reader = reader; this->sortedTerms = info->sortedTerms; this->fieldName = fieldName; } MemoryIndexTermPositionVector::~MemoryIndexTermPositionVector() { } String MemoryIndexTermPositionVector::getField() { return fieldName; } int32_t MemoryIndexTermPositionVector::size() { return sortedTerms.size(); } Collection MemoryIndexTermPositionVector::getTerms() { Collection terms(Collection::newInstance(sortedTerms.size())); for (int32_t i = sortedTerms.size(); --i >= 0;) 
{ terms[i] = sortedTerms[i].first; } return terms; } Collection MemoryIndexTermPositionVector::getTermFrequencies() { MemoryIndexReaderPtr reader(_reader); Collection freqs(Collection::newInstance(sortedTerms.size())); for (int32_t i = sortedTerms.size(); --i >= 0;) { freqs[i] = reader->memoryIndex->numPositions(sortedTerms[i].second); } return freqs; } int32_t MemoryIndexTermPositionVector::indexOf(const String& term) { CollectionStringIntCollection::iterator search = std::lower_bound(sortedTerms.begin(), sortedTerms.end(), std::make_pair(term, Collection()), lessTerm()); return (search == sortedTerms.end() || term < search->first) ? -1 : std::distance(sortedTerms.begin(), search); } Collection MemoryIndexTermPositionVector::indexesOf(Collection terms, int32_t start, int32_t length) { Collection indexes(Collection::newInstance(length)); for (int32_t i = 0; i < length; ++i) { indexes[i] = indexOf(terms[start++]); } return indexes; } Collection MemoryIndexTermPositionVector::getTermPositions(int32_t index) { return sortedTerms[index].second; } Collection MemoryIndexTermPositionVector::getOffsets(int32_t index) { MemoryIndexReaderPtr reader(_reader); if (reader->memoryIndex->stride == 1) { return Collection(); // no offsets stored } Collection positions(sortedTerms[index].second); int32_t size = positions.size(); Collection offsets(Collection::newInstance(size / reader->memoryIndex->stride)); for (int32_t i = 0, j = 1; j < size; ++i, j += reader->memoryIndex->stride) { int32_t start = positions[j]; int32_t end = positions[j + 1]; offsets[i] = newLucene(start, end); } return offsets; } } LucenePlusPlus-rel_3.0.9/src/contrib/msvc/000077500000000000000000000000001456444476200205275ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/msvc/ContribInc.cpp000066400000000000000000000005531456444476200232700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" LucenePlusPlus-rel_3.0.9/src/contrib/msvc/dllmain.cpp000066400000000000000000000012631456444476200226550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #if defined(_WIN32) && defined(LPP_HAVE_DLL) BOOL APIENTRY DllMain(HMODULE module, DWORD ul_reason_for_call, LPVOID lpReserved) { switch (ul_reason_for_call) { case DLL_PROCESS_ATTACH: case DLL_THREAD_ATTACH: case DLL_THREAD_DETACH: case DLL_PROCESS_DETACH: break; } return TRUE; } #endif LucenePlusPlus-rel_3.0.9/src/contrib/msvc/lucene_contrib.vcproj000066400000000000000000001242651456444476200247610ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.9/src/contrib/msvc/lucene_contrib.vcxproj000066400000000000000000001312041456444476200251400ustar00rootroot00000000000000 Debug DLL Win32 Debug Static Win32 Release DLL Win32 Release Static Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B} lucene_contrib Win32Proj StaticLibrary Unicode true StaticLibrary Unicode DynamicLibrary Unicode true DynamicLibrary Unicode <_ProjectFileVersion>10.0.40219.1 $(ProjectDir)$(Configuration)\ $(Configuration)\ true $(ProjectDir)$(Configuration)\ $(Configuration)\ false $(ProjectDir)$(Configuration)\ $(Configuration)\ $(ProjectDir)$(Configuration)\ $(Configuration)\ /Zm120 %(AdditionalOptions) Disabled ..\include;..\..\..\include;..\snowball\libstemmer_c\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) 
WIN32;_DEBUG;_WINDOWS;_USRDLL;LPP_HAVE_DLL;LPP_BUILDING_LIB;%(PreprocessorDefinitions) true Async EnableFastChecks MultiThreadedDebugDLL Use ContribInc.h Level3 EditAndContinue 4996;%(DisableSpecificWarnings) false lucene++.lib;%(AdditionalDependencies) $(BOOST_ROOT)\lib32-msvc-10.0;..\..\..\lib;%(AdditionalLibraryDirectories) true Windows MachineX86 copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." copy "$(OutDir)$(ProjectName).dll" "..\..\..\bin\." /Zm120 %(AdditionalOptions) MaxSpeed AnySuitable true Speed true ..\include;..\..\..\include;..\snowball\libstemmer_c\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;NDEBUG;_WINDOWS;_USRDLL;LPP_HAVE_DLL;LPP_BUILDING_LIB;%(PreprocessorDefinitions) Async MultiThreadedDLL true Use ContribInc.h Level3 ProgramDatabase 4996;%(DisableSpecificWarnings) false lucene++.lib;%(AdditionalDependencies) $(BOOST_ROOT)\stage\lib;..\..\..\lib;%(AdditionalLibraryDirectories) true Windows true true MachineX86 copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." copy "$(OutDir)$(ProjectName).dll" "..\..\..\bin\." /Zm120 %(AdditionalOptions) Disabled ..\include;..\..\..\include;..\snowball\libstemmer_c\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;_DEBUG;_LIB;LPP_BUILDING_LIB;%(PreprocessorDefinitions) true EnableFastChecks MultiThreadedDebugDLL Use ContribInc.h Level3 EditAndContinue 4996;%(DisableSpecificWarnings) false /IGNORE:4221 %(AdditionalOptions) $(BOOST_ROOT)\stage\lib;%(AdditionalLibraryDirectories) copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." 
/Zm120 %(AdditionalOptions) MaxSpeed AnySuitable true Speed true ..\include;..\..\..\include;..\snowball\libstemmer_c\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;NDEBUG;_LIB;LPP_BUILDING_LIB;%(PreprocessorDefinitions) MultiThreadedDLL true Use ContribInc.h Level3 ProgramDatabase 4996;%(DisableSpecificWarnings) false /IGNORE:4221 %(AdditionalOptions) $(BOOST_ROOT)\stage\lib;%(AdditionalLibraryDirectories) copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." Create Create Create Create false false false false Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) 
Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) Config.h;%(ForcedIncludeFiles) {46a95afd-95fd-4280-b22e-1b56f273144a} false LucenePlusPlus-rel_3.0.9/src/contrib/msvc/lucene_contrib.vcxproj.filters000066400000000000000000000633721456444476200266210ustar00rootroot00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hpp;hxx;hm;inl;inc;xsd {ed281916-1385-43dd-ba49-d40e8504292f} {cbc62969-9e1a-4ea1-8a5b-70dc05c54a74} {af08d7d6-346f-4315-8a08-e5670d8ea661} {5f664ed0-5376-4c1d-be74-9f437a8f28a0} {25265c9f-fb03-4de0-af5b-1dc8a22b6a39} {cbd88c58-498d-4cf6-a8b8-cab8ae9c1a33} {f22aa0ca-0c74-4b24-8c13-ac4bf2eaeb62} {9d6a5d6b-5270-4d71-bf47-e1fd628f0de5} {de0f0dac-e3c9-4c68-9645-bd7facc23cad} {b3fa72fe-e465-4c37-879c-ca78a4cdfa04} {ff7c09cf-c9c4-48f1-b627-c4577cf04005} {9a006a1c-6c67-4ad5-9a3a-1e73278296de} {15af4be9-0258-4006-8d4a-e14efa2d23d2} {897982c5-a448-4a64-a394-5b6621b73523} {892af3ea-b061-4b0e-83ba-d221682d4d98} {1f5a687a-5a25-4d1f-aff1-224dd446d1b4} {65078152-1671-49dc-8eff-10daa009b07b} {99f2b070-1b33-44cc-b645-c887144eb557} {a5432a73-3546-49ee-9b10-974d0dfbd3a7} 
{0df31b77-1d5d-42dc-839d-c77784ea81c4} {72f1545b-b051-45f8-b0b3-bf0facf48e9b} {2d2a742a-d676-42ee-be16-54d6ed3e42b0} source files source files analyzers\common\analysis\reverse analyzers\common\analysis\ar analyzers\common\analysis\ar analyzers\common\analysis\ar analyzers\common\analysis\ar analyzers\common\analysis\ar analyzers\common\analysis\ar analyzers\common\analysis\br analyzers\common\analysis\br analyzers\common\analysis\br analyzers\common\analysis\cjk analyzers\common\analysis\cjk analyzers\common\analysis\cn analyzers\common\analysis\cn analyzers\common\analysis\cn analyzers\common\analysis\cz analyzers\common\analysis\de analyzers\common\analysis\de analyzers\common\analysis\de analyzers\common\analysis\el analyzers\common\analysis\el analyzers\common\analysis\fa analyzers\common\analysis\fa analyzers\common\analysis\fa analyzers\common\analysis\fr analyzers\common\analysis\fr analyzers\common\analysis\fr analyzers\common\analysis\fr analyzers\common\analysis\nl analyzers\common\analysis\nl analyzers\common\analysis\nl analyzers\common\analysis\ru analyzers\common\analysis\ru analyzers\common\analysis\ru analyzers\common\analysis\ru analyzers\common\analysis\ru snowball snowball snowball\libstemmer\runtime snowball\libstemmer\runtime snowball\libstemmer\libstemmer snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter memory header files header files header 
files header files header files header files header files analyzers\common\analysis\reverse analyzers\common\analysis\ar analyzers\common\analysis\ar analyzers\common\analysis\ar analyzers\common\analysis\ar analyzers\common\analysis\ar analyzers\common\analysis\ar analyzers\common\analysis\br analyzers\common\analysis\br analyzers\common\analysis\br analyzers\common\analysis\cjk analyzers\common\analysis\cjk analyzers\common\analysis\cn analyzers\common\analysis\cn analyzers\common\analysis\cn analyzers\common\analysis\cz analyzers\common\analysis\de analyzers\common\analysis\de analyzers\common\analysis\de analyzers\common\analysis\el analyzers\common\analysis\el analyzers\common\analysis\fa analyzers\common\analysis\fa analyzers\common\analysis\fa analyzers\common\analysis\fr analyzers\common\analysis\fr analyzers\common\analysis\fr analyzers\common\analysis\fr analyzers\common\analysis\nl analyzers\common\analysis\nl analyzers\common\analysis\nl analyzers\common\analysis\ru analyzers\common\analysis\ru analyzers\common\analysis\ru analyzers\common\analysis\ru analyzers\common\analysis\ru snowball snowball snowball\libstemmer\runtime snowball\libstemmer\runtime snowball\libstemmer\libstemmer snowball\libstemmer\libstemmer snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src snowball\libstemmer\src highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter highlighter memory 
LucenePlusPlus-rel_3.0.9/src/contrib/snowball/000077500000000000000000000000001456444476200214005ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/snowball/SnowballAnalyzer.cpp000066400000000000000000000046421456444476200254010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "SnowballAnalyzer.h" #include "StandardTokenizer.h" #include "StandardFilter.h" #include "LowerCaseFilter.h" #include "StopFilter.h" #include "SnowballFilter.h" namespace Lucene { SnowballAnalyzer::SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name) { this->matchVersion = matchVersion; this->name = name; } SnowballAnalyzer::SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name, HashSet stopwords) { this->stopSet = stopwords; this->matchVersion = matchVersion; this->name = name; } SnowballAnalyzer::~SnowballAnalyzer() { } TokenStreamPtr SnowballAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { TokenStreamPtr result = newLucene(matchVersion, reader); result = newLucene(result); result = newLucene(result); if (stopSet) { result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); } result = newLucene(result, name); return result; } TokenStreamPtr SnowballAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { SnowballAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(matchVersion, reader); streams->result = newLucene(streams->source); streams->result = newLucene(streams->result); if (stopSet) { 
streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); } streams->result = newLucene(streams->result, name); setPreviousTokenStream(streams); } else { streams->source->reset(reader); } return streams->result; } SnowballAnalyzerSavedStreams::~SnowballAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/SnowballFilter.cpp000066400000000000000000000031441456444476200250350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "ContribInc.h" #include "SnowballFilter.h" #include "TermAttribute.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" #include "libstemmer_c/include/libstemmer.h" namespace Lucene { SnowballFilter::SnowballFilter(const TokenStreamPtr& input, const String& name) : TokenFilter(input) { stemmer = sb_stemmer_new(StringUtils::toUTF8(name).c_str(), "UTF_8"); if (stemmer == NULL) { boost::throw_exception(IllegalArgumentException(L"language not available for stemming:" + name)); } termAtt = addAttribute(); utf8Result = newLucene(); } SnowballFilter::~SnowballFilter() { } bool SnowballFilter::incrementToken() { if (input->incrementToken()) { StringUtils::toUTF8(termAtt->termBuffer().get(), termAtt->termLength(), utf8Result); const sb_symbol* stemmed = sb_stemmer_stem(stemmer, utf8Result->result.get(), utf8Result->length); if (stemmed == NULL) { boost::throw_exception(RuntimeException(L"exception stemming word:" + termAtt->term())); } int32_t newlen = StringUtils::toUnicode(stemmed, sb_stemmer_length(stemmer), termAtt->termBuffer()); termAtt->setTermLength(newlen); return true; } else { return false; } } } 
LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/000077500000000000000000000000001456444476200240455ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/MANIFEST000066400000000000000000000037411456444476200252030ustar00rootroot00000000000000README src_c/stem_ISO_8859_1_danish.c src_c/stem_ISO_8859_1_danish.h src_c/stem_ISO_8859_1_dutch.c src_c/stem_ISO_8859_1_dutch.h src_c/stem_ISO_8859_1_english.c src_c/stem_ISO_8859_1_english.h src_c/stem_ISO_8859_1_finnish.c src_c/stem_ISO_8859_1_finnish.h src_c/stem_ISO_8859_1_french.c src_c/stem_ISO_8859_1_french.h src_c/stem_ISO_8859_1_german.c src_c/stem_ISO_8859_1_german.h src_c/stem_ISO_8859_1_hungarian.c src_c/stem_ISO_8859_1_hungarian.h src_c/stem_ISO_8859_1_italian.c src_c/stem_ISO_8859_1_italian.h src_c/stem_ISO_8859_1_norwegian.c src_c/stem_ISO_8859_1_norwegian.h src_c/stem_ISO_8859_1_porter.c src_c/stem_ISO_8859_1_porter.h src_c/stem_ISO_8859_1_portuguese.c src_c/stem_ISO_8859_1_portuguese.h src_c/stem_ISO_8859_1_spanish.c src_c/stem_ISO_8859_1_spanish.h src_c/stem_ISO_8859_1_swedish.c src_c/stem_ISO_8859_1_swedish.h src_c/stem_ISO_8859_2_romanian.c src_c/stem_ISO_8859_2_romanian.h src_c/stem_KOI8_R_russian.c src_c/stem_KOI8_R_russian.h src_c/stem_UTF_8_danish.c src_c/stem_UTF_8_danish.h src_c/stem_UTF_8_dutch.c src_c/stem_UTF_8_dutch.h src_c/stem_UTF_8_english.c src_c/stem_UTF_8_english.h src_c/stem_UTF_8_finnish.c src_c/stem_UTF_8_finnish.h src_c/stem_UTF_8_french.c src_c/stem_UTF_8_french.h src_c/stem_UTF_8_german.c src_c/stem_UTF_8_german.h src_c/stem_UTF_8_hungarian.c src_c/stem_UTF_8_hungarian.h src_c/stem_UTF_8_italian.c src_c/stem_UTF_8_italian.h src_c/stem_UTF_8_norwegian.c src_c/stem_UTF_8_norwegian.h src_c/stem_UTF_8_porter.c src_c/stem_UTF_8_porter.h src_c/stem_UTF_8_portuguese.c src_c/stem_UTF_8_portuguese.h src_c/stem_UTF_8_romanian.c src_c/stem_UTF_8_romanian.h src_c/stem_UTF_8_russian.c src_c/stem_UTF_8_russian.h src_c/stem_UTF_8_spanish.c 
src_c/stem_UTF_8_spanish.h src_c/stem_UTF_8_swedish.c src_c/stem_UTF_8_swedish.h src_c/stem_UTF_8_turkish.c src_c/stem_UTF_8_turkish.h runtime/api.c runtime/api.h runtime/header.h runtime/utilities.c libstemmer/libstemmer.c libstemmer/libstemmer_utf8.c libstemmer/modules.h libstemmer/modules_utf8.h include/libstemmer.h LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/Makefile000066400000000000000000000003671456444476200255130ustar00rootroot00000000000000include mkinc.mak CFLAGS=-Iinclude all: libstemmer.o stemwords libstemmer.o: $(snowball_sources:.c=.o) $(AR) -cru $@ $^ stemwords: examples/stemwords.o libstemmer.o $(CC) -o $@ $^ clean: rm -f stemwords *.o src_c/*.o runtime/*.o libstemmer/*.o LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/README000066400000000000000000000117751456444476200247400ustar00rootroot00000000000000libstemmer_c ============ This document pertains to the C version of the libstemmer distribution, available for download from: http://snowball.tartarus.org/dist/libstemmer_c.tgz Compiling the library ===================== A simple makefile is provided for Unix style systems. On such systems, it should be possible simply to run "make", and the file "libstemmer.o" and the example program "stemwords" will be generated. If this doesn't work on your system, you need to write your own build system (or call the compiler directly). The files to compile are all contained in the "libstemmer", "runtime" and "src_c" directories, and the public header file is contained in the "include" directory. The library comes in two flavours; UTF-8 only, and UTF-8 plus other character sets. To use the utf-8 only flavour, compile "libstemmer_utf8.c" instead of "libstemmer.c". For convenience "mkinc.mak" is a makefile fragment listing the source files and header files used to compile the standard version of the library. "mkinc_utf8.mak" is a comparable makefile fragment listing just the source files for the UTF-8 only version of the library. 
Using the library ================= The library provides a simple C API. Essentially, a new stemmer can be obtained by using "sb_stemmer_new". "sb_stemmer_stem" is then used to stem a word, "sb_stemmer_length" returns the stemmed length of the last word processed, and "sb_stemmer_delete" is used to delete a stemmer. Creating a stemmer is a relatively expensive operation - the expected usage pattern is that a new stemmer is created when needed, used to stem many words, and deleted after some time. Stemmers are re-entrant, but not threadsafe. In other words, if you wish to access the same stemmer object from multiple threads, you must ensure that all access is protected by a mutex or similar device. libstemmer does not currently incorporate any mechanism for caching the results of stemming operations. Such caching can greatly increase the performance of a stemmer under certain situations, so suitable patches will be considered for inclusion. The standard libstemmer sources contain an algorithm for each of the supported languages. The algorithm may be selected using the english name of the language, or using the 2 or 3 letter ISO 639 language codes. In addition, the traditional "Porter" stemming algorithm for english is included for backwards compatibility purposes, but we recommend use of the "English" stemmer in preference for new projects. (Some minor algorithms which are included only as curiosities in the snowball website, such as the Lovins stemmer and the Kraaij Pohlmann stemmer, are not included in the standard libstemmer sources. These are not really supported by the snowball project, but it would be possible to compile a modified libstemmer library containing these if desired.) The stemwords example ===================== The stemwords example program allows you to run any of the stemmers compiled into the libstemmer library on a sample vocabulary. For details on how to use it, run it with the "-h" command line option. 
Using the library in a larger system ==================================== If you are incorporating the library into the build system of a larger program, I recommend copying the unpacked tarball without modification into a subdirectory of the sources of your program. Future versions of the library are intended to keep the same structure, so this will keep the work required to move to a new version of the library to a minimum. As an additional convenience, the list of source and header files used in the library is detailed in mkinc.mak - a file which is in a suitable format for inclusion by a Makefile. By including this file in your build system, you can link the snowball system into your program with a few extra rules. Using the library in a system using GNU autotools ================================================= The libstemmer_c library can be integrated into a larger system which uses the GNU autotool framework (and in particular, automake and autoconf) as follows: 1) Unpack libstemmer_c.tgz in the top level project directory so that there is a libstemmer_c subdirectory of the top level directory of the project. 2) Add a file "Makefile.am" to the unpacked libstemmer_c folder, containing: noinst_LTLIBRARIES = libstemmer.la include $(srcdir)/mkinc.mak noinst_HEADERS = $(snowball_headers) libstemmer_la_SOURCES = $(snowball_sources) (You may also need to add other lines to this, for example, if you are using compiler options which are not compatible with compiling the libstemmer library.) 3) Add libstemmer_c to the AC_CONFIG_FILES declaration in the project's configure.ac file. 4) Add to the top level makefile the following lines (or modify existing assignments to these variables appropriately): AUTOMAKE_OPTIONS = subdir-objects AM_CPPFLAGS = -I$(top_srcdir)/libstemmer_c/include SUBDIRS=libstemmer_c _LIBADD = libstemmer_c/libstemmer.la (Where is the name of the library or executable which links against libstemmer.) 
LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/examples/000077500000000000000000000000001456444476200256635ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/examples/stemwords.c000066400000000000000000000123421456444476200300600ustar00rootroot00000000000000/* This is a simple program which uses libstemmer to provide a command * line interface for stemming using any of the algorithms provided. */ #include #include /* for malloc, free */ #include /* for memmove */ #include /* for isupper, tolower */ #include "libstemmer.h" const char * progname; static int pretty = 1; static void stem_file(struct sb_stemmer * stemmer, FILE * f_in, FILE * f_out) { #define INC 10 int lim = INC; sb_symbol * b = (sb_symbol *) malloc(lim * sizeof(sb_symbol)); while(1) { int ch = getc(f_in); if (ch == EOF) { free(b); return; } { int i = 0; int inlen = 0; while(1) { if (ch == '\n' || ch == EOF) break; if (i == lim) { sb_symbol * newb; newb = (sb_symbol *) realloc(b, (lim + INC) * sizeof(sb_symbol)); if (newb == 0) goto error; b = newb; lim = lim + INC; } /* Update count of utf-8 characters. */ if (ch < 0x80 || ch > 0xBF) inlen += 1; /* force lower case: */ if (isupper(ch)) ch = tolower(ch); b[i] = ch; i++; ch = getc(f_in); } { const sb_symbol * stemmed = sb_stemmer_stem(stemmer, b, i); if (stemmed == NULL) { fprintf(stderr, "Out of memory"); exit(1); } else { if (pretty == 1) { fwrite(b, i, 1, f_out); fputs(" -> ", f_out); } else if (pretty == 2) { fwrite(b, i, 1, f_out); if (sb_stemmer_length(stemmer) > 0) { int j; if (inlen < 30) { for (j = 30 - inlen; j > 0; j--) fputs(" ", f_out); } else { fputs("\n", f_out); for (j = 30; j > 0; j--) fputs(" ", f_out); } } } fputs((char *)stemmed, f_out); putc('\n', f_out); } } } } error: if (b != 0) free(b); return; } /** Display the command line syntax, and then exit. * @param n The value to exit with. 
*/ static void usage(int n) { printf("usage: %s [-l ] [-i ] [-o ] [-c ] [-p[2]] [-h]\n" "\n" "The input file consists of a list of words to be stemmed, one per\n" "line. Words should be in lower case, but (for English) A-Z letters\n" "are mapped to their a-z equivalents anyway. If omitted, stdin is\n" "used.\n" "\n" "If -c is given, the argument is the character encoding of the input\n" "and output files. If it is omitted, the UTF-8 encoding is used.\n" "\n" "If -p is given the output file consists of each word of the input\n" "file followed by \"->\" followed by its stemmed equivalent.\n" "If -p2 is given the output file is a two column layout containing\n" "the input words in the first column and the stemmed eqivalents in\n" "the second column.\n" "Otherwise, the output file consists of the stemmed words, one per\n" "line.\n" "\n" "-h displays this help\n", progname); exit(n); } int main(int argc, char * argv[]) { char * in = 0; char * out = 0; FILE * f_in; FILE * f_out; struct sb_stemmer * stemmer; char * language = "english"; char * charenc = NULL; char * s; int i = 1; pretty = 0; progname = argv[0]; while(i < argc) { s = argv[i++]; if (s[0] == '-') { if (strcmp(s, "-o") == 0) { if (i >= argc) { fprintf(stderr, "%s requires an argument\n", s); exit(1); } out = argv[i++]; } else if (strcmp(s, "-i") == 0) { if (i >= argc) { fprintf(stderr, "%s requires an argument\n", s); exit(1); } in = argv[i++]; } else if (strcmp(s, "-l") == 0) { if (i >= argc) { fprintf(stderr, "%s requires an argument\n", s); exit(1); } language = argv[i++]; } else if (strcmp(s, "-c") == 0) { if (i >= argc) { fprintf(stderr, "%s requires an argument\n", s); exit(1); } charenc = argv[i++]; } else if (strcmp(s, "-p2") == 0) { pretty = 2; } else if (strcmp(s, "-p") == 0) { pretty = 1; } else if (strcmp(s, "-h") == 0) { usage(0); } else { fprintf(stderr, "option %s unknown\n", s); usage(1); } } else { fprintf(stderr, "unexpected parameter %s\n", s); usage(1); } } /* prepare the files */ f_in = 
(in == 0) ? stdin : fopen(in, "r"); if (f_in == 0) { fprintf(stderr, "file %s not found\n", in); exit(1); } f_out = (out == 0) ? stdout : fopen(out, "w"); if (f_out == 0) { fprintf(stderr, "file %s cannot be opened\n", out); exit(1); } /* do the stemming process: */ stemmer = sb_stemmer_new(language, charenc); if (stemmer == 0) { if (charenc == NULL) { fprintf(stderr, "language `%s' not available for stemming\n", language); exit(1); } else { fprintf(stderr, "language `%s' not available for stemming in encoding `%s'\n", language, charenc); exit(1); } } stem_file(stemmer, f_in, f_out); sb_stemmer_delete(stemmer); if (in != 0) (void) fclose(f_in); if (out != 0) (void) fclose(f_out); return 0; } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/include/000077500000000000000000000000001456444476200254705ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/include/libstemmer.h000066400000000000000000000055701456444476200300130ustar00rootroot00000000000000 /* Make header file work when included from C++ */ #ifdef __cplusplus extern "C" { #endif struct sb_stemmer; typedef unsigned char sb_symbol; /* FIXME - should be able to get a version number for each stemming * algorithm (which will be incremented each time the output changes). */ /** Returns an array of the names of the available stemming algorithms. * Note that these are the canonical names - aliases (ie, other names for * the same algorithm) will not be included in the list. * The list is terminated with a null pointer. * * The list must not be modified in any way. */ const char** sb_stemmer_list(void); /** Create a new stemmer object, using the specified algorithm, for the * specified character encoding. * * All algorithms will usually be available in UTF-8, but may also be * available in other character encodings. * * @param algorithm The algorithm name. This is either the english * name of the algorithm, or the 2 or 3 letter ISO 639 codes for the * language. 
Note that case is significant in this parameter - the * value should be supplied in lower case. * * @param charenc The character encoding. NULL may be passed as * this value, in which case UTF-8 encoding will be assumed. Otherwise, * the argument may be one of "UTF_8", "ISO_8859_1" (ie, Latin 1), * "CP850" (ie, MS-DOS Latin 1) or "KOI8_R" (Russian). Note that * case is significant in this parameter. * * @return NULL if the specified algorithm is not recognised, or the * algorithm is not available for the requested encoding. Otherwise, * returns a pointer to a newly created stemmer for the requested algorithm. * The returned pointer must be deleted by calling sb_stemmer_delete(). * * @note NULL will also be returned if an out of memory error occurs. */ struct sb_stemmer* sb_stemmer_new(const char* algorithm, const char* charenc); /** Delete a stemmer object. * * This frees all resources allocated for the stemmer. After calling * this function, the supplied stemmer may no longer be used in any way. * * It is safe to pass a null pointer to this function - this will have * no effect. */ void sb_stemmer_delete(struct sb_stemmer* stemmer); /** Stem a word. * * The return value is owned by the stemmer - it must not be freed or * modified, and it will become invalid when the stemmer is called again, * or if the stemmer is freed. * * The length of the return value can be obtained using sb_stemmer_length(). * * If an out-of-memory error occurs, this will return NULL. */ const sb_symbol* sb_stemmer_stem(struct sb_stemmer* stemmer, const sb_symbol* word, int size); /** Get the length of the result of the last stemmed word. * This should not be called before sb_stemmer_stem() has been called. 
*/ int sb_stemmer_length(struct sb_stemmer* stemmer); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/libstemmer/000077500000000000000000000000001456444476200262105ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/libstemmer/libstemmer.c000066400000000000000000000042531456444476200305230ustar00rootroot00000000000000 #include #include #include "../include/libstemmer.h" #include "../runtime/api.h" #include "modules.h" struct sb_stemmer { struct SN_env * (*create)(void); void (*close)(struct SN_env *); int (*stem)(struct SN_env *); struct SN_env * env; }; extern const char ** sb_stemmer_list(void) { return algorithm_names; } static stemmer_encoding_t sb_getenc(const char * charenc) { struct stemmer_encoding * encoding; if (charenc == NULL) return ENC_UTF_8; for (encoding = encodings; encoding->name != 0; encoding++) { if (strcmp(encoding->name, charenc) == 0) break; } if (encoding->name == NULL) return ENC_UNKNOWN; return encoding->enc; } extern struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc) { stemmer_encoding_t enc; struct stemmer_modules * module; struct sb_stemmer * stemmer; enc = sb_getenc(charenc); if (enc == ENC_UNKNOWN) return NULL; for (module = modules; module->name != 0; module++) { if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break; } if (module->name == NULL) return NULL; stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer)); if (stemmer == NULL) return NULL; stemmer->create = module->create; stemmer->close = module->close; stemmer->stem = module->stem; stemmer->env = stemmer->create(); if (stemmer->env == NULL) { sb_stemmer_delete(stemmer); return NULL; } return stemmer; } void sb_stemmer_delete(struct sb_stemmer * stemmer) { if (stemmer == 0) return; if (stemmer->close == 0) return; stemmer->close(stemmer->env); stemmer->close = 0; free(stemmer); } const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer, 
const sb_symbol * word, int size) { int ret; if (SN_set_current(stemmer->env, size, (const symbol *)(word))) { stemmer->env->l = 0; return NULL; } ret = stemmer->stem(stemmer->env); if (ret < 0) return NULL; stemmer->env->p[stemmer->env->l] = 0; return (const sb_symbol *)(stemmer->env->p); } int sb_stemmer_length(struct sb_stemmer * stemmer) { return stemmer->env->l; } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/libstemmer/libstemmer_c.in000066400000000000000000000042551456444476200312130ustar00rootroot00000000000000 #include #include #include "../include/libstemmer.h" #include "../runtime/api.h" #include "@MODULES_H@" struct sb_stemmer { struct SN_env * (*create)(void); void (*close)(struct SN_env *); int (*stem)(struct SN_env *); struct SN_env * env; }; extern const char ** sb_stemmer_list(void) { return algorithm_names; } static stemmer_encoding_t sb_getenc(const char * charenc) { struct stemmer_encoding * encoding; if (charenc == NULL) return ENC_UTF_8; for (encoding = encodings; encoding->name != 0; encoding++) { if (strcmp(encoding->name, charenc) == 0) break; } if (encoding->name == NULL) return ENC_UNKNOWN; return encoding->enc; } extern struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc) { stemmer_encoding_t enc; struct stemmer_modules * module; struct sb_stemmer * stemmer; enc = sb_getenc(charenc); if (enc == ENC_UNKNOWN) return NULL; for (module = modules; module->name != 0; module++) { if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break; } if (module->name == NULL) return NULL; stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer)); if (stemmer == NULL) return NULL; stemmer->create = module->create; stemmer->close = module->close; stemmer->stem = module->stem; stemmer->env = stemmer->create(); if (stemmer->env == NULL) { sb_stemmer_delete(stemmer); return NULL; } return stemmer; } void sb_stemmer_delete(struct sb_stemmer * stemmer) { if (stemmer == 0) return; if (stemmer->close == 
0) return; stemmer->close(stemmer->env); stemmer->close = 0; free(stemmer); } const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size) { int ret; if (SN_set_current(stemmer->env, size, (const symbol *)(word))) { stemmer->env->l = 0; return NULL; } ret = stemmer->stem(stemmer->env); if (ret < 0) return NULL; stemmer->env->p[stemmer->env->l] = 0; return (const sb_symbol *)(stemmer->env->p); } int sb_stemmer_length(struct sb_stemmer * stemmer) { return stemmer->env->l; } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/libstemmer/libstemmer_utf8.c000066400000000000000000000042601456444476200314670ustar00rootroot00000000000000 #include #include #include "../include/libstemmer.h" #include "../runtime/api.h" #include "modules_utf8.h" struct sb_stemmer { struct SN_env * (*create)(void); void (*close)(struct SN_env *); int (*stem)(struct SN_env *); struct SN_env * env; }; extern const char ** sb_stemmer_list(void) { return algorithm_names; } static stemmer_encoding_t sb_getenc(const char * charenc) { struct stemmer_encoding * encoding; if (charenc == NULL) return ENC_UTF_8; for (encoding = encodings; encoding->name != 0; encoding++) { if (strcmp(encoding->name, charenc) == 0) break; } if (encoding->name == NULL) return ENC_UNKNOWN; return encoding->enc; } extern struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc) { stemmer_encoding_t enc; struct stemmer_modules * module; struct sb_stemmer * stemmer; enc = sb_getenc(charenc); if (enc == ENC_UNKNOWN) return NULL; for (module = modules; module->name != 0; module++) { if (strcmp(module->name, algorithm) == 0 && module->enc == enc) break; } if (module->name == NULL) return NULL; stemmer = (struct sb_stemmer *) malloc(sizeof(struct sb_stemmer)); if (stemmer == NULL) return NULL; stemmer->create = module->create; stemmer->close = module->close; stemmer->stem = module->stem; stemmer->env = stemmer->create(); if (stemmer->env == NULL) { 
sb_stemmer_delete(stemmer); return NULL; } return stemmer; } void sb_stemmer_delete(struct sb_stemmer * stemmer) { if (stemmer == 0) return; if (stemmer->close == 0) return; stemmer->close(stemmer->env); stemmer->close = 0; free(stemmer); } const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer, const sb_symbol * word, int size) { int ret; if (SN_set_current(stemmer->env, size, (const symbol *)(word))) { stemmer->env->l = 0; return NULL; } ret = stemmer->stem(stemmer->env); if (ret < 0) return NULL; stemmer->env->p[stemmer->env->l] = 0; return (const sb_symbol *)(stemmer->env->p); } int sb_stemmer_length(struct sb_stemmer * stemmer) { return stemmer->env->l; } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/libstemmer/modules.h000066400000000000000000000314421456444476200300350ustar00rootroot00000000000000/* libstemmer/modules.h: List of stemming modules. * * This file is generated by mkmodules.pl from a list of module names. * Do not edit manually. * * Modules included by this file are: danish, dutch, english, finnish, french, * german, hungarian, italian, norwegian, porter, portuguese, romanian, * russian, spanish, swedish, turkish */ #include "../src_c/stem_ISO_8859_1_danish.h" #include "../src_c/stem_UTF_8_danish.h" #include "../src_c/stem_ISO_8859_1_dutch.h" #include "../src_c/stem_UTF_8_dutch.h" #include "../src_c/stem_ISO_8859_1_english.h" #include "../src_c/stem_UTF_8_english.h" #include "../src_c/stem_ISO_8859_1_finnish.h" #include "../src_c/stem_UTF_8_finnish.h" #include "../src_c/stem_ISO_8859_1_french.h" #include "../src_c/stem_UTF_8_french.h" #include "../src_c/stem_ISO_8859_1_german.h" #include "../src_c/stem_UTF_8_german.h" #include "../src_c/stem_ISO_8859_1_hungarian.h" #include "../src_c/stem_UTF_8_hungarian.h" #include "../src_c/stem_ISO_8859_1_italian.h" #include "../src_c/stem_UTF_8_italian.h" #include "../src_c/stem_ISO_8859_1_norwegian.h" #include "../src_c/stem_UTF_8_norwegian.h" #include "../src_c/stem_ISO_8859_1_porter.h" 
#include "../src_c/stem_UTF_8_porter.h" #include "../src_c/stem_ISO_8859_1_portuguese.h" #include "../src_c/stem_UTF_8_portuguese.h" #include "../src_c/stem_ISO_8859_2_romanian.h" #include "../src_c/stem_UTF_8_romanian.h" #include "../src_c/stem_KOI8_R_russian.h" #include "../src_c/stem_UTF_8_russian.h" #include "../src_c/stem_ISO_8859_1_spanish.h" #include "../src_c/stem_UTF_8_spanish.h" #include "../src_c/stem_ISO_8859_1_swedish.h" #include "../src_c/stem_UTF_8_swedish.h" #include "../src_c/stem_UTF_8_turkish.h" typedef enum { ENC_UNKNOWN=0, ENC_ISO_8859_1, ENC_ISO_8859_2, ENC_KOI8_R, ENC_UTF_8 } stemmer_encoding_t; struct stemmer_encoding { const char* name; stemmer_encoding_t enc; }; static struct stemmer_encoding encodings[] = { {"ISO_8859_1", ENC_ISO_8859_1}, {"ISO_8859_2", ENC_ISO_8859_2}, {"KOI8_R", ENC_KOI8_R}, {"UTF_8", ENC_UTF_8}, {0,ENC_UNKNOWN} }; struct stemmer_modules { const char* name; stemmer_encoding_t enc; struct SN_env* (*create)(void); void (*close)(struct SN_env*); int (*stem)(struct SN_env*); }; static struct stemmer_modules modules[] = { {"da", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, {"dan", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, {"danish", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, {"de", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"deu", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, {"deu", ENC_UTF_8, 
german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"dut", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"dutch", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"en", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, {"eng", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, {"english", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, {"es", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"esl", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"fi", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, {"fin", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, {"finnish", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, {"finnish", 
ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, {"fr", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"fra", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"fre", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"french", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"ger", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"german", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"hu", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, {"hun", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, {"hungarian", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, {"it", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, 
italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, {"ita", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, {"italian", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, {"nl", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"nld", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"no", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, {"nor", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, {"norwegian", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, {"por", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, {"porter", ENC_ISO_8859_1, porter_ISO_8859_1_create_env, porter_ISO_8859_1_close_env, porter_ISO_8859_1_stem}, {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, 
porter_UTF_8_stem}, {"portuguese", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, {"pt", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, {"ro", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"romanian", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"ron", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"ru", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, {"rum", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"rus", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, {"russian", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, {"spa", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, {"spa", 
ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"spanish", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"sv", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, {"swe", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, {"swedish", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, {0,ENC_UNKNOWN,0,0,0} }; static const char* algorithm_names[] = { "danish", "dutch", "english", "finnish", "french", "german", "hungarian", "italian", "norwegian", "porter", "portuguese", "romanian", "russian", "spanish", "swedish", "turkish", 0 }; LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/libstemmer/modules.txt000066400000000000000000000047331456444476200304300ustar00rootroot00000000000000# This file contains a list of stemmers to include in the distribution. # The format is a set of space separated lines - on each line: # First item is name of stemmer. # Second item is comma separated list of character sets. # Third item is comma separated list of names to refer to the stemmer by. # # Lines starting with a #, or blank lines, are ignored. 
# List all the main algorithms for each language, in UTF-8, and also with # the most commonly used encoding. danish UTF_8,ISO_8859_1 danish,da,dan dutch UTF_8,ISO_8859_1 dutch,nl,dut,nld english UTF_8,ISO_8859_1 english,en,eng finnish UTF_8,ISO_8859_1 finnish,fi,fin french UTF_8,ISO_8859_1 french,fr,fre,fra german UTF_8,ISO_8859_1 german,de,ger,deu hungarian UTF_8,ISO_8859_1 hungarian,hu,hun italian UTF_8,ISO_8859_1 italian,it,ita norwegian UTF_8,ISO_8859_1 norwegian,no,nor portuguese UTF_8,ISO_8859_1 portuguese,pt,por romanian UTF_8,ISO_8859_2 romanian,ro,rum,ron russian UTF_8,KOI8_R russian,ru,rus spanish UTF_8,ISO_8859_1 spanish,es,esl,spa swedish UTF_8,ISO_8859_1 swedish,sv,swe turkish UTF_8 turkish,tr,tur # Also include the traditional porter algorithm for english. # The porter algorithm is included in the libstemmer distribution to assist # with backwards compatibility, but for new systems the english algorithm # should be used in preference. porter UTF_8,ISO_8859_1 porter # Some other stemmers in the snowball project are not included in the standard # distribution. To compile a libstemmer with them in, add them to this list, # and regenerate the distribution. (You will need a full source checkout for # this.) They are included in the snowball website as curiosities, but are not # intended for general use, and use of them is is not fully supported. These # algorithms are: # # german2 - This is a slight modification of the german stemmer. #german2 UTF_8,ISO_8859_1 german2 # # kraaij_pohlmann - This is a different dutch stemmer. #kraaij_pohlmann UTF_8,ISO_8859_1 kraaij_pohlmann # # lovins - This is an english stemmer, but fairly outdated, and # only really applicable to a restricted type of input text # (keywords in academic publications). 
#lovins UTF_8,ISO_8859_1 lovins LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/libstemmer/modules_utf8.h000066400000000000000000000150741456444476200310060ustar00rootroot00000000000000/* libstemmer/modules_utf8.h: List of stemming modules. * * This file is generated by mkmodules.pl from a list of module names. * Do not edit manually. * * Modules included by this file are: danish, dutch, english, finnish, french, * german, hungarian, italian, norwegian, porter, portuguese, romanian, * russian, spanish, swedish, turkish */ #include "../src_c/stem_UTF_8_danish.h" #include "../src_c/stem_UTF_8_dutch.h" #include "../src_c/stem_UTF_8_english.h" #include "../src_c/stem_UTF_8_finnish.h" #include "../src_c/stem_UTF_8_french.h" #include "../src_c/stem_UTF_8_german.h" #include "../src_c/stem_UTF_8_hungarian.h" #include "../src_c/stem_UTF_8_italian.h" #include "../src_c/stem_UTF_8_norwegian.h" #include "../src_c/stem_UTF_8_porter.h" #include "../src_c/stem_UTF_8_portuguese.h" #include "../src_c/stem_UTF_8_romanian.h" #include "../src_c/stem_UTF_8_russian.h" #include "../src_c/stem_UTF_8_spanish.h" #include "../src_c/stem_UTF_8_swedish.h" #include "../src_c/stem_UTF_8_turkish.h" typedef enum { ENC_UNKNOWN=0, ENC_UTF_8 } stemmer_encoding_t; struct stemmer_encoding { const char* name; stemmer_encoding_t enc; }; static struct stemmer_encoding encodings[] = { {"UTF_8", ENC_UTF_8}, {0,ENC_UNKNOWN} }; struct stemmer_modules { const char* name; stemmer_encoding_t enc; struct SN_env* (*create)(void); void (*close)(struct SN_env*); int (*stem)(struct SN_env*); }; static struct stemmer_modules modules[] = { {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"deu", ENC_UTF_8, 
german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, {"italian", 
ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem}, {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, {"swedish", ENC_UTF_8, 
swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, {0,ENC_UNKNOWN,0,0,0} }; static const char* algorithm_names[] = { "danish", "dutch", "english", "finnish", "french", "german", "hungarian", "italian", "norwegian", "porter", "portuguese", "romanian", "russian", "spanish", "swedish", "turkish", 0 }; LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/libstemmer/modules_utf8.txt000066400000000000000000000046521456444476200313760ustar00rootroot00000000000000# This file contains a list of stemmers to include in the distribution. # The format is a set of space separated lines - on each line: # First item is name of stemmer. # Second item is comma separated list of character sets. # Third item is comma separated list of names to refer to the stemmer by. # # Lines starting with a #, or blank lines, are ignored. # List all the main algorithms for each language, in UTF-8. danish UTF_8 danish,da,dan dutch UTF_8 dutch,nl,dut,nld english UTF_8 english,en,eng finnish UTF_8 finnish,fi,fin french UTF_8 french,fr,fre,fra german UTF_8 german,de,ger,deu hungarian UTF_8 hungarian,hu,hun italian UTF_8 italian,it,ita norwegian UTF_8 norwegian,no,nor portuguese UTF_8 portuguese,pt,por romanian UTF_8 romanian,ro,rum,ron russian UTF_8 russian,ru,rus spanish UTF_8 spanish,es,esl,spa swedish UTF_8 swedish,sv,swe turkish UTF_8 turkish,tr,tur # Also include the traditional porter algorithm for english. # The porter algorithm is included in the libstemmer distribution to assist # with backwards compatibility, but for new systems the english algorithm # should be used in preference. porter UTF_8 porter # Some other stemmers in the snowball project are not included in the standard # distribution. 
To compile a libstemmer with them in, add them to this list, # and regenerate the distribution. (You will need a full source checkout for # this.) They are included in the snowball website as curiosities, but are not # intended for general use, and use of them is is not fully supported. These # algorithms are: # # german2 - This is a slight modification of the german stemmer. #german2 UTF_8 german2 # # kraaij_pohlmann - This is a different dutch stemmer. #kraaij_pohlmann UTF_8 kraaij_pohlmann # # lovins - This is an english stemmer, but fairly outdated, and # only really applicable to a restricted type of input text # (keywords in academic publications). #lovins UTF_8 lovins LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/mkinc.mak000066400000000000000000000050671456444476200256500ustar00rootroot00000000000000# libstemmer/mkinc.mak: List of stemming module source files # # This file is generated by mkmodules.pl from a list of module names. # Do not edit manually. # # Modules included by this file are: danish, dutch, english, finnish, french, # german, hungarian, italian, norwegian, porter, portuguese, romanian, # russian, spanish, swedish, turkish snowball_sources= \ src_c/stem_ISO_8859_1_danish.c \ src_c/stem_UTF_8_danish.c \ src_c/stem_ISO_8859_1_dutch.c \ src_c/stem_UTF_8_dutch.c \ src_c/stem_ISO_8859_1_english.c \ src_c/stem_UTF_8_english.c \ src_c/stem_ISO_8859_1_finnish.c \ src_c/stem_UTF_8_finnish.c \ src_c/stem_ISO_8859_1_french.c \ src_c/stem_UTF_8_french.c \ src_c/stem_ISO_8859_1_german.c \ src_c/stem_UTF_8_german.c \ src_c/stem_ISO_8859_1_hungarian.c \ src_c/stem_UTF_8_hungarian.c \ src_c/stem_ISO_8859_1_italian.c \ src_c/stem_UTF_8_italian.c \ src_c/stem_ISO_8859_1_norwegian.c \ src_c/stem_UTF_8_norwegian.c \ src_c/stem_ISO_8859_1_porter.c \ src_c/stem_UTF_8_porter.c \ src_c/stem_ISO_8859_1_portuguese.c \ src_c/stem_UTF_8_portuguese.c \ src_c/stem_ISO_8859_2_romanian.c \ src_c/stem_UTF_8_romanian.c \ src_c/stem_KOI8_R_russian.c \ 
src_c/stem_UTF_8_russian.c \ src_c/stem_ISO_8859_1_spanish.c \ src_c/stem_UTF_8_spanish.c \ src_c/stem_ISO_8859_1_swedish.c \ src_c/stem_UTF_8_swedish.c \ src_c/stem_UTF_8_turkish.c \ runtime/api.c \ runtime/utilities.c \ libstemmer/libstemmer.c snowball_headers= \ src_c/stem_ISO_8859_1_danish.h \ src_c/stem_UTF_8_danish.h \ src_c/stem_ISO_8859_1_dutch.h \ src_c/stem_UTF_8_dutch.h \ src_c/stem_ISO_8859_1_english.h \ src_c/stem_UTF_8_english.h \ src_c/stem_ISO_8859_1_finnish.h \ src_c/stem_UTF_8_finnish.h \ src_c/stem_ISO_8859_1_french.h \ src_c/stem_UTF_8_french.h \ src_c/stem_ISO_8859_1_german.h \ src_c/stem_UTF_8_german.h \ src_c/stem_ISO_8859_1_hungarian.h \ src_c/stem_UTF_8_hungarian.h \ src_c/stem_ISO_8859_1_italian.h \ src_c/stem_UTF_8_italian.h \ src_c/stem_ISO_8859_1_norwegian.h \ src_c/stem_UTF_8_norwegian.h \ src_c/stem_ISO_8859_1_porter.h \ src_c/stem_UTF_8_porter.h \ src_c/stem_ISO_8859_1_portuguese.h \ src_c/stem_UTF_8_portuguese.h \ src_c/stem_ISO_8859_2_romanian.h \ src_c/stem_UTF_8_romanian.h \ src_c/stem_KOI8_R_russian.h \ src_c/stem_UTF_8_russian.h \ src_c/stem_ISO_8859_1_spanish.h \ src_c/stem_UTF_8_spanish.h \ src_c/stem_ISO_8859_1_swedish.h \ src_c/stem_UTF_8_swedish.h \ src_c/stem_UTF_8_turkish.h \ include/libstemmer.h \ libstemmer/modules.h \ runtime/api.h \ runtime/header.h LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/mkinc_utf8.mak000066400000000000000000000030221456444476200266030ustar00rootroot00000000000000# libstemmer/mkinc_utf8.mak: List of stemming module source files # # This file is generated by mkmodules.pl from a list of module names. # Do not edit manually. 
# # Modules included by this file are: danish, dutch, english, finnish, french, # german, hungarian, italian, norwegian, porter, portuguese, romanian, # russian, spanish, swedish, turkish snowball_sources= \ src_c/stem_UTF_8_danish.c \ src_c/stem_UTF_8_dutch.c \ src_c/stem_UTF_8_english.c \ src_c/stem_UTF_8_finnish.c \ src_c/stem_UTF_8_french.c \ src_c/stem_UTF_8_german.c \ src_c/stem_UTF_8_hungarian.c \ src_c/stem_UTF_8_italian.c \ src_c/stem_UTF_8_norwegian.c \ src_c/stem_UTF_8_porter.c \ src_c/stem_UTF_8_portuguese.c \ src_c/stem_UTF_8_romanian.c \ src_c/stem_UTF_8_russian.c \ src_c/stem_UTF_8_spanish.c \ src_c/stem_UTF_8_swedish.c \ src_c/stem_UTF_8_turkish.c \ runtime/api.c \ runtime/utilities.c \ libstemmer/libstemmer_utf8.c snowball_headers= \ src_c/stem_UTF_8_danish.h \ src_c/stem_UTF_8_dutch.h \ src_c/stem_UTF_8_english.h \ src_c/stem_UTF_8_finnish.h \ src_c/stem_UTF_8_french.h \ src_c/stem_UTF_8_german.h \ src_c/stem_UTF_8_hungarian.h \ src_c/stem_UTF_8_italian.h \ src_c/stem_UTF_8_norwegian.h \ src_c/stem_UTF_8_porter.h \ src_c/stem_UTF_8_portuguese.h \ src_c/stem_UTF_8_romanian.h \ src_c/stem_UTF_8_russian.h \ src_c/stem_UTF_8_spanish.h \ src_c/stem_UTF_8_swedish.h \ src_c/stem_UTF_8_turkish.h \ include/libstemmer.h \ libstemmer/modules_utf8.h \ runtime/api.h \ runtime/header.h LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/runtime/000077500000000000000000000000001456444476200255305ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/runtime/api.c000066400000000000000000000025501456444476200264470ustar00rootroot00000000000000 #include /* for calloc, free */ #include "header.h" extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size) { struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env)); if (z == NULL) return NULL; z->p = create_s(); if (z->p == NULL) goto error; if (S_size) { int i; z->S = (symbol * *) calloc(S_size, sizeof(symbol *)); if (z->S == NULL) goto error; for (i 
= 0; i < S_size; i++) { z->S[i] = create_s(); if (z->S[i] == NULL) goto error; } } if (I_size) { z->I = (int *) calloc(I_size, sizeof(int)); if (z->I == NULL) goto error; } if (B_size) { z->B = (unsigned char *) calloc(B_size, sizeof(unsigned char)); if (z->B == NULL) goto error; } return z; error: SN_close_env(z, S_size); return NULL; } extern void SN_close_env(struct SN_env * z, int S_size) { if (z == NULL) return; if (S_size) { int i; for (i = 0; i < S_size; i++) { lose_s(z->S[i]); } free(z->S); } free(z->I); free(z->B); if (z->p) lose_s(z->p); free(z); } extern int SN_set_current(struct SN_env * z, int size, const symbol * s) { int err = replace_s(z, 0, z->l, size, s, NULL); z->c = 0; return err; } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/runtime/api.h000066400000000000000000000014021456444476200264470ustar00rootroot00000000000000 typedef unsigned char symbol; /* Or replace 'char' above with 'short' for 16 bit characters. More precisely, replace 'char' with whatever type guarantees the character width you need. Note however that sizeof(symbol) should divide HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise there is an alignment problem. In the unlikely event of a problem here, consult Martin Porter. 
*/ struct SN_env { symbol* p; int c; int l; int lb; int bra; int ket; symbol** S; int* I; unsigned char* B; }; extern struct SN_env* SN_create_env(int S_size, int I_size, int B_size); extern void SN_close_env(struct SN_env* z, int S_size); extern int SN_set_current(struct SN_env* z, int size, const symbol* s); LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/runtime/header.h000066400000000000000000000046061456444476200271370ustar00rootroot00000000000000 #include #include "api.h" #define MAXINT INT_MAX #define MININT INT_MIN #define HEAD 2*sizeof(int) #define SIZE(p) ((int *)(p))[-1] #define SET_SIZE(p, n) ((int *)(p))[-1] = n #define CAPACITY(p) ((int *)(p))[-2] struct among { int s_size; /* number of chars in string */ const symbol* s; /* search string */ int substring_i;/* index to longest matching substring */ int result; /* result of the lookup */ int (* function)(struct SN_env*); }; extern symbol* create_s(void); extern void lose_s(symbol* p); extern int skip_utf8(const symbol* p, int c, int lb, int l, int n); extern int in_grouping_U(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); extern int in_grouping_b_U(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); extern int out_grouping_U(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); extern int out_grouping_b_U(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); extern int in_grouping(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); extern int in_grouping_b(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); extern int out_grouping(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); extern int out_grouping_b(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); extern int eq_s(struct SN_env* z, int s_size, const symbol* s); extern int eq_s_b(struct SN_env* z, int s_size, const symbol* s); extern int eq_v(struct SN_env* z, const 
symbol* p); extern int eq_v_b(struct SN_env* z, const symbol* p); extern int find_among(struct SN_env* z, const struct among* v, int v_size); extern int find_among_b(struct SN_env* z, const struct among* v, int v_size); extern int replace_s(struct SN_env* z, int c_bra, int c_ket, int s_size, const symbol* s, int* adjustment); extern int slice_from_s(struct SN_env* z, int s_size, const symbol* s); extern int slice_from_v(struct SN_env* z, const symbol* p); extern int slice_del(struct SN_env* z); extern int insert_s(struct SN_env* z, int bra, int ket, int s_size, const symbol* s); extern int insert_v(struct SN_env* z, int bra, int ket, const symbol* p); extern symbol* slice_to(struct SN_env* z, symbol* p); extern symbol* assign_to(struct SN_env* z, symbol* p); extern void debug(struct SN_env* z, int number, int line_count); LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/runtime/utilities.c000066400000000000000000000313431456444476200277130ustar00rootroot00000000000000 #include #include #include #include "header.h" #define unless(C) if(!(C)) #define CREATE_SIZE 1 extern symbol * create_s(void) { symbol * p; void * mem = malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol)); if (mem == NULL) return NULL; p = (symbol *) (HEAD + (char *) mem); CAPACITY(p) = CREATE_SIZE; SET_SIZE(p, CREATE_SIZE); return p; } extern void lose_s(symbol * p) { if (p == NULL) return; free((char *) p - HEAD); } /* new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new position, or 0 on failure. -- used to implement hop and next in the utf8 case. 
*/ extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) { int b; if (n >= 0) { for (; n > 0; n--) { if (c >= l) return -1; b = p[c++]; if (b >= 0xC0) { /* 1100 0000 */ while (c < l) { b = p[c]; if (b >= 0xC0 || b < 0x80) break; /* break unless b is 10------ */ c++; } } } } else { for (; n < 0; n++) { if (c <= lb) return -1; b = p[--c]; if (b >= 0x80) { /* 1000 0000 */ while (c > lb) { b = p[c]; if (b >= 0xC0) break; /* 1100 0000 */ c--; } } } } return c; } /* Code for character groupings: utf8 cases */ static int get_utf8(const symbol * p, int c, int l, int * slot) { int b0, b1; if (c >= l) return 0; b0 = p[c++]; if (b0 < 0xC0 || c == l) { /* 1100 0000 */ * slot = b0; return 1; } b1 = p[c++]; if (b0 < 0xE0 || c == l) { /* 1110 0000 */ * slot = (b0 & 0x1F) << 6 | (b1 & 0x3F); return 2; } * slot = (b0 & 0xF) << 12 | (b1 & 0x3F) << 6 | (p[c] & 0x3F); return 3; } static int get_b_utf8(const symbol * p, int c, int lb, int * slot) { int b0, b1; if (c <= lb) return 0; b0 = p[--c]; if (b0 < 0x80 || c == lb) { /* 1000 0000 */ * slot = b0; return 1; } b1 = p[--c]; if (b1 >= 0xC0 || c == lb) { /* 1100 0000 */ * slot = (b1 & 0x1F) << 6 | (b0 & 0x3F); return 2; } * slot = (p[c] & 0xF) << 12 | (b1 & 0x3F) << 6 | (b0 & 0x3F); return 3; } extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; int w = get_utf8(z->p, z->c, z->l, & ch); unless (w) return -1; if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return w; z->c += w; } while (repeat); return 0; } extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; int w = get_b_utf8(z->p, z->c, z->lb, & ch); unless (w) return -1; if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return w; z->c -= w; } while (repeat); return 0; } extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; int w = 
get_utf8(z->p, z->c, z->l, & ch); unless (w) return -1; unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return w; z->c += w; } while (repeat); return 0; } extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; int w = get_b_utf8(z->p, z->c, z->lb, & ch); unless (w) return -1; unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return w; z->c -= w; } while (repeat); return 0; } /* Code for character groupings: non-utf8 cases */ extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; if (z->c >= z->l) return -1; ch = z->p[z->c]; if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 1; z->c++; } while (repeat); return 0; } extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; if (z->c <= z->lb) return -1; ch = z->p[z->c - 1]; if (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 1; z->c--; } while (repeat); return 0; } extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; if (z->c >= z->l) return -1; ch = z->p[z->c]; unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 1; z->c++; } while (repeat); return 0; } extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat) { do { int ch; if (z->c <= z->lb) return -1; ch = z->p[z->c - 1]; unless (ch > max || (ch -= min) < 0 || (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 1; z->c--; } while (repeat); return 0; } extern int eq_s(struct SN_env * z, int s_size, const symbol * s) { if (z->l - z->c < s_size || memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0; z->c += s_size; return 1; } extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s) { if (z->c - z->lb < s_size || 
memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0; z->c -= s_size; return 1; } extern int eq_v(struct SN_env * z, const symbol * p) { return eq_s(z, SIZE(p), p); } extern int eq_v_b(struct SN_env * z, const symbol * p) { return eq_s_b(z, SIZE(p), p); } extern int find_among(struct SN_env * z, const struct among * v, int v_size) { int i = 0; int j = v_size; int c = z->c; int l = z->l; symbol * q = z->p + c; const struct among * w; int common_i = 0; int common_j = 0; int first_key_inspected = 0; while(1) { int k = i + ((j - i) >> 1); int diff = 0; int common = common_i < common_j ? common_i : common_j; /* smaller */ w = v + k; { int i2; for (i2 = common; i2 < w->s_size; i2++) { if (c + common == l) { diff = -1; break; } diff = q[common] - w->s[i2]; if (diff != 0) break; common++; } } if (diff < 0) { j = k; common_j = common; } else { i = k; common_i = common; } if (j - i <= 1) { if (i > 0) break; /* v->s has been inspected */ if (j == i) break; /* only one item in v */ /* - but now we need to go round once more to get v->s inspected. This looks messy, but is actually the optimal approach. */ if (first_key_inspected) break; first_key_inspected = 1; } } while(1) { w = v + i; if (common_i >= w->s_size) { z->c = c + w->s_size; if (w->function == 0) return w->result; { int res = w->function(z); z->c = c + w->s_size; if (res) return w->result; } } i = w->substring_i; if (i < 0) return 0; } } /* find_among_b is for backwards processing. Same comments apply */ extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) { int i = 0; int j = v_size; int c = z->c; int lb = z->lb; symbol * q = z->p + c - 1; const struct among * w; int common_i = 0; int common_j = 0; int first_key_inspected = 0; while(1) { int k = i + ((j - i) >> 1); int diff = 0; int common = common_i < common_j ? 
common_i : common_j; w = v + k; { int i2; for (i2 = w->s_size - 1 - common; i2 >= 0; i2--) { if (c - common == lb) { diff = -1; break; } diff = q[- common] - w->s[i2]; if (diff != 0) break; common++; } } if (diff < 0) { j = k; common_j = common; } else { i = k; common_i = common; } if (j - i <= 1) { if (i > 0) break; if (j == i) break; if (first_key_inspected) break; first_key_inspected = 1; } } while(1) { w = v + i; if (common_i >= w->s_size) { z->c = c - w->s_size; if (w->function == 0) return w->result; { int res = w->function(z); z->c = c - w->s_size; if (res) return w->result; } } i = w->substring_i; if (i < 0) return 0; } } /* Increase the size of the buffer pointed to by p to at least n symbols. * If insufficient memory, returns NULL and frees the old buffer. */ static symbol * increase_size(symbol * p, int n) { symbol * q; int new_size = n + 20; void * mem = realloc((char *) p - HEAD, HEAD + (new_size + 1) * sizeof(symbol)); if (mem == NULL) { lose_s(p); return NULL; } q = (symbol *) (HEAD + (char *)mem); CAPACITY(q) = new_size; return q; } /* to replace symbols between c_bra and c_ket in z->p by the s_size symbols at s. Returns 0 on success, -1 on error. Also, frees z->p (and sets it to NULL) on error. 
*/ extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjptr) { int adjustment; int len; if (z->p == NULL) { z->p = create_s(); if (z->p == NULL) return -1; } adjustment = s_size - (c_ket - c_bra); len = SIZE(z->p); if (adjustment != 0) { if (adjustment + len > CAPACITY(z->p)) { z->p = increase_size(z->p, adjustment + len); if (z->p == NULL) return -1; } memmove(z->p + c_ket + adjustment, z->p + c_ket, (len - c_ket) * sizeof(symbol)); SET_SIZE(z->p, adjustment + len); z->l += adjustment; if (z->c >= c_ket) z->c += adjustment; else if (z->c > c_bra) z->c = c_bra; } unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol)); if (adjptr != NULL) *adjptr = adjustment; return 0; } static int slice_check(struct SN_env * z) { if (z->bra < 0 || z->bra > z->ket || z->ket > z->l || z->p == NULL || z->l > SIZE(z->p)) /* this line could be removed */ { #if 0 fprintf(stderr, "faulty slice operation:\n"); debug(z, -1, 0); #endif return -1; } return 0; } extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s) { if (slice_check(z)) return -1; return replace_s(z, z->bra, z->ket, s_size, s, NULL); } extern int slice_from_v(struct SN_env * z, const symbol * p) { return slice_from_s(z, SIZE(p), p); } extern int slice_del(struct SN_env * z) { return slice_from_s(z, 0, 0); } extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s) { int adjustment; if (replace_s(z, bra, ket, s_size, s, &adjustment)) return -1; if (bra <= z->bra) z->bra += adjustment; if (bra <= z->ket) z->ket += adjustment; return 0; } extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p) { int adjustment; if (replace_s(z, bra, ket, SIZE(p), p, &adjustment)) return -1; if (bra <= z->bra) z->bra += adjustment; if (bra <= z->ket) z->ket += adjustment; return 0; } extern symbol * slice_to(struct SN_env * z, symbol * p) { if (slice_check(z)) { lose_s(p); return NULL; } { int len = z->ket - z->bra; 
if (CAPACITY(p) < len) { p = increase_size(p, len); if (p == NULL) return NULL; } memmove(p, z->p + z->bra, len * sizeof(symbol)); SET_SIZE(p, len); } return p; } extern symbol * assign_to(struct SN_env * z, symbol * p) { int len = z->l; if (CAPACITY(p) < len) { p = increase_size(p, len); if (p == NULL) return NULL; } memmove(p, z->p, len * sizeof(symbol)); SET_SIZE(p, len); return p; } #if 0 extern void debug(struct SN_env * z, int number, int line_count) { int i; int limit = SIZE(z->p); /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/ if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit); for (i = 0; i <= limit; i++) { if (z->lb == i) printf("{"); if (z->bra == i) printf("["); if (z->c == i) printf("|"); if (z->ket == i) printf("]"); if (z->l == i) printf("}"); if (i < limit) { int ch = z->p[i]; if (ch == 0) ch = '#'; printf("%c", ch); } } printf("'\n"); } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/000077500000000000000000000000001456444476200251365ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_danish.c000066400000000000000000000264001456444476200312310ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int danish_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_undouble(struct SN_env * z); static int r_other_suffix(struct SN_env * z); static int r_consonant_pair(struct SN_env * z); static int r_main_suffix(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * danish_ISO_8859_1_create_env(void); extern void danish_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[3] = { 'h', 'e', 'd' }; static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' }; static 
const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' }; static const symbol s_0_3[1] = { 'e' }; static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' }; static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' }; static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' }; static const symbol s_0_7[3] = { 'e', 'n', 'e' }; static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' }; static const symbol s_0_9[3] = { 'e', 'r', 'e' }; static const symbol s_0_10[2] = { 'e', 'n' }; static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' }; static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' }; static const symbol s_0_13[2] = { 'e', 'r' }; static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' }; static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' }; static const symbol s_0_16[1] = { 's' }; static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' }; static const symbol s_0_18[2] = { 'e', 's' }; static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' }; static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' }; static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' }; static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' }; static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' }; static const symbol s_0_24[3] = { 'e', 'n', 's' }; static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' }; static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' }; static const symbol s_0_27[3] = { 'e', 'r', 's' }; static const symbol s_0_28[3] = { 'e', 't', 's' }; static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' }; static const symbol s_0_30[2] = { 'e', 't' }; static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' }; static const struct among a_0[32] = { /* 0 */ { 3, s_0_0, -1, 1, 0}, /* 1 */ { 5, s_0_1, 0, 1, 0}, /* 2 */ { 4, s_0_2, -1, 1, 0}, /* 3 */ { 1, s_0_3, -1, 1, 0}, /* 4 */ { 5, s_0_4, 3, 1, 0}, /* 5 */ { 4, s_0_5, 3, 1, 0}, /* 6 */ { 6, s_0_6, 5, 1, 0}, /* 7 */ { 3, s_0_7, 3, 1, 0}, /* 8 */ { 4, s_0_8, 3, 1, 0}, /* 9 */ { 3, s_0_9, 3, 1, 0}, /* 10 */ { 2, 
s_0_10, -1, 1, 0}, /* 11 */ { 5, s_0_11, 10, 1, 0}, /* 12 */ { 4, s_0_12, 10, 1, 0}, /* 13 */ { 2, s_0_13, -1, 1, 0}, /* 14 */ { 5, s_0_14, 13, 1, 0}, /* 15 */ { 4, s_0_15, 13, 1, 0}, /* 16 */ { 1, s_0_16, -1, 2, 0}, /* 17 */ { 4, s_0_17, 16, 1, 0}, /* 18 */ { 2, s_0_18, 16, 1, 0}, /* 19 */ { 5, s_0_19, 18, 1, 0}, /* 20 */ { 7, s_0_20, 19, 1, 0}, /* 21 */ { 4, s_0_21, 18, 1, 0}, /* 22 */ { 5, s_0_22, 18, 1, 0}, /* 23 */ { 4, s_0_23, 18, 1, 0}, /* 24 */ { 3, s_0_24, 16, 1, 0}, /* 25 */ { 6, s_0_25, 24, 1, 0}, /* 26 */ { 5, s_0_26, 24, 1, 0}, /* 27 */ { 3, s_0_27, 16, 1, 0}, /* 28 */ { 3, s_0_28, 16, 1, 0}, /* 29 */ { 5, s_0_29, 28, 1, 0}, /* 30 */ { 2, s_0_30, -1, 1, 0}, /* 31 */ { 4, s_0_31, 30, 1, 0} }; static const symbol s_1_0[2] = { 'g', 'd' }; static const symbol s_1_1[2] = { 'd', 't' }; static const symbol s_1_2[2] = { 'g', 't' }; static const symbol s_1_3[2] = { 'k', 't' }; static const struct among a_1[4] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0}, /* 2 */ { 2, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0} }; static const symbol s_2_0[2] = { 'i', 'g' }; static const symbol s_2_1[3] = { 'l', 'i', 'g' }; static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' }; static const symbol s_2_3[3] = { 'e', 'l', 's' }; static const symbol s_2_4[4] = { 'l', 0xF8, 's', 't' }; static const struct among a_2[5] = { /* 0 */ { 2, s_2_0, -1, 1, 0}, /* 1 */ { 3, s_2_1, 0, 1, 0}, /* 2 */ { 4, s_2_2, 1, 1, 0}, /* 3 */ { 3, s_2_3, -1, 1, 0}, /* 4 */ { 4, s_2_4, -1, 2, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; static const symbol s_0[] = { 's', 't' }; static const symbol s_1[] = { 'i', 'g' }; static const symbol s_2[] = { 'l', 0xF8, 's' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c_test = z->c; /* test, line 33 */ { int ret = z->c + 3; if (0 > ret || ret > z->l) 
return 0; z->c = ret; /* hop, line 33 */ } z->I[1] = z->c; /* setmark x, line 33 */ z->c = c_test; } if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */ { /* gopast */ /* non v, line 34 */ int ret = in_grouping(z, g_v, 97, 248, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 34 */ /* try, line 35 */ if (!(z->I[0] < z->I[1])) goto lab0; z->I[0] = z->I[1]; lab0: return 1; } static int r_main_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 41 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 41 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 41 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_0, 32); /* substring, line 41 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 41 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 48 */ if (ret < 0) return ret; } break; case 2: if (in_grouping_b(z, g_s_ending, 97, 229, 0)) return 0; { int ret = slice_del(z); /* delete, line 50 */ if (ret < 0) return ret; } break; } return 1; } static int r_consonant_pair(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 55 */ { int mlimit; /* setlimit, line 56 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 56 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 56 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */ z->bra = z->c; /* ], line 56 */ z->lb = mlimit; } z->c = z->l - m_test; } if (z->c <= z->lb) return 0; z->c--; /* next, line 62 */ z->bra = z->c; /* ], line 62 */ { int ret = slice_del(z); /* 
delete, line 62 */ if (ret < 0) return ret; } return 1; } static int r_other_suffix(struct SN_env * z) { int among_var; { int m1 = z->l - z->c; (void)m1; /* do, line 66 */ z->ket = z->c; /* [, line 66 */ if (!(eq_s_b(z, 2, s_0))) goto lab0; z->bra = z->c; /* ], line 66 */ if (!(eq_s_b(z, 2, s_1))) goto lab0; { int ret = slice_del(z); /* delete, line 66 */ if (ret < 0) return ret; } lab0: z->c = z->l - m1; } { int mlimit; /* setlimit, line 67 */ int m2 = z->l - z->c; (void)m2; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 67 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m2; z->ket = z->c; /* [, line 67 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_2, 5); /* substring, line 67 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 67 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 70 */ if (ret < 0) return ret; } { int m3 = z->l - z->c; (void)m3; /* do, line 70 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab1; /* call consonant_pair, line 70 */ if (ret < 0) return ret; } lab1: z->c = z->l - m3; } break; case 2: { int ret = slice_from_s(z, 3, s_2); /* <-, line 72 */ if (ret < 0) return ret; } break; } return 1; } static int r_undouble(struct SN_env * z) { { int mlimit; /* setlimit, line 76 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 76 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 76 */ if (out_grouping_b(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 76 */ z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */ if (z->S[0] == 0) return -1; /* -> ch, line 76 */ z->lb = mlimit; } if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */ { int ret = slice_del(z); /* delete, line 78 */ if (ret < 0) return ret; } return 1; } extern int 
danish_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 84 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 84 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 85 */ { int m2 = z->l - z->c; (void)m2; /* do, line 86 */ { int ret = r_main_suffix(z); if (ret == 0) goto lab1; /* call main_suffix, line 86 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 87 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab2; /* call consonant_pair, line 87 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 88 */ { int ret = r_other_suffix(z); if (ret == 0) goto lab3; /* call other_suffix, line 88 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 89 */ { int ret = r_undouble(z); if (ret == 0) goto lab4; /* call undouble, line 89 */ if (ret < 0) return ret; } lab4: z->c = z->l - m5; } z->c = z->lb; return 1; } extern struct SN_env * danish_ISO_8859_1_create_env(void) { return SN_create_env(1, 2, 0); } extern void danish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 1); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_danish.h000066400000000000000000000005021456444476200312310ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* danish_ISO_8859_1_create_env(void); extern void danish_ISO_8859_1_close_env(struct SN_env* z); extern int danish_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_dutch.c000066400000000000000000000505011456444476200310710ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include 
"../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int dutch_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_standard_suffix(struct SN_env * z); static int r_undouble(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_en_ending(struct SN_env * z); static int r_e_ending(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * dutch_ISO_8859_1_create_env(void); extern void dutch_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[1] = { 0xE1 }; static const symbol s_0_2[1] = { 0xE4 }; static const symbol s_0_3[1] = { 0xE9 }; static const symbol s_0_4[1] = { 0xEB }; static const symbol s_0_5[1] = { 0xED }; static const symbol s_0_6[1] = { 0xEF }; static const symbol s_0_7[1] = { 0xF3 }; static const symbol s_0_8[1] = { 0xF6 }; static const symbol s_0_9[1] = { 0xFA }; static const symbol s_0_10[1] = { 0xFC }; static const struct among a_0[11] = { /* 0 */ { 0, 0, -1, 6, 0}, /* 1 */ { 1, s_0_1, 0, 1, 0}, /* 2 */ { 1, s_0_2, 0, 1, 0}, /* 3 */ { 1, s_0_3, 0, 2, 0}, /* 4 */ { 1, s_0_4, 0, 2, 0}, /* 5 */ { 1, s_0_5, 0, 3, 0}, /* 6 */ { 1, s_0_6, 0, 3, 0}, /* 7 */ { 1, s_0_7, 0, 4, 0}, /* 8 */ { 1, s_0_8, 0, 4, 0}, /* 9 */ { 1, s_0_9, 0, 5, 0}, /* 10 */ { 1, s_0_10, 0, 5, 0} }; static const symbol s_1_1[1] = { 'I' }; static const symbol s_1_2[1] = { 'Y' }; static const struct among a_1[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 1, s_1_1, 0, 2, 0}, /* 2 */ { 1, s_1_2, 0, 1, 0} }; static const symbol s_2_0[2] = { 'd', 'd' }; static const symbol s_2_1[2] = { 'k', 'k' }; static const symbol s_2_2[2] = { 't', 't' }; static const struct among a_2[3] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 2, s_2_2, -1, -1, 0} }; static const symbol s_3_0[3] = { 'e', 
'n', 'e' }; static const symbol s_3_1[2] = { 's', 'e' }; static const symbol s_3_2[2] = { 'e', 'n' }; static const symbol s_3_3[5] = { 'h', 'e', 'd', 'e', 'n' }; static const symbol s_3_4[1] = { 's' }; static const struct among a_3[5] = { /* 0 */ { 3, s_3_0, -1, 2, 0}, /* 1 */ { 2, s_3_1, -1, 3, 0}, /* 2 */ { 2, s_3_2, -1, 2, 0}, /* 3 */ { 5, s_3_3, 2, 1, 0}, /* 4 */ { 1, s_3_4, -1, 3, 0} }; static const symbol s_4_0[3] = { 'e', 'n', 'd' }; static const symbol s_4_1[2] = { 'i', 'g' }; static const symbol s_4_2[3] = { 'i', 'n', 'g' }; static const symbol s_4_3[4] = { 'l', 'i', 'j', 'k' }; static const symbol s_4_4[4] = { 'b', 'a', 'a', 'r' }; static const symbol s_4_5[3] = { 'b', 'a', 'r' }; static const struct among a_4[6] = { /* 0 */ { 3, s_4_0, -1, 1, 0}, /* 1 */ { 2, s_4_1, -1, 2, 0}, /* 2 */ { 3, s_4_2, -1, 1, 0}, /* 3 */ { 4, s_4_3, -1, 3, 0}, /* 4 */ { 4, s_4_4, -1, 4, 0}, /* 5 */ { 3, s_4_5, -1, 5, 0} }; static const symbol s_5_0[2] = { 'a', 'a' }; static const symbol s_5_1[2] = { 'e', 'e' }; static const symbol s_5_2[2] = { 'o', 'o' }; static const symbol s_5_3[2] = { 'u', 'u' }; static const struct among a_5[4] = { /* 0 */ { 2, s_5_0, -1, -1, 0}, /* 1 */ { 2, s_5_1, -1, -1, 0}, /* 2 */ { 2, s_5_2, -1, -1, 0}, /* 3 */ { 2, s_5_3, -1, -1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const unsigned char g_v_I[] = { 1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const unsigned char g_v_j[] = { 17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const symbol s_0[] = { 'a' }; static const symbol s_1[] = { 'e' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'o' }; static const symbol s_4[] = { 'u' }; static const symbol s_5[] = { 'y' }; static const symbol s_6[] = { 'Y' }; static const symbol s_7[] = { 'i' }; static const symbol s_8[] = { 'I' }; static const symbol s_9[] = { 'y' }; static const symbol s_10[] = { 'Y' }; static const symbol 
s_11[] = { 'y' }; static const symbol s_12[] = { 'i' }; static const symbol s_13[] = { 'e' }; static const symbol s_14[] = { 'g', 'e', 'm' }; static const symbol s_15[] = { 'h', 'e', 'i', 'd' }; static const symbol s_16[] = { 'h', 'e', 'i', 'd' }; static const symbol s_17[] = { 'c' }; static const symbol s_18[] = { 'e', 'n' }; static const symbol s_19[] = { 'i', 'g' }; static const symbol s_20[] = { 'e' }; static const symbol s_21[] = { 'e' }; static int r_prelude(struct SN_env * z) { int among_var; { int c_test = z->c; /* test, line 42 */ while(1) { /* repeat, line 42 */ int c1 = z->c; z->bra = z->c; /* [, line 43 */ if (z->c >= z->l || z->p[z->c + 0] >> 5 != 7 || !((340306450 >> (z->p[z->c + 0] & 0x1f)) & 1)) among_var = 6; else among_var = find_among(z, a_0, 11); /* substring, line 43 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 43 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_0); /* <-, line 45 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 47 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_2); /* <-, line 49 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_3); /* <-, line 51 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_4); /* <-, line 53 */ if (ret < 0) return ret; } break; case 6: if (z->c >= z->l) goto lab0; z->c++; /* next, line 54 */ break; } continue; lab0: z->c = c1; break; } z->c = c_test; } { int c_keep = z->c; /* try, line 57 */ z->bra = z->c; /* [, line 57 */ if (!(eq_s(z, 1, s_5))) { z->c = c_keep; goto lab1; } z->ket = z->c; /* ], line 57 */ { int ret = slice_from_s(z, 1, s_6); /* <-, line 57 */ if (ret < 0) return ret; } lab1: ; } while(1) { /* repeat, line 58 */ int c2 = z->c; while(1) { /* goto, line 58 */ int c3 = z->c; if (in_grouping(z, g_v, 97, 232, 0)) goto lab3; z->bra = z->c; /* [, line 59 */ { int c4 = z->c; /* or, line 59 */ if (!(eq_s(z, 1, s_7))) 
goto lab5; z->ket = z->c; /* ], line 59 */ if (in_grouping(z, g_v, 97, 232, 0)) goto lab5; { int ret = slice_from_s(z, 1, s_8); /* <-, line 59 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = c4; if (!(eq_s(z, 1, s_9))) goto lab3; z->ket = z->c; /* ], line 60 */ { int ret = slice_from_s(z, 1, s_10); /* <-, line 60 */ if (ret < 0) return ret; } } lab4: z->c = c3; break; lab3: z->c = c3; if (z->c >= z->l) goto lab2; z->c++; /* goto, line 58 */ } continue; lab2: z->c = c2; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { /* gopast */ /* grouping v, line 69 */ int ret = out_grouping(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 69 */ int ret = in_grouping(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 69 */ /* try, line 70 */ if (!(z->I[0] < 3)) goto lab0; z->I[0] = 3; lab0: { /* gopast */ /* grouping v, line 71 */ int ret = out_grouping(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 71 */ int ret = in_grouping(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 71 */ return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 75 */ int c1 = z->c; z->bra = z->c; /* [, line 77 */ if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 89)) among_var = 3; else among_var = find_among(z, a_1, 3); /* substring, line 77 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 77 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_11); /* <-, line 78 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */ if (ret < 0) return ret; } break; case 3: if (z->c >= z->l) goto lab0; z->c++; /* next, line 80 */ break; } continue; lab0: z->c = c1; break; } return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; 
return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_undouble(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 91 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1050640 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_2, 3))) return 0; /* among, line 91 */ z->c = z->l - m_test; } z->ket = z->c; /* [, line 91 */ if (z->c <= z->lb) return 0; z->c--; /* next, line 91 */ z->bra = z->c; /* ], line 91 */ { int ret = slice_del(z); /* delete, line 91 */ if (ret < 0) return ret; } return 1; } static int r_e_ending(struct SN_env * z) { z->B[0] = 0; /* unset e_found, line 95 */ z->ket = z->c; /* [, line 96 */ if (!(eq_s_b(z, 1, s_13))) return 0; z->bra = z->c; /* ], line 96 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 96 */ if (ret < 0) return ret; } { int m_test = z->l - z->c; /* test, line 96 */ if (out_grouping_b(z, g_v, 97, 232, 0)) return 0; z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } z->B[0] = 1; /* set e_found, line 97 */ { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 98 */ if (ret < 0) return ret; } return 1; } static int r_en_ending(struct SN_env * z) { { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 102 */ if (ret < 0) return ret; } { int m1 = z->l - z->c; (void)m1; /* and, line 102 */ if (out_grouping_b(z, g_v, 97, 232, 0)) return 0; z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* not, line 102 */ if (!(eq_s_b(z, 3, s_14))) goto lab0; return 0; lab0: z->c = z->l - m2; } } { int ret = slice_del(z); /* delete, line 102 */ if (ret < 0) return ret; } { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 103 */ if (ret < 0) return ret; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; { int m1 = z->l - z->c; (void)m1; /* do, line 107 */ z->ket = z->c; /* [, line 108 */ if (z->c <= z->lb || z->p[z->c - 
1] >> 5 != 3 || !((540704 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; among_var = find_among_b(z, a_3, 5); /* substring, line 108 */ if (!(among_var)) goto lab0; z->bra = z->c; /* ], line 108 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = r_R1(z); if (ret == 0) goto lab0; /* call R1, line 110 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_15); /* <-, line 110 */ if (ret < 0) return ret; } break; case 2: { int ret = r_en_ending(z); if (ret == 0) goto lab0; /* call en_ending, line 113 */ if (ret < 0) return ret; } break; case 3: { int ret = r_R1(z); if (ret == 0) goto lab0; /* call R1, line 116 */ if (ret < 0) return ret; } if (out_grouping_b(z, g_v_j, 97, 232, 0)) goto lab0; { int ret = slice_del(z); /* delete, line 116 */ if (ret < 0) return ret; } break; } lab0: z->c = z->l - m1; } { int m2 = z->l - z->c; (void)m2; /* do, line 120 */ { int ret = r_e_ending(z); if (ret == 0) goto lab1; /* call e_ending, line 120 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 122 */ z->ket = z->c; /* [, line 122 */ if (!(eq_s_b(z, 4, s_16))) goto lab2; z->bra = z->c; /* ], line 122 */ { int ret = r_R2(z); if (ret == 0) goto lab2; /* call R2, line 122 */ if (ret < 0) return ret; } { int m4 = z->l - z->c; (void)m4; /* not, line 122 */ if (!(eq_s_b(z, 1, s_17))) goto lab3; goto lab2; lab3: z->c = z->l - m4; } { int ret = slice_del(z); /* delete, line 122 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 123 */ if (!(eq_s_b(z, 2, s_18))) goto lab2; z->bra = z->c; /* ], line 123 */ { int ret = r_en_ending(z); if (ret == 0) goto lab2; /* call en_ending, line 123 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m5 = z->l - z->c; (void)m5; /* do, line 126 */ z->ket = z->c; /* [, line 127 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((264336 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab4; among_var = find_among_b(z, a_4, 6); /* substring, line 127 */ if (!(among_var)) goto lab4; 
z->bra = z->c; /* ], line 127 */ switch(among_var) { case 0: goto lab4; case 1: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 129 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 129 */ if (ret < 0) return ret; } { int m6 = z->l - z->c; (void)m6; /* or, line 130 */ z->ket = z->c; /* [, line 130 */ if (!(eq_s_b(z, 2, s_19))) goto lab6; z->bra = z->c; /* ], line 130 */ { int ret = r_R2(z); if (ret == 0) goto lab6; /* call R2, line 130 */ if (ret < 0) return ret; } { int m7 = z->l - z->c; (void)m7; /* not, line 130 */ if (!(eq_s_b(z, 1, s_20))) goto lab7; goto lab6; lab7: z->c = z->l - m7; } { int ret = slice_del(z); /* delete, line 130 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m6; { int ret = r_undouble(z); if (ret == 0) goto lab4; /* call undouble, line 130 */ if (ret < 0) return ret; } } lab5: break; case 2: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 133 */ if (ret < 0) return ret; } { int m8 = z->l - z->c; (void)m8; /* not, line 133 */ if (!(eq_s_b(z, 1, s_21))) goto lab8; goto lab4; lab8: z->c = z->l - m8; } { int ret = slice_del(z); /* delete, line 133 */ if (ret < 0) return ret; } break; case 3: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 136 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 136 */ if (ret < 0) return ret; } { int ret = r_e_ending(z); if (ret == 0) goto lab4; /* call e_ending, line 136 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 139 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 139 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 142 */ if (ret < 0) return ret; } if (!(z->B[0])) goto lab4; /* Boolean test e_found, line 142 */ { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } break; } lab4: z->c = z->l - m5; } { int m9 = z->l - z->c; (void)m9; /* do, line 
146 */ if (out_grouping_b(z, g_v_I, 73, 232, 0)) goto lab9; { int m_test = z->l - z->c; /* test, line 148 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((2129954 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab9; if (!(find_among_b(z, a_5, 4))) goto lab9; /* among, line 149 */ if (out_grouping_b(z, g_v, 97, 232, 0)) goto lab9; z->c = z->l - m_test; } z->ket = z->c; /* [, line 152 */ if (z->c <= z->lb) goto lab9; z->c--; /* next, line 152 */ z->bra = z->c; /* ], line 152 */ { int ret = slice_del(z); /* delete, line 152 */ if (ret < 0) return ret; } lab9: z->c = z->l - m9; } return 1; } extern int dutch_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 159 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 159 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 160 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 160 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 161 */ { int m3 = z->l - z->c; (void)m3; /* do, line 162 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab2; /* call standard_suffix, line 162 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } z->c = z->lb; { int c4 = z->c; /* do, line 163 */ { int ret = r_postlude(z); if (ret == 0) goto lab3; /* call postlude, line 163 */ if (ret < 0) return ret; } lab3: z->c = c4; } return 1; } extern struct SN_env * dutch_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 1); } extern void dutch_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h000066400000000000000000000004771456444476200311050ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* dutch_ISO_8859_1_create_env(void); extern void dutch_ISO_8859_1_close_env(struct 
SN_env* z); extern int dutch_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_english.c000066400000000000000000001134661456444476200314250ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int english_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_exception2(struct SN_env * z); static int r_exception1(struct SN_env * z); static int r_Step_5(struct SN_env * z); static int r_Step_4(struct SN_env * z); static int r_Step_3(struct SN_env * z); static int r_Step_2(struct SN_env * z); static int r_Step_1c(struct SN_env * z); static int r_Step_1b(struct SN_env * z); static int r_Step_1a(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_shortv(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * english_ISO_8859_1_create_env(void); extern void english_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[5] = { 'a', 'r', 's', 'e', 'n' }; static const symbol s_0_1[6] = { 'c', 'o', 'm', 'm', 'u', 'n' }; static const symbol s_0_2[5] = { 'g', 'e', 'n', 'e', 'r' }; static const struct among a_0[3] = { /* 0 */ { 5, s_0_0, -1, -1, 0}, /* 1 */ { 6, s_0_1, -1, -1, 0}, /* 2 */ { 5, s_0_2, -1, -1, 0} }; static const symbol s_1_0[1] = { '\'' }; static const symbol s_1_1[3] = { '\'', 's', '\'' }; static const symbol s_1_2[2] = { '\'', 's' }; static const struct among a_1[3] = { /* 0 */ { 1, s_1_0, -1, 1, 0}, /* 1 */ { 3, s_1_1, 0, 1, 0}, /* 2 */ { 2, s_1_2, -1, 1, 0} }; static const symbol s_2_0[3] = { 'i', 'e', 'd' }; static const symbol s_2_1[1] = { 's' }; static const symbol 
s_2_2[3] = { 'i', 'e', 's' }; static const symbol s_2_3[4] = { 's', 's', 'e', 's' }; static const symbol s_2_4[2] = { 's', 's' }; static const symbol s_2_5[2] = { 'u', 's' }; static const struct among a_2[6] = { /* 0 */ { 3, s_2_0, -1, 2, 0}, /* 1 */ { 1, s_2_1, -1, 3, 0}, /* 2 */ { 3, s_2_2, 1, 2, 0}, /* 3 */ { 4, s_2_3, 1, 1, 0}, /* 4 */ { 2, s_2_4, 1, -1, 0}, /* 5 */ { 2, s_2_5, 1, -1, 0} }; static const symbol s_3_1[2] = { 'b', 'b' }; static const symbol s_3_2[2] = { 'd', 'd' }; static const symbol s_3_3[2] = { 'f', 'f' }; static const symbol s_3_4[2] = { 'g', 'g' }; static const symbol s_3_5[2] = { 'b', 'l' }; static const symbol s_3_6[2] = { 'm', 'm' }; static const symbol s_3_7[2] = { 'n', 'n' }; static const symbol s_3_8[2] = { 'p', 'p' }; static const symbol s_3_9[2] = { 'r', 'r' }; static const symbol s_3_10[2] = { 'a', 't' }; static const symbol s_3_11[2] = { 't', 't' }; static const symbol s_3_12[2] = { 'i', 'z' }; static const struct among a_3[13] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 2, s_3_1, 0, 2, 0}, /* 2 */ { 2, s_3_2, 0, 2, 0}, /* 3 */ { 2, s_3_3, 0, 2, 0}, /* 4 */ { 2, s_3_4, 0, 2, 0}, /* 5 */ { 2, s_3_5, 0, 1, 0}, /* 6 */ { 2, s_3_6, 0, 2, 0}, /* 7 */ { 2, s_3_7, 0, 2, 0}, /* 8 */ { 2, s_3_8, 0, 2, 0}, /* 9 */ { 2, s_3_9, 0, 2, 0}, /* 10 */ { 2, s_3_10, 0, 1, 0}, /* 11 */ { 2, s_3_11, 0, 2, 0}, /* 12 */ { 2, s_3_12, 0, 1, 0} }; static const symbol s_4_0[2] = { 'e', 'd' }; static const symbol s_4_1[3] = { 'e', 'e', 'd' }; static const symbol s_4_2[3] = { 'i', 'n', 'g' }; static const symbol s_4_3[4] = { 'e', 'd', 'l', 'y' }; static const symbol s_4_4[5] = { 'e', 'e', 'd', 'l', 'y' }; static const symbol s_4_5[5] = { 'i', 'n', 'g', 'l', 'y' }; static const struct among a_4[6] = { /* 0 */ { 2, s_4_0, -1, 2, 0}, /* 1 */ { 3, s_4_1, 0, 1, 0}, /* 2 */ { 3, s_4_2, -1, 2, 0}, /* 3 */ { 4, s_4_3, -1, 2, 0}, /* 4 */ { 5, s_4_4, 3, 1, 0}, /* 5 */ { 5, s_4_5, -1, 2, 0} }; static const symbol s_5_0[4] = { 'a', 'n', 'c', 'i' }; static const symbol 
s_5_1[4] = { 'e', 'n', 'c', 'i' }; static const symbol s_5_2[3] = { 'o', 'g', 'i' }; static const symbol s_5_3[2] = { 'l', 'i' }; static const symbol s_5_4[3] = { 'b', 'l', 'i' }; static const symbol s_5_5[4] = { 'a', 'b', 'l', 'i' }; static const symbol s_5_6[4] = { 'a', 'l', 'l', 'i' }; static const symbol s_5_7[5] = { 'f', 'u', 'l', 'l', 'i' }; static const symbol s_5_8[6] = { 'l', 'e', 's', 's', 'l', 'i' }; static const symbol s_5_9[5] = { 'o', 'u', 's', 'l', 'i' }; static const symbol s_5_10[5] = { 'e', 'n', 't', 'l', 'i' }; static const symbol s_5_11[5] = { 'a', 'l', 'i', 't', 'i' }; static const symbol s_5_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' }; static const symbol s_5_13[5] = { 'i', 'v', 'i', 't', 'i' }; static const symbol s_5_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_5_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_5_16[5] = { 'a', 'l', 'i', 's', 'm' }; static const symbol s_5_17[5] = { 'a', 't', 'i', 'o', 'n' }; static const symbol s_5_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' }; static const symbol s_5_19[4] = { 'i', 'z', 'e', 'r' }; static const symbol s_5_20[4] = { 'a', 't', 'o', 'r' }; static const symbol s_5_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' }; static const symbol s_5_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' }; static const symbol s_5_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' }; static const struct among a_5[24] = { /* 0 */ { 4, s_5_0, -1, 3, 0}, /* 1 */ { 4, s_5_1, -1, 2, 0}, /* 2 */ { 3, s_5_2, -1, 13, 0}, /* 3 */ { 2, s_5_3, -1, 16, 0}, /* 4 */ { 3, s_5_4, 3, 12, 0}, /* 5 */ { 4, s_5_5, 4, 4, 0}, /* 6 */ { 4, s_5_6, 3, 8, 0}, /* 7 */ { 5, s_5_7, 3, 14, 0}, /* 8 */ { 6, s_5_8, 3, 15, 0}, /* 9 */ { 5, s_5_9, 3, 10, 0}, /* 10 */ { 5, s_5_10, 3, 5, 0}, /* 11 */ { 5, s_5_11, -1, 8, 0}, /* 12 */ { 6, s_5_12, -1, 12, 0}, /* 13 */ { 5, s_5_13, -1, 11, 0}, /* 14 */ { 6, s_5_14, -1, 1, 0}, /* 15 */ { 7, s_5_15, 14, 7, 0}, /* 16 */ { 5, s_5_16, -1, 8, 0}, /* 17 */ { 5, s_5_17, -1, 7, 0}, /* 18 
*/ { 7, s_5_18, 17, 6, 0}, /* 19 */ { 4, s_5_19, -1, 6, 0}, /* 20 */ { 4, s_5_20, -1, 7, 0}, /* 21 */ { 7, s_5_21, -1, 11, 0}, /* 22 */ { 7, s_5_22, -1, 9, 0}, /* 23 */ { 7, s_5_23, -1, 10, 0} }; static const symbol s_6_0[5] = { 'i', 'c', 'a', 't', 'e' }; static const symbol s_6_1[5] = { 'a', 't', 'i', 'v', 'e' }; static const symbol s_6_2[5] = { 'a', 'l', 'i', 'z', 'e' }; static const symbol s_6_3[5] = { 'i', 'c', 'i', 't', 'i' }; static const symbol s_6_4[4] = { 'i', 'c', 'a', 'l' }; static const symbol s_6_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_6_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_6_7[3] = { 'f', 'u', 'l' }; static const symbol s_6_8[4] = { 'n', 'e', 's', 's' }; static const struct among a_6[9] = { /* 0 */ { 5, s_6_0, -1, 4, 0}, /* 1 */ { 5, s_6_1, -1, 6, 0}, /* 2 */ { 5, s_6_2, -1, 3, 0}, /* 3 */ { 5, s_6_3, -1, 4, 0}, /* 4 */ { 4, s_6_4, -1, 4, 0}, /* 5 */ { 6, s_6_5, -1, 1, 0}, /* 6 */ { 7, s_6_6, 5, 2, 0}, /* 7 */ { 3, s_6_7, -1, 5, 0}, /* 8 */ { 4, s_6_8, -1, 5, 0} }; static const symbol s_7_0[2] = { 'i', 'c' }; static const symbol s_7_1[4] = { 'a', 'n', 'c', 'e' }; static const symbol s_7_2[4] = { 'e', 'n', 'c', 'e' }; static const symbol s_7_3[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_7_4[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_7_5[3] = { 'a', 't', 'e' }; static const symbol s_7_6[3] = { 'i', 'v', 'e' }; static const symbol s_7_7[3] = { 'i', 'z', 'e' }; static const symbol s_7_8[3] = { 'i', 't', 'i' }; static const symbol s_7_9[2] = { 'a', 'l' }; static const symbol s_7_10[3] = { 'i', 's', 'm' }; static const symbol s_7_11[3] = { 'i', 'o', 'n' }; static const symbol s_7_12[2] = { 'e', 'r' }; static const symbol s_7_13[3] = { 'o', 'u', 's' }; static const symbol s_7_14[3] = { 'a', 'n', 't' }; static const symbol s_7_15[3] = { 'e', 'n', 't' }; static const symbol s_7_16[4] = { 'm', 'e', 'n', 't' }; static const symbol s_7_17[5] = { 'e', 'm', 'e', 'n', 't' }; static const struct 
among a_7[18] = { /* 0 */ { 2, s_7_0, -1, 1, 0}, /* 1 */ { 4, s_7_1, -1, 1, 0}, /* 2 */ { 4, s_7_2, -1, 1, 0}, /* 3 */ { 4, s_7_3, -1, 1, 0}, /* 4 */ { 4, s_7_4, -1, 1, 0}, /* 5 */ { 3, s_7_5, -1, 1, 0}, /* 6 */ { 3, s_7_6, -1, 1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 3, s_7_8, -1, 1, 0}, /* 9 */ { 2, s_7_9, -1, 1, 0}, /* 10 */ { 3, s_7_10, -1, 1, 0}, /* 11 */ { 3, s_7_11, -1, 2, 0}, /* 12 */ { 2, s_7_12, -1, 1, 0}, /* 13 */ { 3, s_7_13, -1, 1, 0}, /* 14 */ { 3, s_7_14, -1, 1, 0}, /* 15 */ { 3, s_7_15, -1, 1, 0}, /* 16 */ { 4, s_7_16, 15, 1, 0}, /* 17 */ { 5, s_7_17, 16, 1, 0} }; static const symbol s_8_0[1] = { 'e' }; static const symbol s_8_1[1] = { 'l' }; static const struct among a_8[2] = { /* 0 */ { 1, s_8_0, -1, 1, 0}, /* 1 */ { 1, s_8_1, -1, 2, 0} }; static const symbol s_9_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' }; static const symbol s_9_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' }; static const symbol s_9_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' }; static const symbol s_9_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' }; static const symbol s_9_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' }; static const symbol s_9_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' }; static const symbol s_9_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' }; static const symbol s_9_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' }; static const struct among a_9[8] = { /* 0 */ { 7, s_9_0, -1, -1, 0}, /* 1 */ { 7, s_9_1, -1, -1, 0}, /* 2 */ { 6, s_9_2, -1, -1, 0}, /* 3 */ { 7, s_9_3, -1, -1, 0}, /* 4 */ { 6, s_9_4, -1, -1, 0}, /* 5 */ { 7, s_9_5, -1, -1, 0}, /* 6 */ { 7, s_9_6, -1, -1, 0}, /* 7 */ { 6, s_9_7, -1, -1, 0} }; static const symbol s_10_0[5] = { 'a', 'n', 'd', 'e', 's' }; static const symbol s_10_1[5] = { 'a', 't', 'l', 'a', 's' }; static const symbol s_10_2[4] = { 'b', 'i', 'a', 's' }; static const symbol s_10_3[6] = { 'c', 'o', 's', 'm', 'o', 's' }; static const symbol s_10_4[5] = { 'd', 'y', 'i', 'n', 'g' }; static const symbol s_10_5[5] = { 'e', 'a', 'r', 'l', 'y' }; static const 
symbol s_10_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' }; static const symbol s_10_7[4] = { 'h', 'o', 'w', 'e' }; static const symbol s_10_8[4] = { 'i', 'd', 'l', 'y' }; static const symbol s_10_9[5] = { 'l', 'y', 'i', 'n', 'g' }; static const symbol s_10_10[4] = { 'n', 'e', 'w', 's' }; static const symbol s_10_11[4] = { 'o', 'n', 'l', 'y' }; static const symbol s_10_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' }; static const symbol s_10_13[5] = { 's', 'k', 'i', 'e', 's' }; static const symbol s_10_14[4] = { 's', 'k', 'i', 's' }; static const symbol s_10_15[3] = { 's', 'k', 'y' }; static const symbol s_10_16[5] = { 't', 'y', 'i', 'n', 'g' }; static const symbol s_10_17[4] = { 'u', 'g', 'l', 'y' }; static const struct among a_10[18] = { /* 0 */ { 5, s_10_0, -1, -1, 0}, /* 1 */ { 5, s_10_1, -1, -1, 0}, /* 2 */ { 4, s_10_2, -1, -1, 0}, /* 3 */ { 6, s_10_3, -1, -1, 0}, /* 4 */ { 5, s_10_4, -1, 3, 0}, /* 5 */ { 5, s_10_5, -1, 9, 0}, /* 6 */ { 6, s_10_6, -1, 7, 0}, /* 7 */ { 4, s_10_7, -1, -1, 0}, /* 8 */ { 4, s_10_8, -1, 6, 0}, /* 9 */ { 5, s_10_9, -1, 4, 0}, /* 10 */ { 4, s_10_10, -1, -1, 0}, /* 11 */ { 4, s_10_11, -1, 10, 0}, /* 12 */ { 6, s_10_12, -1, 11, 0}, /* 13 */ { 5, s_10_13, -1, 2, 0}, /* 14 */ { 4, s_10_14, -1, 1, 0}, /* 15 */ { 3, s_10_15, -1, -1, 0}, /* 16 */ { 5, s_10_16, -1, 5, 0}, /* 17 */ { 4, s_10_17, -1, 8, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1 }; static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; static const unsigned char g_valid_LI[] = { 55, 141, 2 }; static const symbol s_0[] = { '\'' }; static const symbol s_1[] = { 'y' }; static const symbol s_2[] = { 'Y' }; static const symbol s_3[] = { 'y' }; static const symbol s_4[] = { 'Y' }; static const symbol s_5[] = { 's', 's' }; static const symbol s_6[] = { 'i' }; static const symbol s_7[] = { 'i', 'e' }; static const symbol s_8[] = { 'e', 'e' }; static const symbol s_9[] = { 'e' }; static const symbol s_10[] = { 'e' }; static const symbol s_11[] = { 'y' }; static const symbol 
s_12[] = { 'Y' }; static const symbol s_13[] = { 'i' }; static const symbol s_14[] = { 't', 'i', 'o', 'n' }; static const symbol s_15[] = { 'e', 'n', 'c', 'e' }; static const symbol s_16[] = { 'a', 'n', 'c', 'e' }; static const symbol s_17[] = { 'a', 'b', 'l', 'e' }; static const symbol s_18[] = { 'e', 'n', 't' }; static const symbol s_19[] = { 'i', 'z', 'e' }; static const symbol s_20[] = { 'a', 't', 'e' }; static const symbol s_21[] = { 'a', 'l' }; static const symbol s_22[] = { 'f', 'u', 'l' }; static const symbol s_23[] = { 'o', 'u', 's' }; static const symbol s_24[] = { 'i', 'v', 'e' }; static const symbol s_25[] = { 'b', 'l', 'e' }; static const symbol s_26[] = { 'l' }; static const symbol s_27[] = { 'o', 'g' }; static const symbol s_28[] = { 'f', 'u', 'l' }; static const symbol s_29[] = { 'l', 'e', 's', 's' }; static const symbol s_30[] = { 't', 'i', 'o', 'n' }; static const symbol s_31[] = { 'a', 't', 'e' }; static const symbol s_32[] = { 'a', 'l' }; static const symbol s_33[] = { 'i', 'c' }; static const symbol s_34[] = { 's' }; static const symbol s_35[] = { 't' }; static const symbol s_36[] = { 'l' }; static const symbol s_37[] = { 's', 'k', 'i' }; static const symbol s_38[] = { 's', 'k', 'y' }; static const symbol s_39[] = { 'd', 'i', 'e' }; static const symbol s_40[] = { 'l', 'i', 'e' }; static const symbol s_41[] = { 't', 'i', 'e' }; static const symbol s_42[] = { 'i', 'd', 'l' }; static const symbol s_43[] = { 'g', 'e', 'n', 't', 'l' }; static const symbol s_44[] = { 'u', 'g', 'l', 'i' }; static const symbol s_45[] = { 'e', 'a', 'r', 'l', 'i' }; static const symbol s_46[] = { 'o', 'n', 'l', 'i' }; static const symbol s_47[] = { 's', 'i', 'n', 'g', 'l' }; static const symbol s_48[] = { 'Y' }; static const symbol s_49[] = { 'y' }; static int r_prelude(struct SN_env * z) { z->B[0] = 0; /* unset Y_found, line 26 */ { int c1 = z->c; /* do, line 27 */ z->bra = z->c; /* [, line 27 */ if (!(eq_s(z, 1, s_0))) goto lab0; z->ket = z->c; /* ], line 27 */ { int 
ret = slice_del(z); /* delete, line 27 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 28 */ z->bra = z->c; /* [, line 28 */ if (!(eq_s(z, 1, s_1))) goto lab1; z->ket = z->c; /* ], line 28 */ { int ret = slice_from_s(z, 1, s_2); /* <-, line 28 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 28 */ lab1: z->c = c2; } { int c3 = z->c; /* do, line 29 */ while(1) { /* repeat, line 29 */ int c4 = z->c; while(1) { /* goto, line 29 */ int c5 = z->c; if (in_grouping(z, g_v, 97, 121, 0)) goto lab4; z->bra = z->c; /* [, line 29 */ if (!(eq_s(z, 1, s_3))) goto lab4; z->ket = z->c; /* ], line 29 */ z->c = c5; break; lab4: z->c = c5; if (z->c >= z->l) goto lab3; z->c++; /* goto, line 29 */ } { int ret = slice_from_s(z, 1, s_4); /* <-, line 29 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 29 */ continue; lab3: z->c = c4; break; } z->c = c3; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { int c1 = z->c; /* do, line 35 */ { int c2 = z->c; /* or, line 41 */ if (z->c + 4 >= z->l || z->p[z->c + 4] >> 5 != 3 || !((2375680 >> (z->p[z->c + 4] & 0x1f)) & 1)) goto lab2; if (!(find_among(z, a_0, 3))) goto lab2; /* among, line 36 */ goto lab1; lab2: z->c = c2; { /* gopast */ /* grouping v, line 41 */ int ret = out_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* non v, line 41 */ int ret = in_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } } lab1: z->I[0] = z->c; /* setmark p1, line 42 */ { /* gopast */ /* grouping v, line 43 */ int ret = out_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* non v, line 43 */ int ret = in_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 43 */ lab0: z->c = c1; } return 1; } static int r_shortv(struct SN_env * z) { { int m1 = z->l - z->c; (void)m1; /* or, line 51 */ if (out_grouping_b(z, g_v_WXY, 89, 
121, 0)) goto lab1; if (in_grouping_b(z, g_v, 97, 121, 0)) goto lab1; if (out_grouping_b(z, g_v, 97, 121, 0)) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (out_grouping_b(z, g_v, 97, 121, 0)) return 0; if (in_grouping_b(z, g_v, 97, 121, 0)) return 0; if (z->c > z->lb) return 0; /* atlimit, line 52 */ } lab0: return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_Step_1a(struct SN_env * z) { int among_var; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 59 */ z->ket = z->c; /* [, line 60 */ if (z->c <= z->lb || (z->p[z->c - 1] != 39 && z->p[z->c - 1] != 115)) { z->c = z->l - m_keep; goto lab0; } among_var = find_among_b(z, a_1, 3); /* substring, line 60 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 60 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab0; } case 1: { int ret = slice_del(z); /* delete, line 62 */ if (ret < 0) return ret; } break; } lab0: ; } z->ket = z->c; /* [, line 65 */ if (z->c <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 115)) return 0; among_var = find_among_b(z, a_2, 6); /* substring, line 65 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 65 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 2, s_5); /* <-, line 66 */ if (ret < 0) return ret; } break; case 2: { int m1 = z->l - z->c; (void)m1; /* or, line 68 */ { int ret = z->c - 2; if (z->lb > ret || ret > z->l) goto lab2; z->c = ret; /* hop, line 68 */ } { int ret = slice_from_s(z, 1, s_6); /* <-, line 68 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = z->l - m1; { int ret = slice_from_s(z, 2, s_7); /* <-, line 68 */ if (ret < 0) return ret; } } lab1: break; case 3: if (z->c <= z->lb) return 0; z->c--; /* next, line 69 */ { /* gopast */ /* grouping v, line 69 */ int ret = out_grouping_b(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; 
} { int ret = slice_del(z); /* delete, line 69 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_1b(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 75 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((33554576 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_4, 6); /* substring, line 75 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 75 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 77 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 2, s_8); /* <-, line 77 */ if (ret < 0) return ret; } break; case 2: { int m_test = z->l - z->c; /* test, line 80 */ { /* gopast */ /* grouping v, line 80 */ int ret = out_grouping_b(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 80 */ if (ret < 0) return ret; } { int m_test = z->l - z->c; /* test, line 81 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68514004 >> (z->p[z->c - 1] & 0x1f)) & 1)) among_var = 3; else among_var = find_among_b(z, a_3, 13); /* substring, line 81 */ if (!(among_var)) return 0; z->c = z->l - m_test; } switch(among_var) { case 0: return 0; case 1: { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_9); /* <+, line 83 */ z->c = c_keep; if (ret < 0) return ret; } break; case 2: z->ket = z->c; /* [, line 86 */ if (z->c <= z->lb) return 0; z->c--; /* next, line 86 */ z->bra = z->c; /* ], line 86 */ { int ret = slice_del(z); /* delete, line 86 */ if (ret < 0) return ret; } break; case 3: if (z->c != z->I[0]) return 0; /* atmark, line 87 */ { int m_test = z->l - z->c; /* test, line 87 */ { int ret = r_shortv(z); if (ret == 0) return 0; /* call shortv, line 87 */ if (ret < 0) return ret; } z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_10); /* <+, line 87 */ z->c = c_keep; if (ret < 0) return ret; } break; } break; } return 1; } static int 
r_Step_1c(struct SN_env * z) { z->ket = z->c; /* [, line 94 */ { int m1 = z->l - z->c; (void)m1; /* or, line 94 */ if (!(eq_s_b(z, 1, s_11))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_12))) return 0; } lab0: z->bra = z->c; /* ], line 94 */ if (out_grouping_b(z, g_v, 97, 121, 0)) return 0; { int m2 = z->l - z->c; (void)m2; /* not, line 95 */ if (z->c > z->lb) goto lab2; /* atlimit, line 95 */ return 0; lab2: z->c = z->l - m2; } { int ret = slice_from_s(z, 1, s_13); /* <-, line 96 */ if (ret < 0) return ret; } return 1; } static int r_Step_2(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 100 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_5, 24); /* substring, line 100 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 100 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 100 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_14); /* <-, line 101 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 4, s_15); /* <-, line 102 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 4, s_16); /* <-, line 103 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 4, s_17); /* <-, line 104 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 3, s_18); /* <-, line 105 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 3, s_19); /* <-, line 107 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 3, s_20); /* <-, line 109 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 2, s_21); /* <-, line 111 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_from_s(z, 3, s_22); /* <-, line 112 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 3, s_23); /* <-, line 114 */ if (ret < 0) return ret; } break; 
case 11: { int ret = slice_from_s(z, 3, s_24); /* <-, line 116 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_from_s(z, 3, s_25); /* <-, line 118 */ if (ret < 0) return ret; } break; case 13: if (!(eq_s_b(z, 1, s_26))) return 0; { int ret = slice_from_s(z, 2, s_27); /* <-, line 119 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_from_s(z, 3, s_28); /* <-, line 120 */ if (ret < 0) return ret; } break; case 15: { int ret = slice_from_s(z, 4, s_29); /* <-, line 121 */ if (ret < 0) return ret; } break; case 16: if (in_grouping_b(z, g_valid_LI, 99, 116, 0)) return 0; { int ret = slice_del(z); /* delete, line 122 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_3(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 127 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((528928 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_6, 9); /* substring, line 127 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 127 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 127 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_30); /* <-, line 128 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 3, s_31); /* <-, line 129 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 2, s_32); /* <-, line 130 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 2, s_33); /* <-, line 132 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } break; case 6: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 136 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 136 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_4(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 141 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1864232 >> 
(z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_7, 18); /* substring, line 141 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 141 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 141 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 144 */ if (ret < 0) return ret; } break; case 2: { int m1 = z->l - z->c; (void)m1; /* or, line 145 */ if (!(eq_s_b(z, 1, s_34))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_35))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 145 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_5(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 150 */ if (z->c <= z->lb || (z->p[z->c - 1] != 101 && z->p[z->c - 1] != 108)) return 0; among_var = find_among_b(z, a_8, 2); /* substring, line 150 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 150 */ switch(among_var) { case 0: return 0; case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 151 */ { int ret = r_R2(z); if (ret == 0) goto lab1; /* call R2, line 151 */ if (ret < 0) return ret; } goto lab0; lab1: z->c = z->l - m1; { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 151 */ if (ret < 0) return ret; } { int m2 = z->l - z->c; (void)m2; /* not, line 151 */ { int ret = r_shortv(z); if (ret == 0) goto lab2; /* call shortv, line 151 */ if (ret < 0) return ret; } return 0; lab2: z->c = z->l - m2; } } lab0: { int ret = slice_del(z); /* delete, line 151 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 152 */ if (ret < 0) return ret; } if (!(eq_s_b(z, 1, s_36))) return 0; { int ret = slice_del(z); /* delete, line 152 */ if (ret < 0) return ret; } break; } return 1; } static int r_exception2(struct SN_env * z) { z->ket = z->c; /* [, line 158 */ if (z->c - 5 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 103)) return 0; if (!(find_among_b(z, 
a_9, 8))) return 0; /* substring, line 158 */ z->bra = z->c; /* ], line 158 */ if (z->c > z->lb) return 0; /* atlimit, line 158 */ return 1; } static int r_exception1(struct SN_env * z) { int among_var; z->bra = z->c; /* [, line 170 */ if (z->c + 2 >= z->l || z->p[z->c + 2] >> 5 != 3 || !((42750482 >> (z->p[z->c + 2] & 0x1f)) & 1)) return 0; among_var = find_among(z, a_10, 18); /* substring, line 170 */ if (!(among_var)) return 0; z->ket = z->c; /* ], line 170 */ if (z->c < z->l) return 0; /* atlimit, line 170 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 3, s_37); /* <-, line 174 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 3, s_38); /* <-, line 175 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 3, s_39); /* <-, line 176 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 3, s_40); /* <-, line 177 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 3, s_41); /* <-, line 178 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 3, s_42); /* <-, line 182 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 5, s_43); /* <-, line 183 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 4, s_44); /* <-, line 184 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_from_s(z, 5, s_45); /* <-, line 185 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 4, s_46); /* <-, line 186 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_from_s(z, 5, s_47); /* <-, line 187 */ if (ret < 0) return ret; } break; } return 1; } static int r_postlude(struct SN_env * z) { if (!(z->B[0])) return 0; /* Boolean test Y_found, line 203 */ while(1) { /* repeat, line 203 */ int c1 = z->c; while(1) { /* goto, line 203 */ int c2 = z->c; z->bra = z->c; /* [, line 203 */ if (!(eq_s(z, 1, s_48))) goto lab1; z->ket = z->c; /* ], line 203 */ z->c = c2; break; lab1: z->c = 
c2; if (z->c >= z->l) goto lab0; z->c++; /* goto, line 203 */ } { int ret = slice_from_s(z, 1, s_49); /* <-, line 203 */ if (ret < 0) return ret; } continue; lab0: z->c = c1; break; } return 1; } extern int english_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* or, line 207 */ { int ret = r_exception1(z); if (ret == 0) goto lab1; /* call exception1, line 207 */ if (ret < 0) return ret; } goto lab0; lab1: z->c = c1; { int c2 = z->c; /* not, line 208 */ { int ret = z->c + 3; if (0 > ret || ret > z->l) goto lab3; z->c = ret; /* hop, line 208 */ } goto lab2; lab3: z->c = c2; } goto lab0; lab2: z->c = c1; { int c3 = z->c; /* do, line 209 */ { int ret = r_prelude(z); if (ret == 0) goto lab4; /* call prelude, line 209 */ if (ret < 0) return ret; } lab4: z->c = c3; } { int c4 = z->c; /* do, line 210 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab5; /* call mark_regions, line 210 */ if (ret < 0) return ret; } lab5: z->c = c4; } z->lb = z->c; z->c = z->l; /* backwards, line 211 */ { int m5 = z->l - z->c; (void)m5; /* do, line 213 */ { int ret = r_Step_1a(z); if (ret == 0) goto lab6; /* call Step_1a, line 213 */ if (ret < 0) return ret; } lab6: z->c = z->l - m5; } { int m6 = z->l - z->c; (void)m6; /* or, line 215 */ { int ret = r_exception2(z); if (ret == 0) goto lab8; /* call exception2, line 215 */ if (ret < 0) return ret; } goto lab7; lab8: z->c = z->l - m6; { int m7 = z->l - z->c; (void)m7; /* do, line 217 */ { int ret = r_Step_1b(z); if (ret == 0) goto lab9; /* call Step_1b, line 217 */ if (ret < 0) return ret; } lab9: z->c = z->l - m7; } { int m8 = z->l - z->c; (void)m8; /* do, line 218 */ { int ret = r_Step_1c(z); if (ret == 0) goto lab10; /* call Step_1c, line 218 */ if (ret < 0) return ret; } lab10: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 220 */ { int ret = r_Step_2(z); if (ret == 0) goto lab11; /* call Step_2, line 220 */ if (ret < 0) return ret; } lab11: z->c = z->l - m9; } { int m10 = z->l - z->c; (void)m10; /* do, line 
221 */ { int ret = r_Step_3(z); if (ret == 0) goto lab12; /* call Step_3, line 221 */ if (ret < 0) return ret; } lab12: z->c = z->l - m10; } { int m11 = z->l - z->c; (void)m11; /* do, line 222 */ { int ret = r_Step_4(z); if (ret == 0) goto lab13; /* call Step_4, line 222 */ if (ret < 0) return ret; } lab13: z->c = z->l - m11; } { int m12 = z->l - z->c; (void)m12; /* do, line 224 */ { int ret = r_Step_5(z); if (ret == 0) goto lab14; /* call Step_5, line 224 */ if (ret < 0) return ret; } lab14: z->c = z->l - m12; } } lab7: z->c = z->lb; { int c13 = z->c; /* do, line 227 */ { int ret = r_postlude(z); if (ret == 0) goto lab15; /* call postlude, line 227 */ if (ret < 0) return ret; } lab15: z->c = c13; } } lab0: return 1; } extern struct SN_env * english_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 1); } extern void english_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_english.h000066400000000000000000000005051456444476200314170ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* english_ISO_8859_1_create_env(void); extern void english_ISO_8859_1_close_env(struct SN_env* z); extern int english_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_finnish.c000066400000000000000000000623641456444476200314320ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int finnish_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_tidy(struct SN_env * z); static int r_other_endings(struct SN_env * z); static int r_t_plural(struct SN_env * z); static int r_i_plural(struct SN_env * z); static int 
r_case_ending(struct SN_env * z); static int r_VI(struct SN_env * z); static int r_LONG(struct SN_env * z); static int r_possessive(struct SN_env * z); static int r_particle_etc(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * finnish_ISO_8859_1_create_env(void); extern void finnish_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[2] = { 'p', 'a' }; static const symbol s_0_1[3] = { 's', 't', 'i' }; static const symbol s_0_2[4] = { 'k', 'a', 'a', 'n' }; static const symbol s_0_3[3] = { 'h', 'a', 'n' }; static const symbol s_0_4[3] = { 'k', 'i', 'n' }; static const symbol s_0_5[3] = { 'h', 0xE4, 'n' }; static const symbol s_0_6[4] = { 'k', 0xE4, 0xE4, 'n' }; static const symbol s_0_7[2] = { 'k', 'o' }; static const symbol s_0_8[2] = { 'p', 0xE4 }; static const symbol s_0_9[2] = { 'k', 0xF6 }; static const struct among a_0[10] = { /* 0 */ { 2, s_0_0, -1, 1, 0}, /* 1 */ { 3, s_0_1, -1, 2, 0}, /* 2 */ { 4, s_0_2, -1, 1, 0}, /* 3 */ { 3, s_0_3, -1, 1, 0}, /* 4 */ { 3, s_0_4, -1, 1, 0}, /* 5 */ { 3, s_0_5, -1, 1, 0}, /* 6 */ { 4, s_0_6, -1, 1, 0}, /* 7 */ { 2, s_0_7, -1, 1, 0}, /* 8 */ { 2, s_0_8, -1, 1, 0}, /* 9 */ { 2, s_0_9, -1, 1, 0} }; static const symbol s_1_0[3] = { 'l', 'l', 'a' }; static const symbol s_1_1[2] = { 'n', 'a' }; static const symbol s_1_2[3] = { 's', 's', 'a' }; static const symbol s_1_3[2] = { 't', 'a' }; static const symbol s_1_4[3] = { 'l', 't', 'a' }; static const symbol s_1_5[3] = { 's', 't', 'a' }; static const struct among a_1[6] = { /* 0 */ { 3, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0}, /* 2 */ { 3, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0}, /* 4 */ { 3, s_1_4, 3, -1, 0}, /* 5 */ { 3, s_1_5, 3, -1, 0} }; static const symbol s_2_0[3] = { 'l', 'l', 0xE4 }; static const symbol s_2_1[2] = { 'n', 0xE4 }; static const symbol s_2_2[3] = { 's', 's', 0xE4 }; static const symbol 
s_2_3[2] = { 't', 0xE4 }; static const symbol s_2_4[3] = { 'l', 't', 0xE4 }; static const symbol s_2_5[3] = { 's', 't', 0xE4 }; static const struct among a_2[6] = { /* 0 */ { 3, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 3, s_2_2, -1, -1, 0}, /* 3 */ { 2, s_2_3, -1, -1, 0}, /* 4 */ { 3, s_2_4, 3, -1, 0}, /* 5 */ { 3, s_2_5, 3, -1, 0} }; static const symbol s_3_0[3] = { 'l', 'l', 'e' }; static const symbol s_3_1[3] = { 'i', 'n', 'e' }; static const struct among a_3[2] = { /* 0 */ { 3, s_3_0, -1, -1, 0}, /* 1 */ { 3, s_3_1, -1, -1, 0} }; static const symbol s_4_0[3] = { 'n', 's', 'a' }; static const symbol s_4_1[3] = { 'm', 'm', 'e' }; static const symbol s_4_2[3] = { 'n', 'n', 'e' }; static const symbol s_4_3[2] = { 'n', 'i' }; static const symbol s_4_4[2] = { 's', 'i' }; static const symbol s_4_5[2] = { 'a', 'n' }; static const symbol s_4_6[2] = { 'e', 'n' }; static const symbol s_4_7[2] = { 0xE4, 'n' }; static const symbol s_4_8[3] = { 'n', 's', 0xE4 }; static const struct among a_4[9] = { /* 0 */ { 3, s_4_0, -1, 3, 0}, /* 1 */ { 3, s_4_1, -1, 3, 0}, /* 2 */ { 3, s_4_2, -1, 3, 0}, /* 3 */ { 2, s_4_3, -1, 2, 0}, /* 4 */ { 2, s_4_4, -1, 1, 0}, /* 5 */ { 2, s_4_5, -1, 4, 0}, /* 6 */ { 2, s_4_6, -1, 6, 0}, /* 7 */ { 2, s_4_7, -1, 5, 0}, /* 8 */ { 3, s_4_8, -1, 3, 0} }; static const symbol s_5_0[2] = { 'a', 'a' }; static const symbol s_5_1[2] = { 'e', 'e' }; static const symbol s_5_2[2] = { 'i', 'i' }; static const symbol s_5_3[2] = { 'o', 'o' }; static const symbol s_5_4[2] = { 'u', 'u' }; static const symbol s_5_5[2] = { 0xE4, 0xE4 }; static const symbol s_5_6[2] = { 0xF6, 0xF6 }; static const struct among a_5[7] = { /* 0 */ { 2, s_5_0, -1, -1, 0}, /* 1 */ { 2, s_5_1, -1, -1, 0}, /* 2 */ { 2, s_5_2, -1, -1, 0}, /* 3 */ { 2, s_5_3, -1, -1, 0}, /* 4 */ { 2, s_5_4, -1, -1, 0}, /* 5 */ { 2, s_5_5, -1, -1, 0}, /* 6 */ { 2, s_5_6, -1, -1, 0} }; static const symbol s_6_0[1] = { 'a' }; static const symbol s_6_1[3] = { 'l', 'l', 'a' }; static const symbol 
s_6_2[2] = { 'n', 'a' }; static const symbol s_6_3[3] = { 's', 's', 'a' }; static const symbol s_6_4[2] = { 't', 'a' }; static const symbol s_6_5[3] = { 'l', 't', 'a' }; static const symbol s_6_6[3] = { 's', 't', 'a' }; static const symbol s_6_7[3] = { 't', 't', 'a' }; static const symbol s_6_8[3] = { 'l', 'l', 'e' }; static const symbol s_6_9[3] = { 'i', 'n', 'e' }; static const symbol s_6_10[3] = { 'k', 's', 'i' }; static const symbol s_6_11[1] = { 'n' }; static const symbol s_6_12[3] = { 'h', 'a', 'n' }; static const symbol s_6_13[3] = { 'd', 'e', 'n' }; static const symbol s_6_14[4] = { 's', 'e', 'e', 'n' }; static const symbol s_6_15[3] = { 'h', 'e', 'n' }; static const symbol s_6_16[4] = { 't', 't', 'e', 'n' }; static const symbol s_6_17[3] = { 'h', 'i', 'n' }; static const symbol s_6_18[4] = { 's', 'i', 'i', 'n' }; static const symbol s_6_19[3] = { 'h', 'o', 'n' }; static const symbol s_6_20[3] = { 'h', 0xE4, 'n' }; static const symbol s_6_21[3] = { 'h', 0xF6, 'n' }; static const symbol s_6_22[1] = { 0xE4 }; static const symbol s_6_23[3] = { 'l', 'l', 0xE4 }; static const symbol s_6_24[2] = { 'n', 0xE4 }; static const symbol s_6_25[3] = { 's', 's', 0xE4 }; static const symbol s_6_26[2] = { 't', 0xE4 }; static const symbol s_6_27[3] = { 'l', 't', 0xE4 }; static const symbol s_6_28[3] = { 's', 't', 0xE4 }; static const symbol s_6_29[3] = { 't', 't', 0xE4 }; static const struct among a_6[30] = { /* 0 */ { 1, s_6_0, -1, 8, 0}, /* 1 */ { 3, s_6_1, 0, -1, 0}, /* 2 */ { 2, s_6_2, 0, -1, 0}, /* 3 */ { 3, s_6_3, 0, -1, 0}, /* 4 */ { 2, s_6_4, 0, -1, 0}, /* 5 */ { 3, s_6_5, 4, -1, 0}, /* 6 */ { 3, s_6_6, 4, -1, 0}, /* 7 */ { 3, s_6_7, 4, 9, 0}, /* 8 */ { 3, s_6_8, -1, -1, 0}, /* 9 */ { 3, s_6_9, -1, -1, 0}, /* 10 */ { 3, s_6_10, -1, -1, 0}, /* 11 */ { 1, s_6_11, -1, 7, 0}, /* 12 */ { 3, s_6_12, 11, 1, 0}, /* 13 */ { 3, s_6_13, 11, -1, r_VI}, /* 14 */ { 4, s_6_14, 11, -1, r_LONG}, /* 15 */ { 3, s_6_15, 11, 2, 0}, /* 16 */ { 4, s_6_16, 11, -1, r_VI}, /* 17 */ { 3, 
s_6_17, 11, 3, 0}, /* 18 */ { 4, s_6_18, 11, -1, r_VI}, /* 19 */ { 3, s_6_19, 11, 4, 0}, /* 20 */ { 3, s_6_20, 11, 5, 0}, /* 21 */ { 3, s_6_21, 11, 6, 0}, /* 22 */ { 1, s_6_22, -1, 8, 0}, /* 23 */ { 3, s_6_23, 22, -1, 0}, /* 24 */ { 2, s_6_24, 22, -1, 0}, /* 25 */ { 3, s_6_25, 22, -1, 0}, /* 26 */ { 2, s_6_26, 22, -1, 0}, /* 27 */ { 3, s_6_27, 26, -1, 0}, /* 28 */ { 3, s_6_28, 26, -1, 0}, /* 29 */ { 3, s_6_29, 26, 9, 0} }; static const symbol s_7_0[3] = { 'e', 'j', 'a' }; static const symbol s_7_1[3] = { 'm', 'm', 'a' }; static const symbol s_7_2[4] = { 'i', 'm', 'm', 'a' }; static const symbol s_7_3[3] = { 'm', 'p', 'a' }; static const symbol s_7_4[4] = { 'i', 'm', 'p', 'a' }; static const symbol s_7_5[3] = { 'm', 'm', 'i' }; static const symbol s_7_6[4] = { 'i', 'm', 'm', 'i' }; static const symbol s_7_7[3] = { 'm', 'p', 'i' }; static const symbol s_7_8[4] = { 'i', 'm', 'p', 'i' }; static const symbol s_7_9[3] = { 'e', 'j', 0xE4 }; static const symbol s_7_10[3] = { 'm', 'm', 0xE4 }; static const symbol s_7_11[4] = { 'i', 'm', 'm', 0xE4 }; static const symbol s_7_12[3] = { 'm', 'p', 0xE4 }; static const symbol s_7_13[4] = { 'i', 'm', 'p', 0xE4 }; static const struct among a_7[14] = { /* 0 */ { 3, s_7_0, -1, -1, 0}, /* 1 */ { 3, s_7_1, -1, 1, 0}, /* 2 */ { 4, s_7_2, 1, -1, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 4, s_7_4, 3, -1, 0}, /* 5 */ { 3, s_7_5, -1, 1, 0}, /* 6 */ { 4, s_7_6, 5, -1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 4, s_7_8, 7, -1, 0}, /* 9 */ { 3, s_7_9, -1, -1, 0}, /* 10 */ { 3, s_7_10, -1, 1, 0}, /* 11 */ { 4, s_7_11, 10, -1, 0}, /* 12 */ { 3, s_7_12, -1, 1, 0}, /* 13 */ { 4, s_7_13, 12, -1, 0} }; static const symbol s_8_0[1] = { 'i' }; static const symbol s_8_1[1] = { 'j' }; static const struct among a_8[2] = { /* 0 */ { 1, s_8_0, -1, -1, 0}, /* 1 */ { 1, s_8_1, -1, -1, 0} }; static const symbol s_9_0[3] = { 'm', 'm', 'a' }; static const symbol s_9_1[4] = { 'i', 'm', 'm', 'a' }; static const struct among a_9[2] = { /* 0 */ { 3, s_9_0, 
-1, 1, 0}, /* 1 */ { 4, s_9_1, 0, -1, 0} }; static const unsigned char g_AEI[] = { 17, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }; static const unsigned char g_V1[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; static const unsigned char g_V2[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; static const unsigned char g_particle_end[] = { 17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; static const symbol s_0[] = { 'k' }; static const symbol s_1[] = { 'k', 's', 'e' }; static const symbol s_2[] = { 'k', 's', 'i' }; static const symbol s_3[] = { 'i' }; static const symbol s_4[] = { 'a' }; static const symbol s_5[] = { 'e' }; static const symbol s_6[] = { 'i' }; static const symbol s_7[] = { 'o' }; static const symbol s_8[] = { 0xE4 }; static const symbol s_9[] = { 0xF6 }; static const symbol s_10[] = { 'i', 'e' }; static const symbol s_11[] = { 'e' }; static const symbol s_12[] = { 'p', 'o' }; static const symbol s_13[] = { 't' }; static const symbol s_14[] = { 'p', 'o' }; static const symbol s_15[] = { 'j' }; static const symbol s_16[] = { 'o' }; static const symbol s_17[] = { 'u' }; static const symbol s_18[] = { 'o' }; static const symbol s_19[] = { 'j' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; if (out_grouping(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* grouping V1, line 46 */ { /* gopast */ /* non V1, line 46 */ int ret = in_grouping(z, g_V1, 97, 246, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 46 */ if (out_grouping(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* grouping V1, line 47 */ { /* gopast */ /* non V1, line 47 */ int ret = in_grouping(z, g_V1, 97, 246, 1); if (ret < 0) return 0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 47 */ return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_particle_etc(struct SN_env * z) { int among_var; { int mlimit; /* 
setlimit, line 55 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 55 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 55 */ among_var = find_among_b(z, a_0, 10); /* substring, line 55 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 55 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: if (in_grouping_b(z, g_particle_end, 97, 246, 0)) return 0; break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 64 */ if (ret < 0) return ret; } break; } { int ret = slice_del(z); /* delete, line 66 */ if (ret < 0) return ret; } return 1; } static int r_possessive(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 69 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 69 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 69 */ among_var = find_among_b(z, a_4, 9); /* substring, line 69 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 69 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m2 = z->l - z->c; (void)m2; /* not, line 72 */ if (!(eq_s_b(z, 1, s_0))) goto lab0; return 0; lab0: z->c = z->l - m2; } { int ret = slice_del(z); /* delete, line 72 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 74 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 74 */ if (!(eq_s_b(z, 3, s_1))) return 0; z->bra = z->c; /* ], line 74 */ { int ret = slice_from_s(z, 3, s_2); /* <-, line 74 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 78 */ if (ret < 0) return ret; } break; case 4: if (z->c - 1 <= z->lb || z->p[z->c - 1] != 97) return 0; if (!(find_among_b(z, a_1, 6))) return 0; /* among, line 81 */ { int ret = slice_del(z); /* delete, line 81 */ if (ret < 0) return ret; } break; case 5: if (z->c - 1 <= z->lb || z->p[z->c - 1] != 228) 
return 0; if (!(find_among_b(z, a_2, 6))) return 0; /* among, line 83 */ { int ret = slice_del(z); /* delete, line 84 */ if (ret < 0) return ret; } break; case 6: if (z->c - 2 <= z->lb || z->p[z->c - 1] != 101) return 0; if (!(find_among_b(z, a_3, 2))) return 0; /* among, line 86 */ { int ret = slice_del(z); /* delete, line 86 */ if (ret < 0) return ret; } break; } return 1; } static int r_LONG(struct SN_env * z) { if (!(find_among_b(z, a_5, 7))) return 0; /* among, line 91 */ return 1; } static int r_VI(struct SN_env * z) { if (!(eq_s_b(z, 1, s_3))) return 0; if (in_grouping_b(z, g_V2, 97, 246, 0)) return 0; return 1; } static int r_case_ending(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 96 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 96 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 96 */ among_var = find_among_b(z, a_6, 30); /* substring, line 96 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 96 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: if (!(eq_s_b(z, 1, s_4))) return 0; break; case 2: if (!(eq_s_b(z, 1, s_5))) return 0; break; case 3: if (!(eq_s_b(z, 1, s_6))) return 0; break; case 4: if (!(eq_s_b(z, 1, s_7))) return 0; break; case 5: if (!(eq_s_b(z, 1, s_8))) return 0; break; case 6: if (!(eq_s_b(z, 1, s_9))) return 0; break; case 7: { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ { int m2 = z->l - z->c; (void)m2; /* and, line 113 */ { int m3 = z->l - z->c; (void)m3; /* or, line 112 */ { int ret = r_LONG(z); if (ret == 0) goto lab2; /* call LONG, line 111 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = z->l - m3; if (!(eq_s_b(z, 2, s_10))) { z->c = z->l - m_keep; goto lab0; } } lab1: z->c = z->l - m2; if (z->c <= z->lb) { z->c = z->l - m_keep; goto lab0; } z->c--; /* next, line 113 */ } z->bra = z->c; /* ], line 113 */ lab0: ; } break; case 8: if (in_grouping_b(z, g_V1, 97, 
246, 0)) return 0; if (out_grouping_b(z, g_V1, 97, 246, 0)) return 0; break; case 9: if (!(eq_s_b(z, 1, s_11))) return 0; break; } { int ret = slice_del(z); /* delete, line 138 */ if (ret < 0) return ret; } z->B[0] = 1; /* set ending_removed, line 139 */ return 1; } static int r_other_endings(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 142 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[1]) return 0; z->c = z->I[1]; /* tomark, line 142 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 142 */ among_var = find_among_b(z, a_7, 14); /* substring, line 142 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 142 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m2 = z->l - z->c; (void)m2; /* not, line 146 */ if (!(eq_s_b(z, 2, s_12))) goto lab0; return 0; lab0: z->c = z->l - m2; } break; } { int ret = slice_del(z); /* delete, line 151 */ if (ret < 0) return ret; } return 1; } static int r_i_plural(struct SN_env * z) { { int mlimit; /* setlimit, line 154 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 154 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 154 */ if (z->c <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 106)) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_8, 2))) { z->lb = mlimit; return 0; } /* substring, line 154 */ z->bra = z->c; /* ], line 154 */ z->lb = mlimit; } { int ret = slice_del(z); /* delete, line 158 */ if (ret < 0) return ret; } return 1; } static int r_t_plural(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 161 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 161 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 162 */ if (!(eq_s_b(z, 1, s_13))) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 162 */ { int m_test = z->l - z->c; /* test, line 162 */ if 
(in_grouping_b(z, g_V1, 97, 246, 0)) { z->lb = mlimit; return 0; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 163 */ if (ret < 0) return ret; } z->lb = mlimit; } { int mlimit; /* setlimit, line 165 */ int m2 = z->l - z->c; (void)m2; if (z->c < z->I[1]) return 0; z->c = z->I[1]; /* tomark, line 165 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m2; z->ket = z->c; /* [, line 165 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] != 97) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_9, 2); /* substring, line 165 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 165 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m3 = z->l - z->c; (void)m3; /* not, line 167 */ if (!(eq_s_b(z, 2, s_14))) goto lab0; return 0; lab0: z->c = z->l - m3; } break; } { int ret = slice_del(z); /* delete, line 170 */ if (ret < 0) return ret; } return 1; } static int r_tidy(struct SN_env * z) { { int mlimit; /* setlimit, line 173 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 173 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* do, line 174 */ { int m3 = z->l - z->c; (void)m3; /* and, line 174 */ { int ret = r_LONG(z); if (ret == 0) goto lab0; /* call LONG, line 174 */ if (ret < 0) return ret; } z->c = z->l - m3; z->ket = z->c; /* [, line 174 */ if (z->c <= z->lb) goto lab0; z->c--; /* next, line 174 */ z->bra = z->c; /* ], line 174 */ { int ret = slice_del(z); /* delete, line 174 */ if (ret < 0) return ret; } } lab0: z->c = z->l - m2; } { int m4 = z->l - z->c; (void)m4; /* do, line 175 */ z->ket = z->c; /* [, line 175 */ if (in_grouping_b(z, g_AEI, 97, 228, 0)) goto lab1; z->bra = z->c; /* ], line 175 */ if (out_grouping_b(z, g_V1, 97, 246, 0)) goto lab1; { int ret = slice_del(z); /* delete, line 175 */ if (ret < 0) return ret; } lab1: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 176 */ z->ket = z->c; /* [, 
line 176 */ if (!(eq_s_b(z, 1, s_15))) goto lab2; z->bra = z->c; /* ], line 176 */ { int m6 = z->l - z->c; (void)m6; /* or, line 176 */ if (!(eq_s_b(z, 1, s_16))) goto lab4; goto lab3; lab4: z->c = z->l - m6; if (!(eq_s_b(z, 1, s_17))) goto lab2; } lab3: { int ret = slice_del(z); /* delete, line 176 */ if (ret < 0) return ret; } lab2: z->c = z->l - m5; } { int m7 = z->l - z->c; (void)m7; /* do, line 177 */ z->ket = z->c; /* [, line 177 */ if (!(eq_s_b(z, 1, s_18))) goto lab5; z->bra = z->c; /* ], line 177 */ if (!(eq_s_b(z, 1, s_19))) goto lab5; { int ret = slice_del(z); /* delete, line 177 */ if (ret < 0) return ret; } lab5: z->c = z->l - m7; } z->lb = mlimit; } if (in_grouping_b(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* non V1, line 179 */ z->ket = z->c; /* [, line 179 */ if (z->c <= z->lb) return 0; z->c--; /* next, line 179 */ z->bra = z->c; /* ], line 179 */ z->S[0] = slice_to(z, z->S[0]); /* -> x, line 179 */ if (z->S[0] == 0) return -1; /* -> x, line 179 */ if (!(eq_v_b(z, z->S[0]))) return 0; /* name x, line 179 */ { int ret = slice_del(z); /* delete, line 179 */ if (ret < 0) return ret; } return 1; } extern int finnish_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 185 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 185 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->B[0] = 0; /* unset ending_removed, line 186 */ z->lb = z->c; z->c = z->l; /* backwards, line 187 */ { int m2 = z->l - z->c; (void)m2; /* do, line 188 */ { int ret = r_particle_etc(z); if (ret == 0) goto lab1; /* call particle_etc, line 188 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 189 */ { int ret = r_possessive(z); if (ret == 0) goto lab2; /* call possessive, line 189 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 190 */ { int ret = r_case_ending(z); if (ret == 0) goto lab3; /* call case_ending, line 190 */ if 
(ret < 0) return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 191 */ { int ret = r_other_endings(z); if (ret == 0) goto lab4; /* call other_endings, line 191 */ if (ret < 0) return ret; } lab4: z->c = z->l - m5; } { int m6 = z->l - z->c; (void)m6; /* or, line 192 */ if (!(z->B[0])) goto lab6; /* Boolean test ending_removed, line 192 */ { int m7 = z->l - z->c; (void)m7; /* do, line 192 */ { int ret = r_i_plural(z); if (ret == 0) goto lab7; /* call i_plural, line 192 */ if (ret < 0) return ret; } lab7: z->c = z->l - m7; } goto lab5; lab6: z->c = z->l - m6; { int m8 = z->l - z->c; (void)m8; /* do, line 192 */ { int ret = r_t_plural(z); if (ret == 0) goto lab8; /* call t_plural, line 192 */ if (ret < 0) return ret; } lab8: z->c = z->l - m8; } } lab5: { int m9 = z->l - z->c; (void)m9; /* do, line 193 */ { int ret = r_tidy(z); if (ret == 0) goto lab9; /* call tidy, line 193 */ if (ret < 0) return ret; } lab9: z->c = z->l - m9; } z->c = z->lb; return 1; } extern struct SN_env * finnish_ISO_8859_1_create_env(void) { return SN_create_env(1, 2, 1); } extern void finnish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 1); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h000066400000000000000000000005051456444476200314240ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* finnish_ISO_8859_1_create_env(void); extern void finnish_ISO_8859_1_close_env(struct SN_env* z); extern int finnish_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_french.c000066400000000000000000001345611456444476200312400ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int 
french_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_un_accent(struct SN_env * z); static int r_un_double(struct SN_env * z); static int r_residual_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_i_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * french_ISO_8859_1_create_env(void); extern void french_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[3] = { 'c', 'o', 'l' }; static const symbol s_0_1[3] = { 'p', 'a', 'r' }; static const symbol s_0_2[3] = { 't', 'a', 'p' }; static const struct among a_0[3] = { /* 0 */ { 3, s_0_0, -1, -1, 0}, /* 1 */ { 3, s_0_1, -1, -1, 0}, /* 2 */ { 3, s_0_2, -1, -1, 0} }; static const symbol s_1_1[1] = { 'I' }; static const symbol s_1_2[1] = { 'U' }; static const symbol s_1_3[1] = { 'Y' }; static const struct among a_1[4] = { /* 0 */ { 0, 0, -1, 4, 0}, /* 1 */ { 1, s_1_1, 0, 1, 0}, /* 2 */ { 1, s_1_2, 0, 2, 0}, /* 3 */ { 1, s_1_3, 0, 3, 0} }; static const symbol s_2_0[3] = { 'i', 'q', 'U' }; static const symbol s_2_1[3] = { 'a', 'b', 'l' }; static const symbol s_2_2[3] = { 'I', 0xE8, 'r' }; static const symbol s_2_3[3] = { 'i', 0xE8, 'r' }; static const symbol s_2_4[3] = { 'e', 'u', 's' }; static const symbol s_2_5[2] = { 'i', 'v' }; static const struct among a_2[6] = { /* 0 */ { 3, s_2_0, -1, 3, 0}, /* 1 */ { 3, s_2_1, -1, 3, 0}, /* 2 */ { 3, s_2_2, -1, 4, 0}, /* 3 */ { 3, s_2_3, -1, 4, 0}, /* 4 */ { 3, s_2_4, -1, 2, 0}, /* 5 */ { 2, s_2_5, -1, 1, 0} }; static const symbol s_3_0[2] = { 'i', 'c' }; static const symbol s_3_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_3_2[2] = { 'i', 'v' }; 
static const struct among a_3[3] = { /* 0 */ { 2, s_3_0, -1, 2, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0}, /* 2 */ { 2, s_3_2, -1, 3, 0} }; static const symbol s_4_0[4] = { 'i', 'q', 'U', 'e' }; static const symbol s_4_1[6] = { 'a', 't', 'r', 'i', 'c', 'e' }; static const symbol s_4_2[4] = { 'a', 'n', 'c', 'e' }; static const symbol s_4_3[4] = { 'e', 'n', 'c', 'e' }; static const symbol s_4_4[5] = { 'l', 'o', 'g', 'i', 'e' }; static const symbol s_4_5[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_4_6[4] = { 'i', 's', 'm', 'e' }; static const symbol s_4_7[4] = { 'e', 'u', 's', 'e' }; static const symbol s_4_8[4] = { 'i', 's', 't', 'e' }; static const symbol s_4_9[3] = { 'i', 'v', 'e' }; static const symbol s_4_10[2] = { 'i', 'f' }; static const symbol s_4_11[5] = { 'u', 's', 'i', 'o', 'n' }; static const symbol s_4_12[5] = { 'a', 't', 'i', 'o', 'n' }; static const symbol s_4_13[5] = { 'u', 't', 'i', 'o', 'n' }; static const symbol s_4_14[5] = { 'a', 't', 'e', 'u', 'r' }; static const symbol s_4_15[5] = { 'i', 'q', 'U', 'e', 's' }; static const symbol s_4_16[7] = { 'a', 't', 'r', 'i', 'c', 'e', 's' }; static const symbol s_4_17[5] = { 'a', 'n', 'c', 'e', 's' }; static const symbol s_4_18[5] = { 'e', 'n', 'c', 'e', 's' }; static const symbol s_4_19[6] = { 'l', 'o', 'g', 'i', 'e', 's' }; static const symbol s_4_20[5] = { 'a', 'b', 'l', 'e', 's' }; static const symbol s_4_21[5] = { 'i', 's', 'm', 'e', 's' }; static const symbol s_4_22[5] = { 'e', 'u', 's', 'e', 's' }; static const symbol s_4_23[5] = { 'i', 's', 't', 'e', 's' }; static const symbol s_4_24[4] = { 'i', 'v', 'e', 's' }; static const symbol s_4_25[3] = { 'i', 'f', 's' }; static const symbol s_4_26[6] = { 'u', 's', 'i', 'o', 'n', 's' }; static const symbol s_4_27[6] = { 'a', 't', 'i', 'o', 'n', 's' }; static const symbol s_4_28[6] = { 'u', 't', 'i', 'o', 'n', 's' }; static const symbol s_4_29[6] = { 'a', 't', 'e', 'u', 'r', 's' }; static const symbol s_4_30[5] = { 'm', 'e', 'n', 't', 's' }; static const symbol 
s_4_31[6] = { 'e', 'm', 'e', 'n', 't', 's' }; static const symbol s_4_32[9] = { 'i', 's', 's', 'e', 'm', 'e', 'n', 't', 's' }; static const symbol s_4_33[4] = { 'i', 't', 0xE9, 's' }; static const symbol s_4_34[4] = { 'm', 'e', 'n', 't' }; static const symbol s_4_35[5] = { 'e', 'm', 'e', 'n', 't' }; static const symbol s_4_36[8] = { 'i', 's', 's', 'e', 'm', 'e', 'n', 't' }; static const symbol s_4_37[6] = { 'a', 'm', 'm', 'e', 'n', 't' }; static const symbol s_4_38[6] = { 'e', 'm', 'm', 'e', 'n', 't' }; static const symbol s_4_39[3] = { 'a', 'u', 'x' }; static const symbol s_4_40[4] = { 'e', 'a', 'u', 'x' }; static const symbol s_4_41[3] = { 'e', 'u', 'x' }; static const symbol s_4_42[3] = { 'i', 't', 0xE9 }; static const struct among a_4[43] = { /* 0 */ { 4, s_4_0, -1, 1, 0}, /* 1 */ { 6, s_4_1, -1, 2, 0}, /* 2 */ { 4, s_4_2, -1, 1, 0}, /* 3 */ { 4, s_4_3, -1, 5, 0}, /* 4 */ { 5, s_4_4, -1, 3, 0}, /* 5 */ { 4, s_4_5, -1, 1, 0}, /* 6 */ { 4, s_4_6, -1, 1, 0}, /* 7 */ { 4, s_4_7, -1, 11, 0}, /* 8 */ { 4, s_4_8, -1, 1, 0}, /* 9 */ { 3, s_4_9, -1, 8, 0}, /* 10 */ { 2, s_4_10, -1, 8, 0}, /* 11 */ { 5, s_4_11, -1, 4, 0}, /* 12 */ { 5, s_4_12, -1, 2, 0}, /* 13 */ { 5, s_4_13, -1, 4, 0}, /* 14 */ { 5, s_4_14, -1, 2, 0}, /* 15 */ { 5, s_4_15, -1, 1, 0}, /* 16 */ { 7, s_4_16, -1, 2, 0}, /* 17 */ { 5, s_4_17, -1, 1, 0}, /* 18 */ { 5, s_4_18, -1, 5, 0}, /* 19 */ { 6, s_4_19, -1, 3, 0}, /* 20 */ { 5, s_4_20, -1, 1, 0}, /* 21 */ { 5, s_4_21, -1, 1, 0}, /* 22 */ { 5, s_4_22, -1, 11, 0}, /* 23 */ { 5, s_4_23, -1, 1, 0}, /* 24 */ { 4, s_4_24, -1, 8, 0}, /* 25 */ { 3, s_4_25, -1, 8, 0}, /* 26 */ { 6, s_4_26, -1, 4, 0}, /* 27 */ { 6, s_4_27, -1, 2, 0}, /* 28 */ { 6, s_4_28, -1, 4, 0}, /* 29 */ { 6, s_4_29, -1, 2, 0}, /* 30 */ { 5, s_4_30, -1, 15, 0}, /* 31 */ { 6, s_4_31, 30, 6, 0}, /* 32 */ { 9, s_4_32, 31, 12, 0}, /* 33 */ { 4, s_4_33, -1, 7, 0}, /* 34 */ { 4, s_4_34, -1, 15, 0}, /* 35 */ { 5, s_4_35, 34, 6, 0}, /* 36 */ { 8, s_4_36, 35, 12, 0}, /* 37 */ { 6, s_4_37, 34, 13, 0}, 
/* 38 */ { 6, s_4_38, 34, 14, 0}, /* 39 */ { 3, s_4_39, -1, 10, 0}, /* 40 */ { 4, s_4_40, 39, 9, 0}, /* 41 */ { 3, s_4_41, -1, 1, 0}, /* 42 */ { 3, s_4_42, -1, 7, 0} }; static const symbol s_5_0[3] = { 'i', 'r', 'a' }; static const symbol s_5_1[2] = { 'i', 'e' }; static const symbol s_5_2[4] = { 'i', 's', 's', 'e' }; static const symbol s_5_3[7] = { 'i', 's', 's', 'a', 'n', 't', 'e' }; static const symbol s_5_4[1] = { 'i' }; static const symbol s_5_5[4] = { 'i', 'r', 'a', 'i' }; static const symbol s_5_6[2] = { 'i', 'r' }; static const symbol s_5_7[4] = { 'i', 'r', 'a', 's' }; static const symbol s_5_8[3] = { 'i', 'e', 's' }; static const symbol s_5_9[4] = { 0xEE, 'm', 'e', 's' }; static const symbol s_5_10[5] = { 'i', 's', 's', 'e', 's' }; static const symbol s_5_11[8] = { 'i', 's', 's', 'a', 'n', 't', 'e', 's' }; static const symbol s_5_12[4] = { 0xEE, 't', 'e', 's' }; static const symbol s_5_13[2] = { 'i', 's' }; static const symbol s_5_14[5] = { 'i', 'r', 'a', 'i', 's' }; static const symbol s_5_15[6] = { 'i', 's', 's', 'a', 'i', 's' }; static const symbol s_5_16[6] = { 'i', 'r', 'i', 'o', 'n', 's' }; static const symbol s_5_17[7] = { 'i', 's', 's', 'i', 'o', 'n', 's' }; static const symbol s_5_18[5] = { 'i', 'r', 'o', 'n', 's' }; static const symbol s_5_19[6] = { 'i', 's', 's', 'o', 'n', 's' }; static const symbol s_5_20[7] = { 'i', 's', 's', 'a', 'n', 't', 's' }; static const symbol s_5_21[2] = { 'i', 't' }; static const symbol s_5_22[5] = { 'i', 'r', 'a', 'i', 't' }; static const symbol s_5_23[6] = { 'i', 's', 's', 'a', 'i', 't' }; static const symbol s_5_24[6] = { 'i', 's', 's', 'a', 'n', 't' }; static const symbol s_5_25[7] = { 'i', 'r', 'a', 'I', 'e', 'n', 't' }; static const symbol s_5_26[8] = { 'i', 's', 's', 'a', 'I', 'e', 'n', 't' }; static const symbol s_5_27[5] = { 'i', 'r', 'e', 'n', 't' }; static const symbol s_5_28[6] = { 'i', 's', 's', 'e', 'n', 't' }; static const symbol s_5_29[5] = { 'i', 'r', 'o', 'n', 't' }; static const symbol s_5_30[2] = { 
0xEE, 't' }; static const symbol s_5_31[5] = { 'i', 'r', 'i', 'e', 'z' }; static const symbol s_5_32[6] = { 'i', 's', 's', 'i', 'e', 'z' }; static const symbol s_5_33[4] = { 'i', 'r', 'e', 'z' }; static const symbol s_5_34[5] = { 'i', 's', 's', 'e', 'z' }; static const struct among a_5[35] = { /* 0 */ { 3, s_5_0, -1, 1, 0}, /* 1 */ { 2, s_5_1, -1, 1, 0}, /* 2 */ { 4, s_5_2, -1, 1, 0}, /* 3 */ { 7, s_5_3, -1, 1, 0}, /* 4 */ { 1, s_5_4, -1, 1, 0}, /* 5 */ { 4, s_5_5, 4, 1, 0}, /* 6 */ { 2, s_5_6, -1, 1, 0}, /* 7 */ { 4, s_5_7, -1, 1, 0}, /* 8 */ { 3, s_5_8, -1, 1, 0}, /* 9 */ { 4, s_5_9, -1, 1, 0}, /* 10 */ { 5, s_5_10, -1, 1, 0}, /* 11 */ { 8, s_5_11, -1, 1, 0}, /* 12 */ { 4, s_5_12, -1, 1, 0}, /* 13 */ { 2, s_5_13, -1, 1, 0}, /* 14 */ { 5, s_5_14, 13, 1, 0}, /* 15 */ { 6, s_5_15, 13, 1, 0}, /* 16 */ { 6, s_5_16, -1, 1, 0}, /* 17 */ { 7, s_5_17, -1, 1, 0}, /* 18 */ { 5, s_5_18, -1, 1, 0}, /* 19 */ { 6, s_5_19, -1, 1, 0}, /* 20 */ { 7, s_5_20, -1, 1, 0}, /* 21 */ { 2, s_5_21, -1, 1, 0}, /* 22 */ { 5, s_5_22, 21, 1, 0}, /* 23 */ { 6, s_5_23, 21, 1, 0}, /* 24 */ { 6, s_5_24, -1, 1, 0}, /* 25 */ { 7, s_5_25, -1, 1, 0}, /* 26 */ { 8, s_5_26, -1, 1, 0}, /* 27 */ { 5, s_5_27, -1, 1, 0}, /* 28 */ { 6, s_5_28, -1, 1, 0}, /* 29 */ { 5, s_5_29, -1, 1, 0}, /* 30 */ { 2, s_5_30, -1, 1, 0}, /* 31 */ { 5, s_5_31, -1, 1, 0}, /* 32 */ { 6, s_5_32, -1, 1, 0}, /* 33 */ { 4, s_5_33, -1, 1, 0}, /* 34 */ { 5, s_5_34, -1, 1, 0} }; static const symbol s_6_0[1] = { 'a' }; static const symbol s_6_1[3] = { 'e', 'r', 'a' }; static const symbol s_6_2[4] = { 'a', 's', 's', 'e' }; static const symbol s_6_3[4] = { 'a', 'n', 't', 'e' }; static const symbol s_6_4[2] = { 0xE9, 'e' }; static const symbol s_6_5[2] = { 'a', 'i' }; static const symbol s_6_6[4] = { 'e', 'r', 'a', 'i' }; static const symbol s_6_7[2] = { 'e', 'r' }; static const symbol s_6_8[2] = { 'a', 's' }; static const symbol s_6_9[4] = { 'e', 'r', 'a', 's' }; static const symbol s_6_10[4] = { 0xE2, 'm', 'e', 's' }; static const symbol 
s_6_11[5] = { 'a', 's', 's', 'e', 's' }; static const symbol s_6_12[5] = { 'a', 'n', 't', 'e', 's' }; static const symbol s_6_13[4] = { 0xE2, 't', 'e', 's' }; static const symbol s_6_14[3] = { 0xE9, 'e', 's' }; static const symbol s_6_15[3] = { 'a', 'i', 's' }; static const symbol s_6_16[5] = { 'e', 'r', 'a', 'i', 's' }; static const symbol s_6_17[4] = { 'i', 'o', 'n', 's' }; static const symbol s_6_18[6] = { 'e', 'r', 'i', 'o', 'n', 's' }; static const symbol s_6_19[7] = { 'a', 's', 's', 'i', 'o', 'n', 's' }; static const symbol s_6_20[5] = { 'e', 'r', 'o', 'n', 's' }; static const symbol s_6_21[4] = { 'a', 'n', 't', 's' }; static const symbol s_6_22[2] = { 0xE9, 's' }; static const symbol s_6_23[3] = { 'a', 'i', 't' }; static const symbol s_6_24[5] = { 'e', 'r', 'a', 'i', 't' }; static const symbol s_6_25[3] = { 'a', 'n', 't' }; static const symbol s_6_26[5] = { 'a', 'I', 'e', 'n', 't' }; static const symbol s_6_27[7] = { 'e', 'r', 'a', 'I', 'e', 'n', 't' }; static const symbol s_6_28[5] = { 0xE8, 'r', 'e', 'n', 't' }; static const symbol s_6_29[6] = { 'a', 's', 's', 'e', 'n', 't' }; static const symbol s_6_30[5] = { 'e', 'r', 'o', 'n', 't' }; static const symbol s_6_31[2] = { 0xE2, 't' }; static const symbol s_6_32[2] = { 'e', 'z' }; static const symbol s_6_33[3] = { 'i', 'e', 'z' }; static const symbol s_6_34[5] = { 'e', 'r', 'i', 'e', 'z' }; static const symbol s_6_35[6] = { 'a', 's', 's', 'i', 'e', 'z' }; static const symbol s_6_36[4] = { 'e', 'r', 'e', 'z' }; static const symbol s_6_37[1] = { 0xE9 }; static const struct among a_6[38] = { /* 0 */ { 1, s_6_0, -1, 3, 0}, /* 1 */ { 3, s_6_1, 0, 2, 0}, /* 2 */ { 4, s_6_2, -1, 3, 0}, /* 3 */ { 4, s_6_3, -1, 3, 0}, /* 4 */ { 2, s_6_4, -1, 2, 0}, /* 5 */ { 2, s_6_5, -1, 3, 0}, /* 6 */ { 4, s_6_6, 5, 2, 0}, /* 7 */ { 2, s_6_7, -1, 2, 0}, /* 8 */ { 2, s_6_8, -1, 3, 0}, /* 9 */ { 4, s_6_9, 8, 2, 0}, /* 10 */ { 4, s_6_10, -1, 3, 0}, /* 11 */ { 5, s_6_11, -1, 3, 0}, /* 12 */ { 5, s_6_12, -1, 3, 0}, /* 13 */ { 4, s_6_13, 
-1, 3, 0}, /* 14 */ { 3, s_6_14, -1, 2, 0}, /* 15 */ { 3, s_6_15, -1, 3, 0}, /* 16 */ { 5, s_6_16, 15, 2, 0}, /* 17 */ { 4, s_6_17, -1, 1, 0}, /* 18 */ { 6, s_6_18, 17, 2, 0}, /* 19 */ { 7, s_6_19, 17, 3, 0}, /* 20 */ { 5, s_6_20, -1, 2, 0}, /* 21 */ { 4, s_6_21, -1, 3, 0}, /* 22 */ { 2, s_6_22, -1, 2, 0}, /* 23 */ { 3, s_6_23, -1, 3, 0}, /* 24 */ { 5, s_6_24, 23, 2, 0}, /* 25 */ { 3, s_6_25, -1, 3, 0}, /* 26 */ { 5, s_6_26, -1, 3, 0}, /* 27 */ { 7, s_6_27, 26, 2, 0}, /* 28 */ { 5, s_6_28, -1, 2, 0}, /* 29 */ { 6, s_6_29, -1, 3, 0}, /* 30 */ { 5, s_6_30, -1, 2, 0}, /* 31 */ { 2, s_6_31, -1, 3, 0}, /* 32 */ { 2, s_6_32, -1, 2, 0}, /* 33 */ { 3, s_6_33, 32, 2, 0}, /* 34 */ { 5, s_6_34, 33, 2, 0}, /* 35 */ { 6, s_6_35, 33, 3, 0}, /* 36 */ { 4, s_6_36, 32, 2, 0}, /* 37 */ { 1, s_6_37, -1, 2, 0} }; static const symbol s_7_0[1] = { 'e' }; static const symbol s_7_1[4] = { 'I', 0xE8, 'r', 'e' }; static const symbol s_7_2[4] = { 'i', 0xE8, 'r', 'e' }; static const symbol s_7_3[3] = { 'i', 'o', 'n' }; static const symbol s_7_4[3] = { 'I', 'e', 'r' }; static const symbol s_7_5[3] = { 'i', 'e', 'r' }; static const symbol s_7_6[1] = { 0xEB }; static const struct among a_7[7] = { /* 0 */ { 1, s_7_0, -1, 3, 0}, /* 1 */ { 4, s_7_1, 0, 2, 0}, /* 2 */ { 4, s_7_2, 0, 2, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 3, s_7_4, -1, 2, 0}, /* 5 */ { 3, s_7_5, -1, 2, 0}, /* 6 */ { 1, s_7_6, -1, 4, 0} }; static const symbol s_8_0[3] = { 'e', 'l', 'l' }; static const symbol s_8_1[4] = { 'e', 'i', 'l', 'l' }; static const symbol s_8_2[3] = { 'e', 'n', 'n' }; static const symbol s_8_3[3] = { 'o', 'n', 'n' }; static const symbol s_8_4[3] = { 'e', 't', 't' }; static const struct among a_8[5] = { /* 0 */ { 3, s_8_0, -1, -1, 0}, /* 1 */ { 4, s_8_1, -1, -1, 0}, /* 2 */ { 3, s_8_2, -1, -1, 0}, /* 3 */ { 3, s_8_3, -1, -1, 0}, /* 4 */ { 3, s_8_4, -1, -1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 130, 103, 8, 5 }; static const unsigned char 
g_keep_with_s[] = { 1, 65, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const symbol s_0[] = { 'u' }; static const symbol s_1[] = { 'U' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'I' }; static const symbol s_4[] = { 'y' }; static const symbol s_5[] = { 'Y' }; static const symbol s_6[] = { 'y' }; static const symbol s_7[] = { 'Y' }; static const symbol s_8[] = { 'q' }; static const symbol s_9[] = { 'u' }; static const symbol s_10[] = { 'U' }; static const symbol s_11[] = { 'i' }; static const symbol s_12[] = { 'u' }; static const symbol s_13[] = { 'y' }; static const symbol s_14[] = { 'i', 'c' }; static const symbol s_15[] = { 'i', 'q', 'U' }; static const symbol s_16[] = { 'l', 'o', 'g' }; static const symbol s_17[] = { 'u' }; static const symbol s_18[] = { 'e', 'n', 't' }; static const symbol s_19[] = { 'a', 't' }; static const symbol s_20[] = { 'e', 'u', 'x' }; static const symbol s_21[] = { 'i' }; static const symbol s_22[] = { 'a', 'b', 'l' }; static const symbol s_23[] = { 'i', 'q', 'U' }; static const symbol s_24[] = { 'a', 't' }; static const symbol s_25[] = { 'i', 'c' }; static const symbol s_26[] = { 'i', 'q', 'U' }; static const symbol s_27[] = { 'e', 'a', 'u' }; static const symbol s_28[] = { 'a', 'l' }; static const symbol s_29[] = { 'e', 'u', 'x' }; static const symbol s_30[] = { 'a', 'n', 't' }; static const symbol s_31[] = { 'e', 'n', 't' }; static const symbol s_32[] = { 'e' }; static const symbol s_33[] = { 's' }; static const symbol s_34[] = { 's' }; static const symbol s_35[] = { 't' }; static const symbol s_36[] = { 'i' }; static const symbol s_37[] = { 'g', 'u' }; static const symbol s_38[] = { 0xE9 }; static const symbol s_39[] = { 0xE8 }; static const symbol s_40[] = { 'e' }; static const symbol s_41[] = { 'Y' }; static const symbol s_42[] = { 'i' }; static const symbol s_43[] = { 0xE7 }; static const symbol s_44[] = { 'c' }; static int r_prelude(struct SN_env * z) { while(1) { /* repeat, line 38 */ int 
c1 = z->c; while(1) { /* goto, line 38 */ int c2 = z->c; { int c3 = z->c; /* or, line 44 */ if (in_grouping(z, g_v, 97, 251, 0)) goto lab3; z->bra = z->c; /* [, line 40 */ { int c4 = z->c; /* or, line 40 */ if (!(eq_s(z, 1, s_0))) goto lab5; z->ket = z->c; /* ], line 40 */ if (in_grouping(z, g_v, 97, 251, 0)) goto lab5; { int ret = slice_from_s(z, 1, s_1); /* <-, line 40 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = c4; if (!(eq_s(z, 1, s_2))) goto lab6; z->ket = z->c; /* ], line 41 */ if (in_grouping(z, g_v, 97, 251, 0)) goto lab6; { int ret = slice_from_s(z, 1, s_3); /* <-, line 41 */ if (ret < 0) return ret; } goto lab4; lab6: z->c = c4; if (!(eq_s(z, 1, s_4))) goto lab3; z->ket = z->c; /* ], line 42 */ { int ret = slice_from_s(z, 1, s_5); /* <-, line 42 */ if (ret < 0) return ret; } } lab4: goto lab2; lab3: z->c = c3; z->bra = z->c; /* [, line 45 */ if (!(eq_s(z, 1, s_6))) goto lab7; z->ket = z->c; /* ], line 45 */ if (in_grouping(z, g_v, 97, 251, 0)) goto lab7; { int ret = slice_from_s(z, 1, s_7); /* <-, line 45 */ if (ret < 0) return ret; } goto lab2; lab7: z->c = c3; if (!(eq_s(z, 1, s_8))) goto lab1; z->bra = z->c; /* [, line 47 */ if (!(eq_s(z, 1, s_9))) goto lab1; z->ket = z->c; /* ], line 47 */ { int ret = slice_from_s(z, 1, s_10); /* <-, line 47 */ if (ret < 0) return ret; } } lab2: z->c = c2; break; lab1: z->c = c2; if (z->c >= z->l) goto lab0; z->c++; /* goto, line 38 */ } continue; lab0: z->c = c1; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 56 */ { int c2 = z->c; /* or, line 58 */ if (in_grouping(z, g_v, 97, 251, 0)) goto lab2; if (in_grouping(z, g_v, 97, 251, 0)) goto lab2; if (z->c >= z->l) goto lab2; z->c++; /* next, line 57 */ goto lab1; lab2: z->c = c2; if (z->c + 2 >= z->l || z->p[z->c + 2] >> 5 != 3 || !((331776 >> (z->p[z->c + 2] & 0x1f)) & 1)) goto lab3; if (!(find_among(z, a_0, 3))) goto lab3; /* among, line 59 */ goto lab1; lab3: 
z->c = c2; if (z->c >= z->l) goto lab0; z->c++; /* next, line 66 */ { /* gopast */ /* grouping v, line 66 */ int ret = out_grouping(z, g_v, 97, 251, 1); if (ret < 0) goto lab0; z->c += ret; } } lab1: z->I[0] = z->c; /* setmark pV, line 67 */ lab0: z->c = c1; } { int c3 = z->c; /* do, line 69 */ { /* gopast */ /* grouping v, line 70 */ int ret = out_grouping(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 70 */ int ret = in_grouping(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 70 */ { /* gopast */ /* grouping v, line 71 */ int ret = out_grouping(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 71 */ int ret = in_grouping(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 71 */ lab4: z->c = c3; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 75 */ int c1 = z->c; z->bra = z->c; /* [, line 77 */ if (z->c >= z->l || z->p[z->c + 0] >> 5 != 2 || !((35652096 >> (z->p[z->c + 0] & 0x1f)) & 1)) among_var = 4; else among_var = find_among(z, a_1, 4); /* substring, line 77 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 77 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_11); /* <-, line 78 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_13); /* <-, line 80 */ if (ret < 0) return ret; } break; case 4: if (z->c >= z->l) goto lab0; z->c++; /* next, line 81 */ break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int 
r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 92 */ among_var = find_among_b(z, a_4, 43); /* substring, line 92 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 92 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 96 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 99 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 99 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 100 */ z->ket = z->c; /* [, line 100 */ if (!(eq_s_b(z, 2, s_14))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 100 */ { int m1 = z->l - z->c; (void)m1; /* or, line 100 */ { int ret = r_R2(z); if (ret == 0) goto lab2; /* call R2, line 100 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 100 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = z->l - m1; { int ret = slice_from_s(z, 3, s_15); /* <-, line 100 */ if (ret < 0) return ret; } } lab1: lab0: ; } break; case 3: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 104 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_16); /* <-, line 104 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 107 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_17); /* <-, line 107 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 110 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_18); /* <-, line 110 */ if (ret < 0) return ret; } break; case 6: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 114 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 114 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, 
line 115 */ z->ket = z->c; /* [, line 116 */ among_var = find_among_b(z, a_2, 6); /* substring, line 116 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 116 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab3; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 117 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 117 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 117 */ if (!(eq_s_b(z, 2, s_19))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 117 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 117 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 117 */ if (ret < 0) return ret; } break; case 2: { int m2 = z->l - z->c; (void)m2; /* or, line 118 */ { int ret = r_R2(z); if (ret == 0) goto lab5; /* call R2, line 118 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 118 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = z->l - m2; { int ret = r_R1(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R1, line 118 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_20); /* <-, line 118 */ if (ret < 0) return ret; } } lab4: break; case 3: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 120 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 120 */ if (ret < 0) return ret; } break; case 4: { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call RV, line 122 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_21); /* <-, line 122 */ if (ret < 0) return ret; } break; } lab3: ; } break; case 7: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 129 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 129 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 130 */ 
z->ket = z->c; /* [, line 131 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab6; } among_var = find_among_b(z, a_3, 3); /* substring, line 131 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab6; } z->bra = z->c; /* ], line 131 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab6; } case 1: { int m3 = z->l - z->c; (void)m3; /* or, line 132 */ { int ret = r_R2(z); if (ret == 0) goto lab8; /* call R2, line 132 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 132 */ if (ret < 0) return ret; } goto lab7; lab8: z->c = z->l - m3; { int ret = slice_from_s(z, 3, s_22); /* <-, line 132 */ if (ret < 0) return ret; } } lab7: break; case 2: { int m4 = z->l - z->c; (void)m4; /* or, line 133 */ { int ret = r_R2(z); if (ret == 0) goto lab10; /* call R2, line 133 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 133 */ if (ret < 0) return ret; } goto lab9; lab10: z->c = z->l - m4; { int ret = slice_from_s(z, 3, s_23); /* <-, line 133 */ if (ret < 0) return ret; } } lab9: break; case 3: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call R2, line 134 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } break; } lab6: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 141 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 141 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 142 */ z->ket = z->c; /* [, line 142 */ if (!(eq_s_b(z, 2, s_24))) { z->c = z->l - m_keep; goto lab11; } z->bra = z->c; /* ], line 142 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab11; } /* call R2, line 142 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 142 */ if (!(eq_s_b(z, 2, s_25))) { z->c 
= z->l - m_keep; goto lab11; } z->bra = z->c; /* ], line 142 */ { int m5 = z->l - z->c; (void)m5; /* or, line 142 */ { int ret = r_R2(z); if (ret == 0) goto lab13; /* call R2, line 142 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } goto lab12; lab13: z->c = z->l - m5; { int ret = slice_from_s(z, 3, s_26); /* <-, line 142 */ if (ret < 0) return ret; } } lab12: lab11: ; } break; case 9: { int ret = slice_from_s(z, 3, s_27); /* <-, line 144 */ if (ret < 0) return ret; } break; case 10: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 145 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 2, s_28); /* <-, line 145 */ if (ret < 0) return ret; } break; case 11: { int m6 = z->l - z->c; (void)m6; /* or, line 147 */ { int ret = r_R2(z); if (ret == 0) goto lab15; /* call R2, line 147 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 147 */ if (ret < 0) return ret; } goto lab14; lab15: z->c = z->l - m6; { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 147 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_29); /* <-, line 147 */ if (ret < 0) return ret; } } lab14: break; case 12: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 150 */ if (ret < 0) return ret; } if (out_grouping_b(z, g_v, 97, 251, 0)) return 0; { int ret = slice_del(z); /* delete, line 150 */ if (ret < 0) return ret; } break; case 13: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 155 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_30); /* <-, line 155 */ if (ret < 0) return ret; } return 0; /* fail, line 155 */ break; case 14: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 156 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_31); /* <-, line 156 */ if (ret < 0) return ret; } return 0; /* fail, line 156 */ break; case 15: { int m_test = z->l - z->c; /* test, line 158 */ if (in_grouping_b(z, g_v, 97, 251, 0)) return 
0; { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 158 */ if (ret < 0) return ret; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 158 */ if (ret < 0) return ret; } return 0; /* fail, line 158 */ break; } return 1; } static int r_i_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 163 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 163 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 164 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68944418 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_5, 35); /* substring, line 164 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 164 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: if (out_grouping_b(z, g_v, 97, 251, 0)) { z->lb = mlimit; return 0; } { int ret = slice_del(z); /* delete, line 170 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 174 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 174 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 175 */ among_var = find_among_b(z, a_6, 38); /* substring, line 175 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 175 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = r_R2(z); if (ret == 0) { z->lb = mlimit; return 0; } /* call R2, line 177 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 177 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 185 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 190 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 191 */ z->ket = 
z->c; /* [, line 191 */ if (!(eq_s_b(z, 1, s_32))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 191 */ { int ret = slice_del(z); /* delete, line 191 */ if (ret < 0) return ret; } lab0: ; } break; } z->lb = mlimit; } return 1; } static int r_residual_suffix(struct SN_env * z) { int among_var; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 199 */ z->ket = z->c; /* [, line 199 */ if (!(eq_s_b(z, 1, s_33))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 199 */ { int m_test = z->l - z->c; /* test, line 199 */ if (out_grouping_b(z, g_keep_with_s, 97, 232, 0)) { z->c = z->l - m_keep; goto lab0; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 199 */ if (ret < 0) return ret; } lab0: ; } { int mlimit; /* setlimit, line 200 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 200 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 201 */ among_var = find_among_b(z, a_7, 7); /* substring, line 201 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 201 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = r_R2(z); if (ret == 0) { z->lb = mlimit; return 0; } /* call R2, line 202 */ if (ret < 0) return ret; } { int m2 = z->l - z->c; (void)m2; /* or, line 202 */ if (!(eq_s_b(z, 1, s_34))) goto lab2; goto lab1; lab2: z->c = z->l - m2; if (!(eq_s_b(z, 1, s_35))) { z->lb = mlimit; return 0; } } lab1: { int ret = slice_del(z); /* delete, line 202 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_36); /* <-, line 204 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 205 */ if (ret < 0) return ret; } break; case 4: if (!(eq_s_b(z, 2, s_37))) { z->lb = mlimit; return 0; } { int ret = slice_del(z); /* delete, line 206 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_un_double(struct SN_env * z) { { int 
m_test = z->l - z->c; /* test, line 212 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1069056 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_8, 5))) return 0; /* among, line 212 */ z->c = z->l - m_test; } z->ket = z->c; /* [, line 212 */ if (z->c <= z->lb) return 0; z->c--; /* next, line 212 */ z->bra = z->c; /* ], line 212 */ { int ret = slice_del(z); /* delete, line 212 */ if (ret < 0) return ret; } return 1; } static int r_un_accent(struct SN_env * z) { { int i = 1; while(1) { /* atleast, line 216 */ if (out_grouping_b(z, g_v, 97, 251, 0)) goto lab0; i--; continue; lab0: break; } if (i > 0) return 0; } z->ket = z->c; /* [, line 217 */ { int m1 = z->l - z->c; (void)m1; /* or, line 217 */ if (!(eq_s_b(z, 1, s_38))) goto lab2; goto lab1; lab2: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_39))) return 0; } lab1: z->bra = z->c; /* ], line 217 */ { int ret = slice_from_s(z, 1, s_40); /* <-, line 217 */ if (ret < 0) return ret; } return 1; } extern int french_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 223 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 223 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 224 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 224 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 225 */ { int m3 = z->l - z->c; (void)m3; /* do, line 227 */ { int m4 = z->l - z->c; (void)m4; /* or, line 237 */ { int m5 = z->l - z->c; (void)m5; /* and, line 233 */ { int m6 = z->l - z->c; (void)m6; /* or, line 229 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab6; /* call standard_suffix, line 229 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m6; { int ret = r_i_verb_suffix(z); if (ret == 0) goto lab7; /* call i_verb_suffix, line 230 */ if (ret < 0) return ret; } goto lab5; lab7: z->c = z->l - m6; { int ret = r_verb_suffix(z); if (ret == 0) goto 
lab4; /* call verb_suffix, line 231 */ if (ret < 0) return ret; } } lab5: z->c = z->l - m5; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 234 */ z->ket = z->c; /* [, line 234 */ { int m7 = z->l - z->c; (void)m7; /* or, line 234 */ if (!(eq_s_b(z, 1, s_41))) goto lab10; z->bra = z->c; /* ], line 234 */ { int ret = slice_from_s(z, 1, s_42); /* <-, line 234 */ if (ret < 0) return ret; } goto lab9; lab10: z->c = z->l - m7; if (!(eq_s_b(z, 1, s_43))) { z->c = z->l - m_keep; goto lab8; } z->bra = z->c; /* ], line 235 */ { int ret = slice_from_s(z, 1, s_44); /* <-, line 235 */ if (ret < 0) return ret; } } lab9: lab8: ; } } goto lab3; lab4: z->c = z->l - m4; { int ret = r_residual_suffix(z); if (ret == 0) goto lab2; /* call residual_suffix, line 238 */ if (ret < 0) return ret; } } lab3: lab2: z->c = z->l - m3; } { int m8 = z->l - z->c; (void)m8; /* do, line 243 */ { int ret = r_un_double(z); if (ret == 0) goto lab11; /* call un_double, line 243 */ if (ret < 0) return ret; } lab11: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 244 */ { int ret = r_un_accent(z); if (ret == 0) goto lab12; /* call un_accent, line 244 */ if (ret < 0) return ret; } lab12: z->c = z->l - m9; } z->c = z->lb; { int c10 = z->c; /* do, line 246 */ { int ret = r_postlude(z); if (ret == 0) goto lab13; /* call postlude, line 246 */ if (ret < 0) return ret; } lab13: z->c = c10; } return 1; } extern struct SN_env * french_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } extern void french_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_french.h000066400000000000000000000005021456444476200312300ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* french_ISO_8859_1_create_env(void); extern void french_ISO_8859_1_close_env(struct SN_env* z); extern int 
french_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_german.c000066400000000000000000000424351456444476200312420ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int german_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_standard_suffix(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * german_ISO_8859_1_create_env(void); extern void german_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[1] = { 'U' }; static const symbol s_0_2[1] = { 'Y' }; static const symbol s_0_3[1] = { 0xE4 }; static const symbol s_0_4[1] = { 0xF6 }; static const symbol s_0_5[1] = { 0xFC }; static const struct among a_0[6] = { /* 0 */ { 0, 0, -1, 6, 0}, /* 1 */ { 1, s_0_1, 0, 2, 0}, /* 2 */ { 1, s_0_2, 0, 1, 0}, /* 3 */ { 1, s_0_3, 0, 3, 0}, /* 4 */ { 1, s_0_4, 0, 4, 0}, /* 5 */ { 1, s_0_5, 0, 5, 0} }; static const symbol s_1_0[1] = { 'e' }; static const symbol s_1_1[2] = { 'e', 'm' }; static const symbol s_1_2[2] = { 'e', 'n' }; static const symbol s_1_3[3] = { 'e', 'r', 'n' }; static const symbol s_1_4[2] = { 'e', 'r' }; static const symbol s_1_5[1] = { 's' }; static const symbol s_1_6[2] = { 'e', 's' }; static const struct among a_1[7] = { /* 0 */ { 1, s_1_0, -1, 2, 0}, /* 1 */ { 2, s_1_1, -1, 1, 0}, /* 2 */ { 2, s_1_2, -1, 2, 0}, /* 3 */ { 3, s_1_3, -1, 1, 0}, /* 4 */ { 2, s_1_4, -1, 1, 0}, /* 5 */ { 1, s_1_5, -1, 3, 0}, /* 6 */ { 2, s_1_6, 5, 2, 0} }; static const symbol s_2_0[2] = { 'e', 'n' }; static const symbol s_2_1[2] = { 'e', 'r' }; static const 
symbol s_2_2[2] = { 's', 't' }; static const symbol s_2_3[3] = { 'e', 's', 't' }; static const struct among a_2[4] = { /* 0 */ { 2, s_2_0, -1, 1, 0}, /* 1 */ { 2, s_2_1, -1, 1, 0}, /* 2 */ { 2, s_2_2, -1, 2, 0}, /* 3 */ { 3, s_2_3, 2, 1, 0} }; static const symbol s_3_0[2] = { 'i', 'g' }; static const symbol s_3_1[4] = { 'l', 'i', 'c', 'h' }; static const struct among a_3[2] = { /* 0 */ { 2, s_3_0, -1, 1, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0} }; static const symbol s_4_0[3] = { 'e', 'n', 'd' }; static const symbol s_4_1[2] = { 'i', 'g' }; static const symbol s_4_2[3] = { 'u', 'n', 'g' }; static const symbol s_4_3[4] = { 'l', 'i', 'c', 'h' }; static const symbol s_4_4[4] = { 'i', 's', 'c', 'h' }; static const symbol s_4_5[2] = { 'i', 'k' }; static const symbol s_4_6[4] = { 'h', 'e', 'i', 't' }; static const symbol s_4_7[4] = { 'k', 'e', 'i', 't' }; static const struct among a_4[8] = { /* 0 */ { 3, s_4_0, -1, 1, 0}, /* 1 */ { 2, s_4_1, -1, 2, 0}, /* 2 */ { 3, s_4_2, -1, 1, 0}, /* 3 */ { 4, s_4_3, -1, 3, 0}, /* 4 */ { 4, s_4_4, -1, 2, 0}, /* 5 */ { 2, s_4_5, -1, 2, 0}, /* 6 */ { 4, s_4_6, -1, 3, 0}, /* 7 */ { 4, s_4_7, -1, 4, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 }; static const unsigned char g_s_ending[] = { 117, 30, 5 }; static const unsigned char g_st_ending[] = { 117, 30, 4 }; static const symbol s_0[] = { 0xDF }; static const symbol s_1[] = { 's', 's' }; static const symbol s_2[] = { 'u' }; static const symbol s_3[] = { 'U' }; static const symbol s_4[] = { 'y' }; static const symbol s_5[] = { 'Y' }; static const symbol s_6[] = { 'y' }; static const symbol s_7[] = { 'u' }; static const symbol s_8[] = { 'a' }; static const symbol s_9[] = { 'o' }; static const symbol s_10[] = { 'u' }; static const symbol s_11[] = { 's' }; static const symbol s_12[] = { 'n', 'i', 's' }; static const symbol s_13[] = { 'i', 'g' }; static const symbol s_14[] = { 'e' }; static const symbol s_15[] = { 'e' }; static const 
symbol s_16[] = { 'e', 'r' }; static const symbol s_17[] = { 'e', 'n' }; static int r_prelude(struct SN_env * z) { { int c_test = z->c; /* test, line 35 */ while(1) { /* repeat, line 35 */ int c1 = z->c; { int c2 = z->c; /* or, line 38 */ z->bra = z->c; /* [, line 37 */ if (!(eq_s(z, 1, s_0))) goto lab2; z->ket = z->c; /* ], line 37 */ { int ret = slice_from_s(z, 2, s_1); /* <-, line 37 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = c2; if (z->c >= z->l) goto lab0; z->c++; /* next, line 38 */ } lab1: continue; lab0: z->c = c1; break; } z->c = c_test; } while(1) { /* repeat, line 41 */ int c3 = z->c; while(1) { /* goto, line 41 */ int c4 = z->c; if (in_grouping(z, g_v, 97, 252, 0)) goto lab4; z->bra = z->c; /* [, line 42 */ { int c5 = z->c; /* or, line 42 */ if (!(eq_s(z, 1, s_2))) goto lab6; z->ket = z->c; /* ], line 42 */ if (in_grouping(z, g_v, 97, 252, 0)) goto lab6; { int ret = slice_from_s(z, 1, s_3); /* <-, line 42 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = c5; if (!(eq_s(z, 1, s_4))) goto lab4; z->ket = z->c; /* ], line 43 */ if (in_grouping(z, g_v, 97, 252, 0)) goto lab4; { int ret = slice_from_s(z, 1, s_5); /* <-, line 43 */ if (ret < 0) return ret; } } lab5: z->c = c4; break; lab4: z->c = c4; if (z->c >= z->l) goto lab3; z->c++; /* goto, line 41 */ } continue; lab3: z->c = c3; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { int c_test = z->c; /* test, line 52 */ { int ret = z->c + 3; if (0 > ret || ret > z->l) return 0; z->c = ret; /* hop, line 52 */ } z->I[2] = z->c; /* setmark x, line 52 */ z->c = c_test; } { /* gopast */ /* grouping v, line 54 */ int ret = out_grouping(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 54 */ int ret = in_grouping(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 54 */ /* try, line 55 */ if (!(z->I[0] < z->I[2])) goto lab0; z->I[0] = z->I[2]; lab0: { /* gopast */ /* 
grouping v, line 56 */ int ret = out_grouping(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 56 */ int ret = in_grouping(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 56 */ return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 60 */ int c1 = z->c; z->bra = z->c; /* [, line 62 */ among_var = find_among(z, a_0, 6); /* substring, line 62 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 62 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_6); /* <-, line 63 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_7); /* <-, line 64 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_8); /* <-, line 65 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_9); /* <-, line 66 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_10); /* <-, line 67 */ if (ret < 0) return ret; } break; case 6: if (z->c >= z->l) goto lab0; z->c++; /* next, line 68 */ break; } continue; lab0: z->c = c1; break; } return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; { int m1 = z->l - z->c; (void)m1; /* do, line 79 */ z->ket = z->c; /* [, line 80 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; among_var = find_among_b(z, a_1, 7); /* substring, line 80 */ if (!(among_var)) goto lab0; z->bra = z->c; /* ], line 80 */ { int ret = r_R1(z); if (ret == 0) goto lab0; /* call R1, line 80 */ if (ret < 0) return ret; } switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_del(z); /* delete, line 82 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 
85 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 86 */ z->ket = z->c; /* [, line 86 */ if (!(eq_s_b(z, 1, s_11))) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 86 */ if (!(eq_s_b(z, 3, s_12))) { z->c = z->l - m_keep; goto lab1; } { int ret = slice_del(z); /* delete, line 86 */ if (ret < 0) return ret; } lab1: ; } break; case 3: if (in_grouping_b(z, g_s_ending, 98, 116, 0)) goto lab0; { int ret = slice_del(z); /* delete, line 89 */ if (ret < 0) return ret; } break; } lab0: z->c = z->l - m1; } { int m2 = z->l - z->c; (void)m2; /* do, line 93 */ z->ket = z->c; /* [, line 94 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2; among_var = find_among_b(z, a_2, 4); /* substring, line 94 */ if (!(among_var)) goto lab2; z->bra = z->c; /* ], line 94 */ { int ret = r_R1(z); if (ret == 0) goto lab2; /* call R1, line 94 */ if (ret < 0) return ret; } switch(among_var) { case 0: goto lab2; case 1: { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } break; case 2: if (in_grouping_b(z, g_st_ending, 98, 116, 0)) goto lab2; { int ret = z->c - 3; if (z->lb > ret || ret > z->l) goto lab2; z->c = ret; /* hop, line 99 */ } { int ret = slice_del(z); /* delete, line 99 */ if (ret < 0) return ret; } break; } lab2: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 103 */ z->ket = z->c; /* [, line 104 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab3; among_var = find_among_b(z, a_4, 8); /* substring, line 104 */ if (!(among_var)) goto lab3; z->bra = z->c; /* ], line 104 */ { int ret = r_R2(z); if (ret == 0) goto lab3; /* call R2, line 104 */ if (ret < 0) return ret; } switch(among_var) { case 0: goto lab3; case 1: { int ret = slice_del(z); /* delete, line 106 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 107 */ z->ket = z->c; /* [, 
line 107 */ if (!(eq_s_b(z, 2, s_13))) { z->c = z->l - m_keep; goto lab4; } z->bra = z->c; /* ], line 107 */ { int m4 = z->l - z->c; (void)m4; /* not, line 107 */ if (!(eq_s_b(z, 1, s_14))) goto lab5; { z->c = z->l - m_keep; goto lab4; } lab5: z->c = z->l - m4; } { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call R2, line 107 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 107 */ if (ret < 0) return ret; } lab4: ; } break; case 2: { int m5 = z->l - z->c; (void)m5; /* not, line 110 */ if (!(eq_s_b(z, 1, s_15))) goto lab6; goto lab3; lab6: z->c = z->l - m5; } { int ret = slice_del(z); /* delete, line 110 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 113 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 114 */ z->ket = z->c; /* [, line 115 */ { int m6 = z->l - z->c; (void)m6; /* or, line 115 */ if (!(eq_s_b(z, 2, s_16))) goto lab9; goto lab8; lab9: z->c = z->l - m6; if (!(eq_s_b(z, 2, s_17))) { z->c = z->l - m_keep; goto lab7; } } lab8: z->bra = z->c; /* ], line 115 */ { int ret = r_R1(z); if (ret == 0) { z->c = z->l - m_keep; goto lab7; } /* call R1, line 115 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 115 */ if (ret < 0) return ret; } lab7: ; } break; case 4: { int ret = slice_del(z); /* delete, line 119 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 120 */ z->ket = z->c; /* [, line 121 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab10; } among_var = find_among_b(z, a_3, 2); /* substring, line 121 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab10; } z->bra = z->c; /* ], line 121 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab10; } /* call R2, line 121 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab10; } case 1: { int ret = 
slice_del(z); /* delete, line 123 */ if (ret < 0) return ret; } break; } lab10: ; } break; } lab3: z->c = z->l - m3; } return 1; } extern int german_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 134 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 134 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 135 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 135 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 136 */ { int m3 = z->l - z->c; (void)m3; /* do, line 137 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab2; /* call standard_suffix, line 137 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } z->c = z->lb; { int c4 = z->c; /* do, line 138 */ { int ret = r_postlude(z); if (ret == 0) goto lab3; /* call postlude, line 138 */ if (ret < 0) return ret; } lab3: z->c = c4; } return 1; } extern struct SN_env * german_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } extern void german_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_german.h000066400000000000000000000005021456444476200312340ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* german_ISO_8859_1_create_env(void); extern void german_ISO_8859_1_close_env(struct SN_env* z); extern int german_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.c000066400000000000000000001176071456444476200317510ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int hungarian_ISO_8859_1_stem(struct SN_env 
* z); #ifdef __cplusplus } #endif static int r_double(struct SN_env * z); static int r_undouble(struct SN_env * z); static int r_factive(struct SN_env * z); static int r_instrum(struct SN_env * z); static int r_plur_owner(struct SN_env * z); static int r_sing_owner(struct SN_env * z); static int r_owned(struct SN_env * z); static int r_plural(struct SN_env * z); static int r_case_other(struct SN_env * z); static int r_case_special(struct SN_env * z); static int r_case(struct SN_env * z); static int r_v_ending(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * hungarian_ISO_8859_1_create_env(void); extern void hungarian_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[2] = { 'c', 's' }; static const symbol s_0_1[3] = { 'd', 'z', 's' }; static const symbol s_0_2[2] = { 'g', 'y' }; static const symbol s_0_3[2] = { 'l', 'y' }; static const symbol s_0_4[2] = { 'n', 'y' }; static const symbol s_0_5[2] = { 's', 'z' }; static const symbol s_0_6[2] = { 't', 'y' }; static const symbol s_0_7[2] = { 'z', 's' }; static const struct among a_0[8] = { /* 0 */ { 2, s_0_0, -1, -1, 0}, /* 1 */ { 3, s_0_1, -1, -1, 0}, /* 2 */ { 2, s_0_2, -1, -1, 0}, /* 3 */ { 2, s_0_3, -1, -1, 0}, /* 4 */ { 2, s_0_4, -1, -1, 0}, /* 5 */ { 2, s_0_5, -1, -1, 0}, /* 6 */ { 2, s_0_6, -1, -1, 0}, /* 7 */ { 2, s_0_7, -1, -1, 0} }; static const symbol s_1_0[1] = { 0xE1 }; static const symbol s_1_1[1] = { 0xE9 }; static const struct among a_1[2] = { /* 0 */ { 1, s_1_0, -1, 1, 0}, /* 1 */ { 1, s_1_1, -1, 2, 0} }; static const symbol s_2_0[2] = { 'b', 'b' }; static const symbol s_2_1[2] = { 'c', 'c' }; static const symbol s_2_2[2] = { 'd', 'd' }; static const symbol s_2_3[2] = { 'f', 'f' }; static const symbol s_2_4[2] = { 'g', 'g' }; static const symbol s_2_5[2] = { 'j', 'j' }; static const symbol s_2_6[2] = { 'k', 'k' }; static const symbol s_2_7[2] = { 
'l', 'l' }; static const symbol s_2_8[2] = { 'm', 'm' }; static const symbol s_2_9[2] = { 'n', 'n' }; static const symbol s_2_10[2] = { 'p', 'p' }; static const symbol s_2_11[2] = { 'r', 'r' }; static const symbol s_2_12[3] = { 'c', 'c', 's' }; static const symbol s_2_13[2] = { 's', 's' }; static const symbol s_2_14[3] = { 'z', 'z', 's' }; static const symbol s_2_15[2] = { 't', 't' }; static const symbol s_2_16[2] = { 'v', 'v' }; static const symbol s_2_17[3] = { 'g', 'g', 'y' }; static const symbol s_2_18[3] = { 'l', 'l', 'y' }; static const symbol s_2_19[3] = { 'n', 'n', 'y' }; static const symbol s_2_20[3] = { 't', 't', 'y' }; static const symbol s_2_21[3] = { 's', 's', 'z' }; static const symbol s_2_22[2] = { 'z', 'z' }; static const struct among a_2[23] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 2, s_2_2, -1, -1, 0}, /* 3 */ { 2, s_2_3, -1, -1, 0}, /* 4 */ { 2, s_2_4, -1, -1, 0}, /* 5 */ { 2, s_2_5, -1, -1, 0}, /* 6 */ { 2, s_2_6, -1, -1, 0}, /* 7 */ { 2, s_2_7, -1, -1, 0}, /* 8 */ { 2, s_2_8, -1, -1, 0}, /* 9 */ { 2, s_2_9, -1, -1, 0}, /* 10 */ { 2, s_2_10, -1, -1, 0}, /* 11 */ { 2, s_2_11, -1, -1, 0}, /* 12 */ { 3, s_2_12, -1, -1, 0}, /* 13 */ { 2, s_2_13, -1, -1, 0}, /* 14 */ { 3, s_2_14, -1, -1, 0}, /* 15 */ { 2, s_2_15, -1, -1, 0}, /* 16 */ { 2, s_2_16, -1, -1, 0}, /* 17 */ { 3, s_2_17, -1, -1, 0}, /* 18 */ { 3, s_2_18, -1, -1, 0}, /* 19 */ { 3, s_2_19, -1, -1, 0}, /* 20 */ { 3, s_2_20, -1, -1, 0}, /* 21 */ { 3, s_2_21, -1, -1, 0}, /* 22 */ { 2, s_2_22, -1, -1, 0} }; static const symbol s_3_0[2] = { 'a', 'l' }; static const symbol s_3_1[2] = { 'e', 'l' }; static const struct among a_3[2] = { /* 0 */ { 2, s_3_0, -1, 1, 0}, /* 1 */ { 2, s_3_1, -1, 2, 0} }; static const symbol s_4_0[2] = { 'b', 'a' }; static const symbol s_4_1[2] = { 'r', 'a' }; static const symbol s_4_2[2] = { 'b', 'e' }; static const symbol s_4_3[2] = { 'r', 'e' }; static const symbol s_4_4[2] = { 'i', 'g' }; static const symbol s_4_5[3] = { 'n', 'a', 'k' 
}; static const symbol s_4_6[3] = { 'n', 'e', 'k' }; static const symbol s_4_7[3] = { 'v', 'a', 'l' }; static const symbol s_4_8[3] = { 'v', 'e', 'l' }; static const symbol s_4_9[2] = { 'u', 'l' }; static const symbol s_4_10[3] = { 'n', 0xE1, 'l' }; static const symbol s_4_11[3] = { 'n', 0xE9, 'l' }; static const symbol s_4_12[3] = { 'b', 0xF3, 'l' }; static const symbol s_4_13[3] = { 'r', 0xF3, 'l' }; static const symbol s_4_14[3] = { 't', 0xF3, 'l' }; static const symbol s_4_15[3] = { 'b', 0xF5, 'l' }; static const symbol s_4_16[3] = { 'r', 0xF5, 'l' }; static const symbol s_4_17[3] = { 't', 0xF5, 'l' }; static const symbol s_4_18[2] = { 0xFC, 'l' }; static const symbol s_4_19[1] = { 'n' }; static const symbol s_4_20[2] = { 'a', 'n' }; static const symbol s_4_21[3] = { 'b', 'a', 'n' }; static const symbol s_4_22[2] = { 'e', 'n' }; static const symbol s_4_23[3] = { 'b', 'e', 'n' }; static const symbol s_4_24[6] = { 'k', 0xE9, 'p', 'p', 'e', 'n' }; static const symbol s_4_25[2] = { 'o', 'n' }; static const symbol s_4_26[2] = { 0xF6, 'n' }; static const symbol s_4_27[4] = { 'k', 0xE9, 'p', 'p' }; static const symbol s_4_28[3] = { 'k', 'o', 'r' }; static const symbol s_4_29[1] = { 't' }; static const symbol s_4_30[2] = { 'a', 't' }; static const symbol s_4_31[2] = { 'e', 't' }; static const symbol s_4_32[4] = { 'k', 0xE9, 'n', 't' }; static const symbol s_4_33[6] = { 'a', 'n', 'k', 0xE9, 'n', 't' }; static const symbol s_4_34[6] = { 'e', 'n', 'k', 0xE9, 'n', 't' }; static const symbol s_4_35[6] = { 'o', 'n', 'k', 0xE9, 'n', 't' }; static const symbol s_4_36[2] = { 'o', 't' }; static const symbol s_4_37[3] = { 0xE9, 'r', 't' }; static const symbol s_4_38[2] = { 0xF6, 't' }; static const symbol s_4_39[3] = { 'h', 'e', 'z' }; static const symbol s_4_40[3] = { 'h', 'o', 'z' }; static const symbol s_4_41[3] = { 'h', 0xF6, 'z' }; static const symbol s_4_42[2] = { 'v', 0xE1 }; static const symbol s_4_43[2] = { 'v', 0xE9 }; static const struct among a_4[44] = { /* 0 */ { 2, 
s_4_0, -1, -1, 0}, /* 1 */ { 2, s_4_1, -1, -1, 0}, /* 2 */ { 2, s_4_2, -1, -1, 0}, /* 3 */ { 2, s_4_3, -1, -1, 0}, /* 4 */ { 2, s_4_4, -1, -1, 0}, /* 5 */ { 3, s_4_5, -1, -1, 0}, /* 6 */ { 3, s_4_6, -1, -1, 0}, /* 7 */ { 3, s_4_7, -1, -1, 0}, /* 8 */ { 3, s_4_8, -1, -1, 0}, /* 9 */ { 2, s_4_9, -1, -1, 0}, /* 10 */ { 3, s_4_10, -1, -1, 0}, /* 11 */ { 3, s_4_11, -1, -1, 0}, /* 12 */ { 3, s_4_12, -1, -1, 0}, /* 13 */ { 3, s_4_13, -1, -1, 0}, /* 14 */ { 3, s_4_14, -1, -1, 0}, /* 15 */ { 3, s_4_15, -1, -1, 0}, /* 16 */ { 3, s_4_16, -1, -1, 0}, /* 17 */ { 3, s_4_17, -1, -1, 0}, /* 18 */ { 2, s_4_18, -1, -1, 0}, /* 19 */ { 1, s_4_19, -1, -1, 0}, /* 20 */ { 2, s_4_20, 19, -1, 0}, /* 21 */ { 3, s_4_21, 20, -1, 0}, /* 22 */ { 2, s_4_22, 19, -1, 0}, /* 23 */ { 3, s_4_23, 22, -1, 0}, /* 24 */ { 6, s_4_24, 22, -1, 0}, /* 25 */ { 2, s_4_25, 19, -1, 0}, /* 26 */ { 2, s_4_26, 19, -1, 0}, /* 27 */ { 4, s_4_27, -1, -1, 0}, /* 28 */ { 3, s_4_28, -1, -1, 0}, /* 29 */ { 1, s_4_29, -1, -1, 0}, /* 30 */ { 2, s_4_30, 29, -1, 0}, /* 31 */ { 2, s_4_31, 29, -1, 0}, /* 32 */ { 4, s_4_32, 29, -1, 0}, /* 33 */ { 6, s_4_33, 32, -1, 0}, /* 34 */ { 6, s_4_34, 32, -1, 0}, /* 35 */ { 6, s_4_35, 32, -1, 0}, /* 36 */ { 2, s_4_36, 29, -1, 0}, /* 37 */ { 3, s_4_37, 29, -1, 0}, /* 38 */ { 2, s_4_38, 29, -1, 0}, /* 39 */ { 3, s_4_39, -1, -1, 0}, /* 40 */ { 3, s_4_40, -1, -1, 0}, /* 41 */ { 3, s_4_41, -1, -1, 0}, /* 42 */ { 2, s_4_42, -1, -1, 0}, /* 43 */ { 2, s_4_43, -1, -1, 0} }; static const symbol s_5_0[2] = { 0xE1, 'n' }; static const symbol s_5_1[2] = { 0xE9, 'n' }; static const symbol s_5_2[6] = { 0xE1, 'n', 'k', 0xE9, 'n', 't' }; static const struct among a_5[3] = { /* 0 */ { 2, s_5_0, -1, 2, 0}, /* 1 */ { 2, s_5_1, -1, 1, 0}, /* 2 */ { 6, s_5_2, -1, 3, 0} }; static const symbol s_6_0[4] = { 's', 't', 'u', 'l' }; static const symbol s_6_1[5] = { 'a', 's', 't', 'u', 'l' }; static const symbol s_6_2[5] = { 0xE1, 's', 't', 'u', 'l' }; static const symbol s_6_3[4] = { 's', 't', 0xFC, 'l' }; static 
const symbol s_6_4[5] = { 'e', 's', 't', 0xFC, 'l' }; static const symbol s_6_5[5] = { 0xE9, 's', 't', 0xFC, 'l' }; static const struct among a_6[6] = { /* 0 */ { 4, s_6_0, -1, 2, 0}, /* 1 */ { 5, s_6_1, 0, 1, 0}, /* 2 */ { 5, s_6_2, 0, 3, 0}, /* 3 */ { 4, s_6_3, -1, 2, 0}, /* 4 */ { 5, s_6_4, 3, 1, 0}, /* 5 */ { 5, s_6_5, 3, 4, 0} }; static const symbol s_7_0[1] = { 0xE1 }; static const symbol s_7_1[1] = { 0xE9 }; static const struct among a_7[2] = { /* 0 */ { 1, s_7_0, -1, 1, 0}, /* 1 */ { 1, s_7_1, -1, 2, 0} }; static const symbol s_8_0[1] = { 'k' }; static const symbol s_8_1[2] = { 'a', 'k' }; static const symbol s_8_2[2] = { 'e', 'k' }; static const symbol s_8_3[2] = { 'o', 'k' }; static const symbol s_8_4[2] = { 0xE1, 'k' }; static const symbol s_8_5[2] = { 0xE9, 'k' }; static const symbol s_8_6[2] = { 0xF6, 'k' }; static const struct among a_8[7] = { /* 0 */ { 1, s_8_0, -1, 7, 0}, /* 1 */ { 2, s_8_1, 0, 4, 0}, /* 2 */ { 2, s_8_2, 0, 6, 0}, /* 3 */ { 2, s_8_3, 0, 5, 0}, /* 4 */ { 2, s_8_4, 0, 1, 0}, /* 5 */ { 2, s_8_5, 0, 2, 0}, /* 6 */ { 2, s_8_6, 0, 3, 0} }; static const symbol s_9_0[2] = { 0xE9, 'i' }; static const symbol s_9_1[3] = { 0xE1, 0xE9, 'i' }; static const symbol s_9_2[3] = { 0xE9, 0xE9, 'i' }; static const symbol s_9_3[1] = { 0xE9 }; static const symbol s_9_4[2] = { 'k', 0xE9 }; static const symbol s_9_5[3] = { 'a', 'k', 0xE9 }; static const symbol s_9_6[3] = { 'e', 'k', 0xE9 }; static const symbol s_9_7[3] = { 'o', 'k', 0xE9 }; static const symbol s_9_8[3] = { 0xE1, 'k', 0xE9 }; static const symbol s_9_9[3] = { 0xE9, 'k', 0xE9 }; static const symbol s_9_10[3] = { 0xF6, 'k', 0xE9 }; static const symbol s_9_11[2] = { 0xE9, 0xE9 }; static const struct among a_9[12] = { /* 0 */ { 2, s_9_0, -1, 7, 0}, /* 1 */ { 3, s_9_1, 0, 6, 0}, /* 2 */ { 3, s_9_2, 0, 5, 0}, /* 3 */ { 1, s_9_3, -1, 9, 0}, /* 4 */ { 2, s_9_4, 3, 4, 0}, /* 5 */ { 3, s_9_5, 4, 1, 0}, /* 6 */ { 3, s_9_6, 4, 1, 0}, /* 7 */ { 3, s_9_7, 4, 1, 0}, /* 8 */ { 3, s_9_8, 4, 3, 0}, /* 9 */ { 
3, s_9_9, 4, 2, 0}, /* 10 */ { 3, s_9_10, 4, 1, 0}, /* 11 */ { 2, s_9_11, 3, 8, 0} }; static const symbol s_10_0[1] = { 'a' }; static const symbol s_10_1[2] = { 'j', 'a' }; static const symbol s_10_2[1] = { 'd' }; static const symbol s_10_3[2] = { 'a', 'd' }; static const symbol s_10_4[2] = { 'e', 'd' }; static const symbol s_10_5[2] = { 'o', 'd' }; static const symbol s_10_6[2] = { 0xE1, 'd' }; static const symbol s_10_7[2] = { 0xE9, 'd' }; static const symbol s_10_8[2] = { 0xF6, 'd' }; static const symbol s_10_9[1] = { 'e' }; static const symbol s_10_10[2] = { 'j', 'e' }; static const symbol s_10_11[2] = { 'n', 'k' }; static const symbol s_10_12[3] = { 'u', 'n', 'k' }; static const symbol s_10_13[3] = { 0xE1, 'n', 'k' }; static const symbol s_10_14[3] = { 0xE9, 'n', 'k' }; static const symbol s_10_15[3] = { 0xFC, 'n', 'k' }; static const symbol s_10_16[2] = { 'u', 'k' }; static const symbol s_10_17[3] = { 'j', 'u', 'k' }; static const symbol s_10_18[4] = { 0xE1, 'j', 'u', 'k' }; static const symbol s_10_19[2] = { 0xFC, 'k' }; static const symbol s_10_20[3] = { 'j', 0xFC, 'k' }; static const symbol s_10_21[4] = { 0xE9, 'j', 0xFC, 'k' }; static const symbol s_10_22[1] = { 'm' }; static const symbol s_10_23[2] = { 'a', 'm' }; static const symbol s_10_24[2] = { 'e', 'm' }; static const symbol s_10_25[2] = { 'o', 'm' }; static const symbol s_10_26[2] = { 0xE1, 'm' }; static const symbol s_10_27[2] = { 0xE9, 'm' }; static const symbol s_10_28[1] = { 'o' }; static const symbol s_10_29[1] = { 0xE1 }; static const symbol s_10_30[1] = { 0xE9 }; static const struct among a_10[31] = { /* 0 */ { 1, s_10_0, -1, 18, 0}, /* 1 */ { 2, s_10_1, 0, 17, 0}, /* 2 */ { 1, s_10_2, -1, 16, 0}, /* 3 */ { 2, s_10_3, 2, 13, 0}, /* 4 */ { 2, s_10_4, 2, 13, 0}, /* 5 */ { 2, s_10_5, 2, 13, 0}, /* 6 */ { 2, s_10_6, 2, 14, 0}, /* 7 */ { 2, s_10_7, 2, 15, 0}, /* 8 */ { 2, s_10_8, 2, 13, 0}, /* 9 */ { 1, s_10_9, -1, 18, 0}, /* 10 */ { 2, s_10_10, 9, 17, 0}, /* 11 */ { 2, s_10_11, -1, 4, 0}, /* 12 
*/ { 3, s_10_12, 11, 1, 0}, /* 13 */ { 3, s_10_13, 11, 2, 0}, /* 14 */ { 3, s_10_14, 11, 3, 0}, /* 15 */ { 3, s_10_15, 11, 1, 0}, /* 16 */ { 2, s_10_16, -1, 8, 0}, /* 17 */ { 3, s_10_17, 16, 7, 0}, /* 18 */ { 4, s_10_18, 17, 5, 0}, /* 19 */ { 2, s_10_19, -1, 8, 0}, /* 20 */ { 3, s_10_20, 19, 7, 0}, /* 21 */ { 4, s_10_21, 20, 6, 0}, /* 22 */ { 1, s_10_22, -1, 12, 0}, /* 23 */ { 2, s_10_23, 22, 9, 0}, /* 24 */ { 2, s_10_24, 22, 9, 0}, /* 25 */ { 2, s_10_25, 22, 9, 0}, /* 26 */ { 2, s_10_26, 22, 10, 0}, /* 27 */ { 2, s_10_27, 22, 11, 0}, /* 28 */ { 1, s_10_28, -1, 18, 0}, /* 29 */ { 1, s_10_29, -1, 19, 0}, /* 30 */ { 1, s_10_30, -1, 20, 0} }; static const symbol s_11_0[2] = { 'i', 'd' }; static const symbol s_11_1[3] = { 'a', 'i', 'd' }; static const symbol s_11_2[4] = { 'j', 'a', 'i', 'd' }; static const symbol s_11_3[3] = { 'e', 'i', 'd' }; static const symbol s_11_4[4] = { 'j', 'e', 'i', 'd' }; static const symbol s_11_5[3] = { 0xE1, 'i', 'd' }; static const symbol s_11_6[3] = { 0xE9, 'i', 'd' }; static const symbol s_11_7[1] = { 'i' }; static const symbol s_11_8[2] = { 'a', 'i' }; static const symbol s_11_9[3] = { 'j', 'a', 'i' }; static const symbol s_11_10[2] = { 'e', 'i' }; static const symbol s_11_11[3] = { 'j', 'e', 'i' }; static const symbol s_11_12[2] = { 0xE1, 'i' }; static const symbol s_11_13[2] = { 0xE9, 'i' }; static const symbol s_11_14[4] = { 'i', 't', 'e', 'k' }; static const symbol s_11_15[5] = { 'e', 'i', 't', 'e', 'k' }; static const symbol s_11_16[6] = { 'j', 'e', 'i', 't', 'e', 'k' }; static const symbol s_11_17[5] = { 0xE9, 'i', 't', 'e', 'k' }; static const symbol s_11_18[2] = { 'i', 'k' }; static const symbol s_11_19[3] = { 'a', 'i', 'k' }; static const symbol s_11_20[4] = { 'j', 'a', 'i', 'k' }; static const symbol s_11_21[3] = { 'e', 'i', 'k' }; static const symbol s_11_22[4] = { 'j', 'e', 'i', 'k' }; static const symbol s_11_23[3] = { 0xE1, 'i', 'k' }; static const symbol s_11_24[3] = { 0xE9, 'i', 'k' }; static const symbol s_11_25[3] = { 
'i', 'n', 'k' }; static const symbol s_11_26[4] = { 'a', 'i', 'n', 'k' }; static const symbol s_11_27[5] = { 'j', 'a', 'i', 'n', 'k' }; static const symbol s_11_28[4] = { 'e', 'i', 'n', 'k' }; static const symbol s_11_29[5] = { 'j', 'e', 'i', 'n', 'k' }; static const symbol s_11_30[4] = { 0xE1, 'i', 'n', 'k' }; static const symbol s_11_31[4] = { 0xE9, 'i', 'n', 'k' }; static const symbol s_11_32[5] = { 'a', 'i', 't', 'o', 'k' }; static const symbol s_11_33[6] = { 'j', 'a', 'i', 't', 'o', 'k' }; static const symbol s_11_34[5] = { 0xE1, 'i', 't', 'o', 'k' }; static const symbol s_11_35[2] = { 'i', 'm' }; static const symbol s_11_36[3] = { 'a', 'i', 'm' }; static const symbol s_11_37[4] = { 'j', 'a', 'i', 'm' }; static const symbol s_11_38[3] = { 'e', 'i', 'm' }; static const symbol s_11_39[4] = { 'j', 'e', 'i', 'm' }; static const symbol s_11_40[3] = { 0xE1, 'i', 'm' }; static const symbol s_11_41[3] = { 0xE9, 'i', 'm' }; static const struct among a_11[42] = { /* 0 */ { 2, s_11_0, -1, 10, 0}, /* 1 */ { 3, s_11_1, 0, 9, 0}, /* 2 */ { 4, s_11_2, 1, 6, 0}, /* 3 */ { 3, s_11_3, 0, 9, 0}, /* 4 */ { 4, s_11_4, 3, 6, 0}, /* 5 */ { 3, s_11_5, 0, 7, 0}, /* 6 */ { 3, s_11_6, 0, 8, 0}, /* 7 */ { 1, s_11_7, -1, 15, 0}, /* 8 */ { 2, s_11_8, 7, 14, 0}, /* 9 */ { 3, s_11_9, 8, 11, 0}, /* 10 */ { 2, s_11_10, 7, 14, 0}, /* 11 */ { 3, s_11_11, 10, 11, 0}, /* 12 */ { 2, s_11_12, 7, 12, 0}, /* 13 */ { 2, s_11_13, 7, 13, 0}, /* 14 */ { 4, s_11_14, -1, 24, 0}, /* 15 */ { 5, s_11_15, 14, 21, 0}, /* 16 */ { 6, s_11_16, 15, 20, 0}, /* 17 */ { 5, s_11_17, 14, 23, 0}, /* 18 */ { 2, s_11_18, -1, 29, 0}, /* 19 */ { 3, s_11_19, 18, 26, 0}, /* 20 */ { 4, s_11_20, 19, 25, 0}, /* 21 */ { 3, s_11_21, 18, 26, 0}, /* 22 */ { 4, s_11_22, 21, 25, 0}, /* 23 */ { 3, s_11_23, 18, 27, 0}, /* 24 */ { 3, s_11_24, 18, 28, 0}, /* 25 */ { 3, s_11_25, -1, 20, 0}, /* 26 */ { 4, s_11_26, 25, 17, 0}, /* 27 */ { 5, s_11_27, 26, 16, 0}, /* 28 */ { 4, s_11_28, 25, 17, 0}, /* 29 */ { 5, s_11_29, 28, 16, 0}, /* 30 */ { 4, 
s_11_30, 25, 18, 0}, /* 31 */ { 4, s_11_31, 25, 19, 0}, /* 32 */ { 5, s_11_32, -1, 21, 0}, /* 33 */ { 6, s_11_33, 32, 20, 0}, /* 34 */ { 5, s_11_34, -1, 22, 0}, /* 35 */ { 2, s_11_35, -1, 5, 0}, /* 36 */ { 3, s_11_36, 35, 4, 0}, /* 37 */ { 4, s_11_37, 36, 1, 0}, /* 38 */ { 3, s_11_38, 35, 4, 0}, /* 39 */ { 4, s_11_39, 38, 1, 0}, /* 40 */ { 3, s_11_40, 35, 2, 0}, /* 41 */ { 3, s_11_41, 35, 3, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 52, 14 }; static const symbol s_0[] = { 'a' }; static const symbol s_1[] = { 'e' }; static const symbol s_2[] = { 'e' }; static const symbol s_3[] = { 'a' }; static const symbol s_4[] = { 'a' }; static const symbol s_5[] = { 'a' }; static const symbol s_6[] = { 'e' }; static const symbol s_7[] = { 'a' }; static const symbol s_8[] = { 'e' }; static const symbol s_9[] = { 'e' }; static const symbol s_10[] = { 'a' }; static const symbol s_11[] = { 'e' }; static const symbol s_12[] = { 'a' }; static const symbol s_13[] = { 'e' }; static const symbol s_14[] = { 'a' }; static const symbol s_15[] = { 'e' }; static const symbol s_16[] = { 'a' }; static const symbol s_17[] = { 'e' }; static const symbol s_18[] = { 'a' }; static const symbol s_19[] = { 'e' }; static const symbol s_20[] = { 'a' }; static const symbol s_21[] = { 'e' }; static const symbol s_22[] = { 'a' }; static const symbol s_23[] = { 'e' }; static const symbol s_24[] = { 'a' }; static const symbol s_25[] = { 'e' }; static const symbol s_26[] = { 'a' }; static const symbol s_27[] = { 'e' }; static const symbol s_28[] = { 'a' }; static const symbol s_29[] = { 'e' }; static const symbol s_30[] = { 'a' }; static const symbol s_31[] = { 'e' }; static const symbol s_32[] = { 'a' }; static const symbol s_33[] = { 'e' }; static const symbol s_34[] = { 'a' }; static const symbol s_35[] = { 'e' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c1 = z->c; /* or, line 51 */ if (in_grouping(z, g_v, 97, 252, 0)) 
goto lab1; if (in_grouping(z, g_v, 97, 252, 1) < 0) goto lab1; /* goto */ /* non v, line 48 */ { int c2 = z->c; /* or, line 49 */ if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 3 || !((101187584 >> (z->p[z->c + 1] & 0x1f)) & 1)) goto lab3; if (!(find_among(z, a_0, 8))) goto lab3; /* among, line 49 */ goto lab2; lab3: z->c = c2; if (z->c >= z->l) goto lab1; z->c++; /* next, line 49 */ } lab2: z->I[0] = z->c; /* setmark p1, line 50 */ goto lab0; lab1: z->c = c1; if (out_grouping(z, g_v, 97, 252, 0)) return 0; { /* gopast */ /* grouping v, line 53 */ int ret = out_grouping(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 53 */ } lab0: return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_v_ending(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 61 */ if (z->c <= z->lb || (z->p[z->c - 1] != 225 && z->p[z->c - 1] != 233)) return 0; among_var = find_among_b(z, a_1, 2); /* substring, line 61 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 61 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 61 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 1, s_0); /* <-, line 62 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 63 */ if (ret < 0) return ret; } break; } return 1; } static int r_double(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 68 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((106790108 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_2, 23))) return 0; /* among, line 68 */ z->c = z->l - m_test; } return 1; } static int r_undouble(struct SN_env * z) { if (z->c <= z->lb) return 0; z->c--; /* next, line 73 */ z->ket = z->c; /* [, line 73 */ { int ret = z->c - 1; if (z->lb > ret || ret > z->l) return 0; z->c = ret; /* hop, line 73 */ } z->bra = z->c; /* ], line 73 */ { int ret = slice_del(z); 
/* delete, line 73 */ if (ret < 0) return ret; } return 1; } static int r_instrum(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 77 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] != 108) return 0; among_var = find_among_b(z, a_3, 2); /* substring, line 77 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 77 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 77 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = r_double(z); if (ret == 0) return 0; /* call double, line 78 */ if (ret < 0) return ret; } break; case 2: { int ret = r_double(z); if (ret == 0) return 0; /* call double, line 79 */ if (ret < 0) return ret; } break; } { int ret = slice_del(z); /* delete, line 81 */ if (ret < 0) return ret; } { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 82 */ if (ret < 0) return ret; } return 1; } static int r_case(struct SN_env * z) { z->ket = z->c; /* [, line 87 */ if (!(find_among_b(z, a_4, 44))) return 0; /* substring, line 87 */ z->bra = z->c; /* ], line 87 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 87 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 111 */ if (ret < 0) return ret; } { int ret = r_v_ending(z); if (ret == 0) return 0; /* call v_ending, line 112 */ if (ret < 0) return ret; } return 1; } static int r_case_special(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 116 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 110 && z->p[z->c - 1] != 116)) return 0; among_var = find_among_b(z, a_5, 3); /* substring, line 116 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 116 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 116 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 1, s_2); /* <-, line 117 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_3); /* <-, line 118 */ if (ret < 0) return ret; } break; case 3: 
{ int ret = slice_from_s(z, 1, s_4); /* <-, line 119 */ if (ret < 0) return ret; } break; } return 1; } static int r_case_other(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 124 */ if (z->c - 3 <= z->lb || z->p[z->c - 1] != 108) return 0; among_var = find_among_b(z, a_6, 6); /* substring, line 124 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 124 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 124 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 125 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 126 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_5); /* <-, line 127 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_6); /* <-, line 128 */ if (ret < 0) return ret; } break; } return 1; } static int r_factive(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 133 */ if (z->c <= z->lb || (z->p[z->c - 1] != 225 && z->p[z->c - 1] != 233)) return 0; among_var = find_among_b(z, a_7, 2); /* substring, line 133 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 133 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 133 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = r_double(z); if (ret == 0) return 0; /* call double, line 134 */ if (ret < 0) return ret; } break; case 2: { int ret = r_double(z); if (ret == 0) return 0; /* call double, line 135 */ if (ret < 0) return ret; } break; } { int ret = slice_del(z); /* delete, line 137 */ if (ret < 0) return ret; } { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 138 */ if (ret < 0) return ret; } return 1; } static int r_plural(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 142 */ if (z->c <= z->lb || z->p[z->c - 1] != 107) return 0; among_var = find_among_b(z, a_8, 7); /* substring, line 142 */ if (!(among_var)) 
return 0; z->bra = z->c; /* ], line 142 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 142 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 1, s_7); /* <-, line 143 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_8); /* <-, line 144 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 145 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_del(z); /* delete, line 146 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_del(z); /* delete, line 147 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_del(z); /* delete, line 148 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_del(z); /* delete, line 149 */ if (ret < 0) return ret; } break; } return 1; } static int r_owned(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 154 */ if (z->c <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 233)) return 0; among_var = find_among_b(z, a_9, 12); /* substring, line 154 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 154 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 154 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 155 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_9); /* <-, line 156 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_10); /* <-, line 157 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_del(z); /* delete, line 158 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_11); /* <-, line 159 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 1, s_12); /* <-, line 160 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_del(z); /* delete, line 161 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 1, s_13); /* <-, line 162 
*/ if (ret < 0) return ret; } break; case 9: { int ret = slice_del(z); /* delete, line 163 */ if (ret < 0) return ret; } break; } return 1; } static int r_sing_owner(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 168 */ among_var = find_among_b(z, a_10, 31); /* substring, line 168 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 168 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 168 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 169 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_14); /* <-, line 170 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_15); /* <-, line 171 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_del(z); /* delete, line 172 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_16); /* <-, line 173 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 1, s_17); /* <-, line 174 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_del(z); /* delete, line 175 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_del(z); /* delete, line 176 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_del(z); /* delete, line 177 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 1, s_18); /* <-, line 178 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_from_s(z, 1, s_19); /* <-, line 179 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_del(z); /* delete, line 180 */ if (ret < 0) return ret; } break; case 13: { int ret = slice_del(z); /* delete, line 181 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_from_s(z, 1, s_20); /* <-, line 182 */ if (ret < 0) return ret; } break; case 15: { int ret = slice_from_s(z, 1, s_21); /* <-, line 183 */ if (ret < 0) return ret; } break; case 16: { int ret = slice_del(z); /* delete, line 184 */ if (ret < 
0) return ret; } break; case 17: { int ret = slice_del(z); /* delete, line 185 */ if (ret < 0) return ret; } break; case 18: { int ret = slice_del(z); /* delete, line 186 */ if (ret < 0) return ret; } break; case 19: { int ret = slice_from_s(z, 1, s_22); /* <-, line 187 */ if (ret < 0) return ret; } break; case 20: { int ret = slice_from_s(z, 1, s_23); /* <-, line 188 */ if (ret < 0) return ret; } break; } return 1; } static int r_plur_owner(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 193 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((10768 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_11, 42); /* substring, line 193 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 193 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 193 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 194 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_24); /* <-, line 195 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_25); /* <-, line 196 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_del(z); /* delete, line 197 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_del(z); /* delete, line 198 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_del(z); /* delete, line 199 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 1, s_26); /* <-, line 200 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 1, s_27); /* <-, line 201 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_del(z); /* delete, line 202 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_del(z); /* delete, line 203 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_del(z); /* delete, line 204 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_from_s(z, 1, s_28); /* <-, line 205 */ if (ret < 0) return 
ret; } break; case 13: { int ret = slice_from_s(z, 1, s_29); /* <-, line 206 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_del(z); /* delete, line 207 */ if (ret < 0) return ret; } break; case 15: { int ret = slice_del(z); /* delete, line 208 */ if (ret < 0) return ret; } break; case 16: { int ret = slice_del(z); /* delete, line 209 */ if (ret < 0) return ret; } break; case 17: { int ret = slice_del(z); /* delete, line 210 */ if (ret < 0) return ret; } break; case 18: { int ret = slice_from_s(z, 1, s_30); /* <-, line 211 */ if (ret < 0) return ret; } break; case 19: { int ret = slice_from_s(z, 1, s_31); /* <-, line 212 */ if (ret < 0) return ret; } break; case 20: { int ret = slice_del(z); /* delete, line 214 */ if (ret < 0) return ret; } break; case 21: { int ret = slice_del(z); /* delete, line 215 */ if (ret < 0) return ret; } break; case 22: { int ret = slice_from_s(z, 1, s_32); /* <-, line 216 */ if (ret < 0) return ret; } break; case 23: { int ret = slice_from_s(z, 1, s_33); /* <-, line 217 */ if (ret < 0) return ret; } break; case 24: { int ret = slice_del(z); /* delete, line 218 */ if (ret < 0) return ret; } break; case 25: { int ret = slice_del(z); /* delete, line 219 */ if (ret < 0) return ret; } break; case 26: { int ret = slice_del(z); /* delete, line 220 */ if (ret < 0) return ret; } break; case 27: { int ret = slice_from_s(z, 1, s_34); /* <-, line 221 */ if (ret < 0) return ret; } break; case 28: { int ret = slice_from_s(z, 1, s_35); /* <-, line 222 */ if (ret < 0) return ret; } break; case 29: { int ret = slice_del(z); /* delete, line 223 */ if (ret < 0) return ret; } break; } return 1; } extern int hungarian_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 229 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 229 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 230 */ { int m2 = z->l - z->c; (void)m2; /* do, line 231 */ { int ret = 
r_instrum(z); if (ret == 0) goto lab1; /* call instrum, line 231 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 232 */ { int ret = r_case(z); if (ret == 0) goto lab2; /* call case, line 232 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 233 */ { int ret = r_case_special(z); if (ret == 0) goto lab3; /* call case_special, line 233 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 234 */ { int ret = r_case_other(z); if (ret == 0) goto lab4; /* call case_other, line 234 */ if (ret < 0) return ret; } lab4: z->c = z->l - m5; } { int m6 = z->l - z->c; (void)m6; /* do, line 235 */ { int ret = r_factive(z); if (ret == 0) goto lab5; /* call factive, line 235 */ if (ret < 0) return ret; } lab5: z->c = z->l - m6; } { int m7 = z->l - z->c; (void)m7; /* do, line 236 */ { int ret = r_owned(z); if (ret == 0) goto lab6; /* call owned, line 236 */ if (ret < 0) return ret; } lab6: z->c = z->l - m7; } { int m8 = z->l - z->c; (void)m8; /* do, line 237 */ { int ret = r_sing_owner(z); if (ret == 0) goto lab7; /* call sing_owner, line 237 */ if (ret < 0) return ret; } lab7: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 238 */ { int ret = r_plur_owner(z); if (ret == 0) goto lab8; /* call plur_owner, line 238 */ if (ret < 0) return ret; } lab8: z->c = z->l - m9; } { int m10 = z->l - z->c; (void)m10; /* do, line 239 */ { int ret = r_plural(z); if (ret == 0) goto lab9; /* call plural, line 239 */ if (ret < 0) return ret; } lab9: z->c = z->l - m10; } z->c = z->lb; return 1; } extern struct SN_env * hungarian_ISO_8859_1_create_env(void) { return SN_create_env(0, 1, 0); } extern void hungarian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } 
LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h000066400000000000000000000005131456444476200317410ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* hungarian_ISO_8859_1_create_env(void); extern void hungarian_ISO_8859_1_close_env(struct SN_env* z); extern int hungarian_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_italian.c000066400000000000000000001161051456444476200314060ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int italian_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_vowel_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_attached_pronoun(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * italian_ISO_8859_1_create_env(void); extern void italian_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[2] = { 'q', 'u' }; static const symbol s_0_2[1] = { 0xE1 }; static const symbol s_0_3[1] = { 0xE9 }; static const symbol s_0_4[1] = { 0xED }; static const symbol s_0_5[1] = { 0xF3 }; static const symbol s_0_6[1] = { 0xFA }; static const struct among a_0[7] = { /* 0 */ { 0, 0, -1, 7, 0}, /* 1 */ { 2, s_0_1, 0, 6, 0}, /* 2 */ { 1, s_0_2, 0, 1, 0}, /* 3 */ { 1, s_0_3, 0, 2, 0}, /* 4 */ { 1, s_0_4, 0, 3, 0}, /* 5 */ { 1, s_0_5, 0, 4, 0}, /* 6 */ { 1, s_0_6, 
0, 5, 0} }; static const symbol s_1_1[1] = { 'I' }; static const symbol s_1_2[1] = { 'U' }; static const struct among a_1[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 1, s_1_1, 0, 1, 0}, /* 2 */ { 1, s_1_2, 0, 2, 0} }; static const symbol s_2_0[2] = { 'l', 'a' }; static const symbol s_2_1[4] = { 'c', 'e', 'l', 'a' }; static const symbol s_2_2[6] = { 'g', 'l', 'i', 'e', 'l', 'a' }; static const symbol s_2_3[4] = { 'm', 'e', 'l', 'a' }; static const symbol s_2_4[4] = { 't', 'e', 'l', 'a' }; static const symbol s_2_5[4] = { 'v', 'e', 'l', 'a' }; static const symbol s_2_6[2] = { 'l', 'e' }; static const symbol s_2_7[4] = { 'c', 'e', 'l', 'e' }; static const symbol s_2_8[6] = { 'g', 'l', 'i', 'e', 'l', 'e' }; static const symbol s_2_9[4] = { 'm', 'e', 'l', 'e' }; static const symbol s_2_10[4] = { 't', 'e', 'l', 'e' }; static const symbol s_2_11[4] = { 'v', 'e', 'l', 'e' }; static const symbol s_2_12[2] = { 'n', 'e' }; static const symbol s_2_13[4] = { 'c', 'e', 'n', 'e' }; static const symbol s_2_14[6] = { 'g', 'l', 'i', 'e', 'n', 'e' }; static const symbol s_2_15[4] = { 'm', 'e', 'n', 'e' }; static const symbol s_2_16[4] = { 's', 'e', 'n', 'e' }; static const symbol s_2_17[4] = { 't', 'e', 'n', 'e' }; static const symbol s_2_18[4] = { 'v', 'e', 'n', 'e' }; static const symbol s_2_19[2] = { 'c', 'i' }; static const symbol s_2_20[2] = { 'l', 'i' }; static const symbol s_2_21[4] = { 'c', 'e', 'l', 'i' }; static const symbol s_2_22[6] = { 'g', 'l', 'i', 'e', 'l', 'i' }; static const symbol s_2_23[4] = { 'm', 'e', 'l', 'i' }; static const symbol s_2_24[4] = { 't', 'e', 'l', 'i' }; static const symbol s_2_25[4] = { 'v', 'e', 'l', 'i' }; static const symbol s_2_26[3] = { 'g', 'l', 'i' }; static const symbol s_2_27[2] = { 'm', 'i' }; static const symbol s_2_28[2] = { 's', 'i' }; static const symbol s_2_29[2] = { 't', 'i' }; static const symbol s_2_30[2] = { 'v', 'i' }; static const symbol s_2_31[2] = { 'l', 'o' }; static const symbol s_2_32[4] = { 'c', 'e', 'l', 'o' }; static 
const symbol s_2_33[6] = { 'g', 'l', 'i', 'e', 'l', 'o' }; static const symbol s_2_34[4] = { 'm', 'e', 'l', 'o' }; static const symbol s_2_35[4] = { 't', 'e', 'l', 'o' }; static const symbol s_2_36[4] = { 'v', 'e', 'l', 'o' }; static const struct among a_2[37] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 4, s_2_1, 0, -1, 0}, /* 2 */ { 6, s_2_2, 0, -1, 0}, /* 3 */ { 4, s_2_3, 0, -1, 0}, /* 4 */ { 4, s_2_4, 0, -1, 0}, /* 5 */ { 4, s_2_5, 0, -1, 0}, /* 6 */ { 2, s_2_6, -1, -1, 0}, /* 7 */ { 4, s_2_7, 6, -1, 0}, /* 8 */ { 6, s_2_8, 6, -1, 0}, /* 9 */ { 4, s_2_9, 6, -1, 0}, /* 10 */ { 4, s_2_10, 6, -1, 0}, /* 11 */ { 4, s_2_11, 6, -1, 0}, /* 12 */ { 2, s_2_12, -1, -1, 0}, /* 13 */ { 4, s_2_13, 12, -1, 0}, /* 14 */ { 6, s_2_14, 12, -1, 0}, /* 15 */ { 4, s_2_15, 12, -1, 0}, /* 16 */ { 4, s_2_16, 12, -1, 0}, /* 17 */ { 4, s_2_17, 12, -1, 0}, /* 18 */ { 4, s_2_18, 12, -1, 0}, /* 19 */ { 2, s_2_19, -1, -1, 0}, /* 20 */ { 2, s_2_20, -1, -1, 0}, /* 21 */ { 4, s_2_21, 20, -1, 0}, /* 22 */ { 6, s_2_22, 20, -1, 0}, /* 23 */ { 4, s_2_23, 20, -1, 0}, /* 24 */ { 4, s_2_24, 20, -1, 0}, /* 25 */ { 4, s_2_25, 20, -1, 0}, /* 26 */ { 3, s_2_26, 20, -1, 0}, /* 27 */ { 2, s_2_27, -1, -1, 0}, /* 28 */ { 2, s_2_28, -1, -1, 0}, /* 29 */ { 2, s_2_29, -1, -1, 0}, /* 30 */ { 2, s_2_30, -1, -1, 0}, /* 31 */ { 2, s_2_31, -1, -1, 0}, /* 32 */ { 4, s_2_32, 31, -1, 0}, /* 33 */ { 6, s_2_33, 31, -1, 0}, /* 34 */ { 4, s_2_34, 31, -1, 0}, /* 35 */ { 4, s_2_35, 31, -1, 0}, /* 36 */ { 4, s_2_36, 31, -1, 0} }; static const symbol s_3_0[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_3_1[4] = { 'e', 'n', 'd', 'o' }; static const symbol s_3_2[2] = { 'a', 'r' }; static const symbol s_3_3[2] = { 'e', 'r' }; static const symbol s_3_4[2] = { 'i', 'r' }; static const struct among a_3[5] = { /* 0 */ { 4, s_3_0, -1, 1, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0}, /* 2 */ { 2, s_3_2, -1, 2, 0}, /* 3 */ { 2, s_3_3, -1, 2, 0}, /* 4 */ { 2, s_3_4, -1, 2, 0} }; static const symbol s_4_0[2] = { 'i', 'c' }; static const symbol 
s_4_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_4_2[2] = { 'o', 's' }; static const symbol s_4_3[2] = { 'i', 'v' }; static const struct among a_4[4] = { /* 0 */ { 2, s_4_0, -1, -1, 0}, /* 1 */ { 4, s_4_1, -1, -1, 0}, /* 2 */ { 2, s_4_2, -1, -1, 0}, /* 3 */ { 2, s_4_3, -1, 1, 0} }; static const symbol s_5_0[2] = { 'i', 'c' }; static const symbol s_5_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_5_2[2] = { 'i', 'v' }; static const struct among a_5[3] = { /* 0 */ { 2, s_5_0, -1, 1, 0}, /* 1 */ { 4, s_5_1, -1, 1, 0}, /* 2 */ { 2, s_5_2, -1, 1, 0} }; static const symbol s_6_0[3] = { 'i', 'c', 'a' }; static const symbol s_6_1[5] = { 'l', 'o', 'g', 'i', 'a' }; static const symbol s_6_2[3] = { 'o', 's', 'a' }; static const symbol s_6_3[4] = { 'i', 's', 't', 'a' }; static const symbol s_6_4[3] = { 'i', 'v', 'a' }; static const symbol s_6_5[4] = { 'a', 'n', 'z', 'a' }; static const symbol s_6_6[4] = { 'e', 'n', 'z', 'a' }; static const symbol s_6_7[3] = { 'i', 'c', 'e' }; static const symbol s_6_8[6] = { 'a', 't', 'r', 'i', 'c', 'e' }; static const symbol s_6_9[4] = { 'i', 'c', 'h', 'e' }; static const symbol s_6_10[5] = { 'l', 'o', 'g', 'i', 'e' }; static const symbol s_6_11[5] = { 'a', 'b', 'i', 'l', 'e' }; static const symbol s_6_12[5] = { 'i', 'b', 'i', 'l', 'e' }; static const symbol s_6_13[6] = { 'u', 's', 'i', 'o', 'n', 'e' }; static const symbol s_6_14[6] = { 'a', 'z', 'i', 'o', 'n', 'e' }; static const symbol s_6_15[6] = { 'u', 'z', 'i', 'o', 'n', 'e' }; static const symbol s_6_16[5] = { 'a', 't', 'o', 'r', 'e' }; static const symbol s_6_17[3] = { 'o', 's', 'e' }; static const symbol s_6_18[4] = { 'a', 'n', 't', 'e' }; static const symbol s_6_19[5] = { 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_20[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_21[4] = { 'i', 's', 't', 'e' }; static const symbol s_6_22[3] = { 'i', 'v', 'e' }; static const symbol s_6_23[4] = { 'a', 'n', 'z', 'e' }; static const symbol s_6_24[4] = { 'e', 'n', 
'z', 'e' }; static const symbol s_6_25[3] = { 'i', 'c', 'i' }; static const symbol s_6_26[6] = { 'a', 't', 'r', 'i', 'c', 'i' }; static const symbol s_6_27[4] = { 'i', 'c', 'h', 'i' }; static const symbol s_6_28[5] = { 'a', 'b', 'i', 'l', 'i' }; static const symbol s_6_29[5] = { 'i', 'b', 'i', 'l', 'i' }; static const symbol s_6_30[4] = { 'i', 's', 'm', 'i' }; static const symbol s_6_31[6] = { 'u', 's', 'i', 'o', 'n', 'i' }; static const symbol s_6_32[6] = { 'a', 'z', 'i', 'o', 'n', 'i' }; static const symbol s_6_33[6] = { 'u', 'z', 'i', 'o', 'n', 'i' }; static const symbol s_6_34[5] = { 'a', 't', 'o', 'r', 'i' }; static const symbol s_6_35[3] = { 'o', 's', 'i' }; static const symbol s_6_36[4] = { 'a', 'n', 't', 'i' }; static const symbol s_6_37[6] = { 'a', 'm', 'e', 'n', 't', 'i' }; static const symbol s_6_38[6] = { 'i', 'm', 'e', 'n', 't', 'i' }; static const symbol s_6_39[4] = { 'i', 's', 't', 'i' }; static const symbol s_6_40[3] = { 'i', 'v', 'i' }; static const symbol s_6_41[3] = { 'i', 'c', 'o' }; static const symbol s_6_42[4] = { 'i', 's', 'm', 'o' }; static const symbol s_6_43[3] = { 'o', 's', 'o' }; static const symbol s_6_44[6] = { 'a', 'm', 'e', 'n', 't', 'o' }; static const symbol s_6_45[6] = { 'i', 'm', 'e', 'n', 't', 'o' }; static const symbol s_6_46[3] = { 'i', 'v', 'o' }; static const symbol s_6_47[3] = { 'i', 't', 0xE0 }; static const symbol s_6_48[4] = { 'i', 's', 't', 0xE0 }; static const symbol s_6_49[4] = { 'i', 's', 't', 0xE8 }; static const symbol s_6_50[4] = { 'i', 's', 't', 0xEC }; static const struct among a_6[51] = { /* 0 */ { 3, s_6_0, -1, 1, 0}, /* 1 */ { 5, s_6_1, -1, 3, 0}, /* 2 */ { 3, s_6_2, -1, 1, 0}, /* 3 */ { 4, s_6_3, -1, 1, 0}, /* 4 */ { 3, s_6_4, -1, 9, 0}, /* 5 */ { 4, s_6_5, -1, 1, 0}, /* 6 */ { 4, s_6_6, -1, 5, 0}, /* 7 */ { 3, s_6_7, -1, 1, 0}, /* 8 */ { 6, s_6_8, 7, 1, 0}, /* 9 */ { 4, s_6_9, -1, 1, 0}, /* 10 */ { 5, s_6_10, -1, 3, 0}, /* 11 */ { 5, s_6_11, -1, 1, 0}, /* 12 */ { 5, s_6_12, -1, 1, 0}, /* 13 */ { 6, s_6_13, 
-1, 4, 0}, /* 14 */ { 6, s_6_14, -1, 2, 0}, /* 15 */ { 6, s_6_15, -1, 4, 0}, /* 16 */ { 5, s_6_16, -1, 2, 0}, /* 17 */ { 3, s_6_17, -1, 1, 0}, /* 18 */ { 4, s_6_18, -1, 1, 0}, /* 19 */ { 5, s_6_19, -1, 1, 0}, /* 20 */ { 6, s_6_20, 19, 7, 0}, /* 21 */ { 4, s_6_21, -1, 1, 0}, /* 22 */ { 3, s_6_22, -1, 9, 0}, /* 23 */ { 4, s_6_23, -1, 1, 0}, /* 24 */ { 4, s_6_24, -1, 5, 0}, /* 25 */ { 3, s_6_25, -1, 1, 0}, /* 26 */ { 6, s_6_26, 25, 1, 0}, /* 27 */ { 4, s_6_27, -1, 1, 0}, /* 28 */ { 5, s_6_28, -1, 1, 0}, /* 29 */ { 5, s_6_29, -1, 1, 0}, /* 30 */ { 4, s_6_30, -1, 1, 0}, /* 31 */ { 6, s_6_31, -1, 4, 0}, /* 32 */ { 6, s_6_32, -1, 2, 0}, /* 33 */ { 6, s_6_33, -1, 4, 0}, /* 34 */ { 5, s_6_34, -1, 2, 0}, /* 35 */ { 3, s_6_35, -1, 1, 0}, /* 36 */ { 4, s_6_36, -1, 1, 0}, /* 37 */ { 6, s_6_37, -1, 6, 0}, /* 38 */ { 6, s_6_38, -1, 6, 0}, /* 39 */ { 4, s_6_39, -1, 1, 0}, /* 40 */ { 3, s_6_40, -1, 9, 0}, /* 41 */ { 3, s_6_41, -1, 1, 0}, /* 42 */ { 4, s_6_42, -1, 1, 0}, /* 43 */ { 3, s_6_43, -1, 1, 0}, /* 44 */ { 6, s_6_44, -1, 6, 0}, /* 45 */ { 6, s_6_45, -1, 6, 0}, /* 46 */ { 3, s_6_46, -1, 9, 0}, /* 47 */ { 3, s_6_47, -1, 8, 0}, /* 48 */ { 4, s_6_48, -1, 1, 0}, /* 49 */ { 4, s_6_49, -1, 1, 0}, /* 50 */ { 4, s_6_50, -1, 1, 0} }; static const symbol s_7_0[4] = { 'i', 's', 'c', 'a' }; static const symbol s_7_1[4] = { 'e', 'n', 'd', 'a' }; static const symbol s_7_2[3] = { 'a', 't', 'a' }; static const symbol s_7_3[3] = { 'i', 't', 'a' }; static const symbol s_7_4[3] = { 'u', 't', 'a' }; static const symbol s_7_5[3] = { 'a', 'v', 'a' }; static const symbol s_7_6[3] = { 'e', 'v', 'a' }; static const symbol s_7_7[3] = { 'i', 'v', 'a' }; static const symbol s_7_8[6] = { 'e', 'r', 'e', 'b', 'b', 'e' }; static const symbol s_7_9[6] = { 'i', 'r', 'e', 'b', 'b', 'e' }; static const symbol s_7_10[4] = { 'i', 's', 'c', 'e' }; static const symbol s_7_11[4] = { 'e', 'n', 'd', 'e' }; static const symbol s_7_12[3] = { 'a', 'r', 'e' }; static const symbol s_7_13[3] = { 'e', 'r', 'e' }; static 
const symbol s_7_14[3] = { 'i', 'r', 'e' }; static const symbol s_7_15[4] = { 'a', 's', 's', 'e' }; static const symbol s_7_16[3] = { 'a', 't', 'e' }; static const symbol s_7_17[5] = { 'a', 'v', 'a', 't', 'e' }; static const symbol s_7_18[5] = { 'e', 'v', 'a', 't', 'e' }; static const symbol s_7_19[5] = { 'i', 'v', 'a', 't', 'e' }; static const symbol s_7_20[3] = { 'e', 't', 'e' }; static const symbol s_7_21[5] = { 'e', 'r', 'e', 't', 'e' }; static const symbol s_7_22[5] = { 'i', 'r', 'e', 't', 'e' }; static const symbol s_7_23[3] = { 'i', 't', 'e' }; static const symbol s_7_24[6] = { 'e', 'r', 'e', 's', 't', 'e' }; static const symbol s_7_25[6] = { 'i', 'r', 'e', 's', 't', 'e' }; static const symbol s_7_26[3] = { 'u', 't', 'e' }; static const symbol s_7_27[4] = { 'e', 'r', 'a', 'i' }; static const symbol s_7_28[4] = { 'i', 'r', 'a', 'i' }; static const symbol s_7_29[4] = { 'i', 's', 'c', 'i' }; static const symbol s_7_30[4] = { 'e', 'n', 'd', 'i' }; static const symbol s_7_31[4] = { 'e', 'r', 'e', 'i' }; static const symbol s_7_32[4] = { 'i', 'r', 'e', 'i' }; static const symbol s_7_33[4] = { 'a', 's', 's', 'i' }; static const symbol s_7_34[3] = { 'a', 't', 'i' }; static const symbol s_7_35[3] = { 'i', 't', 'i' }; static const symbol s_7_36[6] = { 'e', 'r', 'e', 's', 't', 'i' }; static const symbol s_7_37[6] = { 'i', 'r', 'e', 's', 't', 'i' }; static const symbol s_7_38[3] = { 'u', 't', 'i' }; static const symbol s_7_39[3] = { 'a', 'v', 'i' }; static const symbol s_7_40[3] = { 'e', 'v', 'i' }; static const symbol s_7_41[3] = { 'i', 'v', 'i' }; static const symbol s_7_42[4] = { 'i', 's', 'c', 'o' }; static const symbol s_7_43[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_7_44[4] = { 'e', 'n', 'd', 'o' }; static const symbol s_7_45[4] = { 'Y', 'a', 'm', 'o' }; static const symbol s_7_46[4] = { 'i', 'a', 'm', 'o' }; static const symbol s_7_47[5] = { 'a', 'v', 'a', 'm', 'o' }; static const symbol s_7_48[5] = { 'e', 'v', 'a', 'm', 'o' }; static const symbol 
s_7_49[5] = { 'i', 'v', 'a', 'm', 'o' }; static const symbol s_7_50[5] = { 'e', 'r', 'e', 'm', 'o' }; static const symbol s_7_51[5] = { 'i', 'r', 'e', 'm', 'o' }; static const symbol s_7_52[6] = { 'a', 's', 's', 'i', 'm', 'o' }; static const symbol s_7_53[4] = { 'a', 'm', 'm', 'o' }; static const symbol s_7_54[4] = { 'e', 'm', 'm', 'o' }; static const symbol s_7_55[6] = { 'e', 'r', 'e', 'm', 'm', 'o' }; static const symbol s_7_56[6] = { 'i', 'r', 'e', 'm', 'm', 'o' }; static const symbol s_7_57[4] = { 'i', 'm', 'm', 'o' }; static const symbol s_7_58[3] = { 'a', 'n', 'o' }; static const symbol s_7_59[6] = { 'i', 's', 'c', 'a', 'n', 'o' }; static const symbol s_7_60[5] = { 'a', 'v', 'a', 'n', 'o' }; static const symbol s_7_61[5] = { 'e', 'v', 'a', 'n', 'o' }; static const symbol s_7_62[5] = { 'i', 'v', 'a', 'n', 'o' }; static const symbol s_7_63[6] = { 'e', 'r', 'a', 'n', 'n', 'o' }; static const symbol s_7_64[6] = { 'i', 'r', 'a', 'n', 'n', 'o' }; static const symbol s_7_65[3] = { 'o', 'n', 'o' }; static const symbol s_7_66[6] = { 'i', 's', 'c', 'o', 'n', 'o' }; static const symbol s_7_67[5] = { 'a', 'r', 'o', 'n', 'o' }; static const symbol s_7_68[5] = { 'e', 'r', 'o', 'n', 'o' }; static const symbol s_7_69[5] = { 'i', 'r', 'o', 'n', 'o' }; static const symbol s_7_70[8] = { 'e', 'r', 'e', 'b', 'b', 'e', 'r', 'o' }; static const symbol s_7_71[8] = { 'i', 'r', 'e', 'b', 'b', 'e', 'r', 'o' }; static const symbol s_7_72[6] = { 'a', 's', 's', 'e', 'r', 'o' }; static const symbol s_7_73[6] = { 'e', 's', 's', 'e', 'r', 'o' }; static const symbol s_7_74[6] = { 'i', 's', 's', 'e', 'r', 'o' }; static const symbol s_7_75[3] = { 'a', 't', 'o' }; static const symbol s_7_76[3] = { 'i', 't', 'o' }; static const symbol s_7_77[3] = { 'u', 't', 'o' }; static const symbol s_7_78[3] = { 'a', 'v', 'o' }; static const symbol s_7_79[3] = { 'e', 'v', 'o' }; static const symbol s_7_80[3] = { 'i', 'v', 'o' }; static const symbol s_7_81[2] = { 'a', 'r' }; static const symbol s_7_82[2] = { 
'i', 'r' }; static const symbol s_7_83[3] = { 'e', 'r', 0xE0 }; static const symbol s_7_84[3] = { 'i', 'r', 0xE0 }; static const symbol s_7_85[3] = { 'e', 'r', 0xF2 }; static const symbol s_7_86[3] = { 'i', 'r', 0xF2 }; static const struct among a_7[87] = { /* 0 */ { 4, s_7_0, -1, 1, 0}, /* 1 */ { 4, s_7_1, -1, 1, 0}, /* 2 */ { 3, s_7_2, -1, 1, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 3, s_7_4, -1, 1, 0}, /* 5 */ { 3, s_7_5, -1, 1, 0}, /* 6 */ { 3, s_7_6, -1, 1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 6, s_7_8, -1, 1, 0}, /* 9 */ { 6, s_7_9, -1, 1, 0}, /* 10 */ { 4, s_7_10, -1, 1, 0}, /* 11 */ { 4, s_7_11, -1, 1, 0}, /* 12 */ { 3, s_7_12, -1, 1, 0}, /* 13 */ { 3, s_7_13, -1, 1, 0}, /* 14 */ { 3, s_7_14, -1, 1, 0}, /* 15 */ { 4, s_7_15, -1, 1, 0}, /* 16 */ { 3, s_7_16, -1, 1, 0}, /* 17 */ { 5, s_7_17, 16, 1, 0}, /* 18 */ { 5, s_7_18, 16, 1, 0}, /* 19 */ { 5, s_7_19, 16, 1, 0}, /* 20 */ { 3, s_7_20, -1, 1, 0}, /* 21 */ { 5, s_7_21, 20, 1, 0}, /* 22 */ { 5, s_7_22, 20, 1, 0}, /* 23 */ { 3, s_7_23, -1, 1, 0}, /* 24 */ { 6, s_7_24, -1, 1, 0}, /* 25 */ { 6, s_7_25, -1, 1, 0}, /* 26 */ { 3, s_7_26, -1, 1, 0}, /* 27 */ { 4, s_7_27, -1, 1, 0}, /* 28 */ { 4, s_7_28, -1, 1, 0}, /* 29 */ { 4, s_7_29, -1, 1, 0}, /* 30 */ { 4, s_7_30, -1, 1, 0}, /* 31 */ { 4, s_7_31, -1, 1, 0}, /* 32 */ { 4, s_7_32, -1, 1, 0}, /* 33 */ { 4, s_7_33, -1, 1, 0}, /* 34 */ { 3, s_7_34, -1, 1, 0}, /* 35 */ { 3, s_7_35, -1, 1, 0}, /* 36 */ { 6, s_7_36, -1, 1, 0}, /* 37 */ { 6, s_7_37, -1, 1, 0}, /* 38 */ { 3, s_7_38, -1, 1, 0}, /* 39 */ { 3, s_7_39, -1, 1, 0}, /* 40 */ { 3, s_7_40, -1, 1, 0}, /* 41 */ { 3, s_7_41, -1, 1, 0}, /* 42 */ { 4, s_7_42, -1, 1, 0}, /* 43 */ { 4, s_7_43, -1, 1, 0}, /* 44 */ { 4, s_7_44, -1, 1, 0}, /* 45 */ { 4, s_7_45, -1, 1, 0}, /* 46 */ { 4, s_7_46, -1, 1, 0}, /* 47 */ { 5, s_7_47, -1, 1, 0}, /* 48 */ { 5, s_7_48, -1, 1, 0}, /* 49 */ { 5, s_7_49, -1, 1, 0}, /* 50 */ { 5, s_7_50, -1, 1, 0}, /* 51 */ { 5, s_7_51, -1, 1, 0}, /* 52 */ { 6, s_7_52, -1, 1, 0}, /* 53 */ { 
4, s_7_53, -1, 1, 0}, /* 54 */ { 4, s_7_54, -1, 1, 0}, /* 55 */ { 6, s_7_55, 54, 1, 0}, /* 56 */ { 6, s_7_56, 54, 1, 0}, /* 57 */ { 4, s_7_57, -1, 1, 0}, /* 58 */ { 3, s_7_58, -1, 1, 0}, /* 59 */ { 6, s_7_59, 58, 1, 0}, /* 60 */ { 5, s_7_60, 58, 1, 0}, /* 61 */ { 5, s_7_61, 58, 1, 0}, /* 62 */ { 5, s_7_62, 58, 1, 0}, /* 63 */ { 6, s_7_63, -1, 1, 0}, /* 64 */ { 6, s_7_64, -1, 1, 0}, /* 65 */ { 3, s_7_65, -1, 1, 0}, /* 66 */ { 6, s_7_66, 65, 1, 0}, /* 67 */ { 5, s_7_67, 65, 1, 0}, /* 68 */ { 5, s_7_68, 65, 1, 0}, /* 69 */ { 5, s_7_69, 65, 1, 0}, /* 70 */ { 8, s_7_70, -1, 1, 0}, /* 71 */ { 8, s_7_71, -1, 1, 0}, /* 72 */ { 6, s_7_72, -1, 1, 0}, /* 73 */ { 6, s_7_73, -1, 1, 0}, /* 74 */ { 6, s_7_74, -1, 1, 0}, /* 75 */ { 3, s_7_75, -1, 1, 0}, /* 76 */ { 3, s_7_76, -1, 1, 0}, /* 77 */ { 3, s_7_77, -1, 1, 0}, /* 78 */ { 3, s_7_78, -1, 1, 0}, /* 79 */ { 3, s_7_79, -1, 1, 0}, /* 80 */ { 3, s_7_80, -1, 1, 0}, /* 81 */ { 2, s_7_81, -1, 1, 0}, /* 82 */ { 2, s_7_82, -1, 1, 0}, /* 83 */ { 3, s_7_83, -1, 1, 0}, /* 84 */ { 3, s_7_84, -1, 1, 0}, /* 85 */ { 3, s_7_85, -1, 1, 0}, /* 86 */ { 3, s_7_86, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2, 1 }; static const unsigned char g_AEIO[] = { 17, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2 }; static const unsigned char g_CG[] = { 17 }; static const symbol s_0[] = { 0xE0 }; static const symbol s_1[] = { 0xE8 }; static const symbol s_2[] = { 0xEC }; static const symbol s_3[] = { 0xF2 }; static const symbol s_4[] = { 0xF9 }; static const symbol s_5[] = { 'q', 'U' }; static const symbol s_6[] = { 'u' }; static const symbol s_7[] = { 'U' }; static const symbol s_8[] = { 'i' }; static const symbol s_9[] = { 'I' }; static const symbol s_10[] = { 'i' }; static const symbol s_11[] = { 'u' }; static const symbol s_12[] = { 'e' }; static const symbol s_13[] = { 'i', 'c' }; static const symbol s_14[] = { 'l', 'o', 'g' }; static const symbol s_15[] = { 'u' }; static 
const symbol s_16[] = { 'e', 'n', 't', 'e' }; static const symbol s_17[] = { 'a', 't' }; static const symbol s_18[] = { 'a', 't' }; static const symbol s_19[] = { 'i', 'c' }; static const symbol s_20[] = { 'i' }; static const symbol s_21[] = { 'h' }; static int r_prelude(struct SN_env * z) { int among_var; { int c_test = z->c; /* test, line 35 */ while(1) { /* repeat, line 35 */ int c1 = z->c; z->bra = z->c; /* [, line 36 */ among_var = find_among(z, a_0, 7); /* substring, line 36 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 36 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_0); /* <-, line 37 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 38 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_2); /* <-, line 39 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_3); /* <-, line 40 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_4); /* <-, line 41 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 2, s_5); /* <-, line 42 */ if (ret < 0) return ret; } break; case 7: if (z->c >= z->l) goto lab0; z->c++; /* next, line 43 */ break; } continue; lab0: z->c = c1; break; } z->c = c_test; } while(1) { /* repeat, line 46 */ int c2 = z->c; while(1) { /* goto, line 46 */ int c3 = z->c; if (in_grouping(z, g_v, 97, 249, 0)) goto lab2; z->bra = z->c; /* [, line 47 */ { int c4 = z->c; /* or, line 47 */ if (!(eq_s(z, 1, s_6))) goto lab4; z->ket = z->c; /* ], line 47 */ if (in_grouping(z, g_v, 97, 249, 0)) goto lab4; { int ret = slice_from_s(z, 1, s_7); /* <-, line 47 */ if (ret < 0) return ret; } goto lab3; lab4: z->c = c4; if (!(eq_s(z, 1, s_8))) goto lab2; z->ket = z->c; /* ], line 48 */ if (in_grouping(z, g_v, 97, 249, 0)) goto lab2; { int ret = slice_from_s(z, 1, s_9); /* <-, line 48 */ if (ret < 0) return ret; } } lab3: z->c = c3; break; lab2: z->c = c3; if (z->c >= z->l) goto 
lab1; z->c++; /* goto, line 46 */ } continue; lab1: z->c = c2; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 58 */ { int c2 = z->c; /* or, line 60 */ if (in_grouping(z, g_v, 97, 249, 0)) goto lab2; { int c3 = z->c; /* or, line 59 */ if (out_grouping(z, g_v, 97, 249, 0)) goto lab4; { /* gopast */ /* grouping v, line 59 */ int ret = out_grouping(z, g_v, 97, 249, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping(z, g_v, 97, 249, 0)) goto lab2; { /* gopast */ /* non v, line 59 */ int ret = in_grouping(z, g_v, 97, 249, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping(z, g_v, 97, 249, 0)) goto lab0; { int c4 = z->c; /* or, line 61 */ if (out_grouping(z, g_v, 97, 249, 0)) goto lab6; { /* gopast */ /* grouping v, line 61 */ int ret = out_grouping(z, g_v, 97, 249, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping(z, g_v, 97, 249, 0)) goto lab0; if (z->c >= z->l) goto lab0; z->c++; /* next, line 61 */ } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 62 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 64 */ { /* gopast */ /* grouping v, line 65 */ int ret = out_grouping(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 65 */ int ret = in_grouping(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 65 */ { /* gopast */ /* grouping v, line 66 */ int ret = out_grouping(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 66 */ int ret = in_grouping(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 66 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 70 */ int c1 = z->c; z->bra = z->c; /* [, line 72 */ if (z->c >= z->l || 
(z->p[z->c + 0] != 73 && z->p[z->c + 0] != 85)) among_var = 3; else among_var = find_among(z, a_1, 3); /* substring, line 72 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 72 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_10); /* <-, line 73 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_11); /* <-, line 74 */ if (ret < 0) return ret; } break; case 3: if (z->c >= z->l) goto lab0; z->c++; /* next, line 75 */ break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_attached_pronoun(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 87 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((33314 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_2, 37))) return 0; /* substring, line 87 */ z->bra = z->c; /* ], line 87 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 111 && z->p[z->c - 1] != 114)) return 0; among_var = find_among_b(z, a_3, 5); /* among, line 97 */ if (!(among_var)) return 0; { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 97 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 98 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_12); /* <-, line 99 */ if (ret < 0) return ret; } break; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 104 */ among_var = find_among_b(z, a_6, 51); /* substring, line 104 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 104 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 111 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* 
delete, line 111 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 113 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 113 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 114 */ z->ket = z->c; /* [, line 114 */ if (!(eq_s_b(z, 2, s_13))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 114 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 114 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 114 */ if (ret < 0) return ret; } lab0: ; } break; case 3: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 117 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_14); /* <-, line 117 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 119 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_15); /* <-, line 119 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 121 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_16); /* <-, line 121 */ if (ret < 0) return ret; } break; case 6: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 123 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 123 */ if (ret < 0) return ret; } break; case 7: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 125 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 125 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 126 */ z->ket = z->c; /* [, line 127 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4722696 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab1; } among_var = find_among_b(z, a_4, 4); /* substring, line 127 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 127 */ { int ret = 
r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 127 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 127 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab1; } case 1: z->ket = z->c; /* [, line 128 */ if (!(eq_s_b(z, 2, s_17))) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 128 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 128 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 128 */ if (ret < 0) return ret; } break; } lab1: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 134 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 135 */ z->ket = z->c; /* [, line 136 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab2; } among_var = find_among_b(z, a_5, 3); /* substring, line 136 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } z->bra = z->c; /* ], line 136 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab2; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 137 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 137 */ if (ret < 0) return ret; } break; } lab2: ; } break; case 9: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 142 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 143 */ z->ket = z->c; /* [, line 143 */ if (!(eq_s_b(z, 2, s_18))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 143 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 143 */ if (ret < 0) return ret; } { int ret = 
slice_del(z); /* delete, line 143 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 143 */ if (!(eq_s_b(z, 2, s_19))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 143 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 143 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 143 */ if (ret < 0) return ret; } lab3: ; } break; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 148 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 148 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 149 */ among_var = find_among_b(z, a_7, 87); /* substring, line 149 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 149 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = slice_del(z); /* delete, line 163 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_vowel_suffix(struct SN_env * z) { { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 171 */ z->ket = z->c; /* [, line 172 */ if (in_grouping_b(z, g_AEIO, 97, 242, 0)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 172 */ { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 172 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 172 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 173 */ if (!(eq_s_b(z, 1, s_20))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 173 */ { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 173 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 173 */ if (ret < 0) return ret; } lab0: ; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 175 */ z->ket = z->c; /* [, line 176 */ if (!(eq_s_b(z, 1, s_21))) { z->c = z->l - m_keep; goto lab1; } z->bra 
= z->c; /* ], line 176 */ if (in_grouping_b(z, g_CG, 99, 103, 0)) { z->c = z->l - m_keep; goto lab1; } { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call RV, line 176 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 176 */ if (ret < 0) return ret; } lab1: ; } return 1; } extern int italian_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 182 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 182 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 183 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 183 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 184 */ { int m3 = z->l - z->c; (void)m3; /* do, line 185 */ { int ret = r_attached_pronoun(z); if (ret == 0) goto lab2; /* call attached_pronoun, line 185 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 186 */ { int m5 = z->l - z->c; (void)m5; /* or, line 186 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab5; /* call standard_suffix, line 186 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = z->l - m5; { int ret = r_verb_suffix(z); if (ret == 0) goto lab3; /* call verb_suffix, line 186 */ if (ret < 0) return ret; } } lab4: lab3: z->c = z->l - m4; } { int m6 = z->l - z->c; (void)m6; /* do, line 187 */ { int ret = r_vowel_suffix(z); if (ret == 0) goto lab6; /* call vowel_suffix, line 187 */ if (ret < 0) return ret; } lab6: z->c = z->l - m6; } z->c = z->lb; { int c7 = z->c; /* do, line 189 */ { int ret = r_postlude(z); if (ret == 0) goto lab7; /* call postlude, line 189 */ if (ret < 0) return ret; } lab7: z->c = c7; } return 1; } extern struct SN_env * italian_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } extern void italian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } 
LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_italian.h000066400000000000000000000005051456444476200314070ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* italian_ISO_8859_1_create_env(void); extern void italian_ISO_8859_1_close_env(struct SN_env* z); extern int italian_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.c000066400000000000000000000234031456444476200317540ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int norwegian_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_other_suffix(struct SN_env * z); static int r_consonant_pair(struct SN_env * z); static int r_main_suffix(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * norwegian_ISO_8859_1_create_env(void); extern void norwegian_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[1] = { 'a' }; static const symbol s_0_1[1] = { 'e' }; static const symbol s_0_2[3] = { 'e', 'd', 'e' }; static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' }; static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' }; static const symbol s_0_5[3] = { 'a', 'n', 'e' }; static const symbol s_0_6[3] = { 'e', 'n', 'e' }; static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' }; static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' }; static const symbol s_0_9[2] = { 'e', 'n' }; static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' }; static const symbol s_0_11[2] = { 'a', 'r' }; static const symbol s_0_12[2] = { 'e', 'r' }; static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' }; static 
const symbol s_0_14[1] = { 's' }; static const symbol s_0_15[2] = { 'a', 's' }; static const symbol s_0_16[2] = { 'e', 's' }; static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' }; static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' }; static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' }; static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' }; static const symbol s_0_21[3] = { 'e', 'n', 's' }; static const symbol s_0_22[6] = { 'h', 'e', 't', 'e', 'n', 's' }; static const symbol s_0_23[3] = { 'e', 'r', 's' }; static const symbol s_0_24[3] = { 'e', 't', 's' }; static const symbol s_0_25[2] = { 'e', 't' }; static const symbol s_0_26[3] = { 'h', 'e', 't' }; static const symbol s_0_27[3] = { 'e', 'r', 't' }; static const symbol s_0_28[3] = { 'a', 's', 't' }; static const struct among a_0[29] = { /* 0 */ { 1, s_0_0, -1, 1, 0}, /* 1 */ { 1, s_0_1, -1, 1, 0}, /* 2 */ { 3, s_0_2, 1, 1, 0}, /* 3 */ { 4, s_0_3, 1, 1, 0}, /* 4 */ { 4, s_0_4, 1, 1, 0}, /* 5 */ { 3, s_0_5, 1, 1, 0}, /* 6 */ { 3, s_0_6, 1, 1, 0}, /* 7 */ { 6, s_0_7, 6, 1, 0}, /* 8 */ { 4, s_0_8, 1, 3, 0}, /* 9 */ { 2, s_0_9, -1, 1, 0}, /* 10 */ { 5, s_0_10, 9, 1, 0}, /* 11 */ { 2, s_0_11, -1, 1, 0}, /* 12 */ { 2, s_0_12, -1, 1, 0}, /* 13 */ { 5, s_0_13, 12, 1, 0}, /* 14 */ { 1, s_0_14, -1, 2, 0}, /* 15 */ { 2, s_0_15, 14, 1, 0}, /* 16 */ { 2, s_0_16, 14, 1, 0}, /* 17 */ { 4, s_0_17, 16, 1, 0}, /* 18 */ { 5, s_0_18, 16, 1, 0}, /* 19 */ { 4, s_0_19, 16, 1, 0}, /* 20 */ { 7, s_0_20, 19, 1, 0}, /* 21 */ { 3, s_0_21, 14, 1, 0}, /* 22 */ { 6, s_0_22, 21, 1, 0}, /* 23 */ { 3, s_0_23, 14, 1, 0}, /* 24 */ { 3, s_0_24, 14, 1, 0}, /* 25 */ { 2, s_0_25, -1, 1, 0}, /* 26 */ { 3, s_0_26, 25, 1, 0}, /* 27 */ { 3, s_0_27, -1, 3, 0}, /* 28 */ { 3, s_0_28, -1, 1, 0} }; static const symbol s_1_0[2] = { 'd', 't' }; static const symbol s_1_1[2] = { 'v', 't' }; static const struct among a_1[2] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0} }; static const symbol s_2_0[3] = { 'l', 'e', 'g' }; 
static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' }; static const symbol s_2_2[2] = { 'i', 'g' }; static const symbol s_2_3[3] = { 'e', 'i', 'g' }; static const symbol s_2_4[3] = { 'l', 'i', 'g' }; static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' }; static const symbol s_2_6[3] = { 'e', 'l', 's' }; static const symbol s_2_7[3] = { 'l', 'o', 'v' }; static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' }; static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' }; static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' }; static const struct among a_2[11] = { /* 0 */ { 3, s_2_0, -1, 1, 0}, /* 1 */ { 4, s_2_1, 0, 1, 0}, /* 2 */ { 2, s_2_2, -1, 1, 0}, /* 3 */ { 3, s_2_3, 2, 1, 0}, /* 4 */ { 3, s_2_4, 2, 1, 0}, /* 5 */ { 4, s_2_5, 4, 1, 0}, /* 6 */ { 3, s_2_6, -1, 1, 0}, /* 7 */ { 3, s_2_7, -1, 1, 0}, /* 8 */ { 4, s_2_8, 7, 1, 0}, /* 9 */ { 4, s_2_9, 7, 1, 0}, /* 10 */ { 7, s_2_10, 9, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; static const unsigned char g_s_ending[] = { 119, 125, 149, 1 }; static const symbol s_0[] = { 'k' }; static const symbol s_1[] = { 'e', 'r' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c_test = z->c; /* test, line 30 */ { int ret = z->c + 3; if (0 > ret || ret > z->l) return 0; z->c = ret; /* hop, line 30 */ } z->I[1] = z->c; /* setmark x, line 30 */ z->c = c_test; } if (out_grouping(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 31 */ { /* gopast */ /* non v, line 31 */ int ret = in_grouping(z, g_v, 97, 248, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 31 */ /* try, line 32 */ if (!(z->I[0] < z->I[1])) goto lab0; z->I[0] = z->I[1]; lab0: return 1; } static int r_main_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 38 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 38 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; 
z->ket = z->c; /* [, line 38 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_0, 29); /* substring, line 38 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 38 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 44 */ if (ret < 0) return ret; } break; case 2: { int m2 = z->l - z->c; (void)m2; /* or, line 46 */ if (in_grouping_b(z, g_s_ending, 98, 122, 0)) goto lab1; goto lab0; lab1: z->c = z->l - m2; if (!(eq_s_b(z, 1, s_0))) return 0; if (out_grouping_b(z, g_v, 97, 248, 0)) return 0; } lab0: { int ret = slice_del(z); /* delete, line 46 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 2, s_1); /* <-, line 48 */ if (ret < 0) return ret; } break; } return 1; } static int r_consonant_pair(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 53 */ { int mlimit; /* setlimit, line 54 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 54 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 54 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] != 116) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_1, 2))) { z->lb = mlimit; return 0; } /* substring, line 54 */ z->bra = z->c; /* ], line 54 */ z->lb = mlimit; } z->c = z->l - m_test; } if (z->c <= z->lb) return 0; z->c--; /* next, line 59 */ z->bra = z->c; /* ], line 59 */ { int ret = slice_del(z); /* delete, line 59 */ if (ret < 0) return ret; } return 1; } static int r_other_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 63 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 63 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 63 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb 
= mlimit; return 0; } among_var = find_among_b(z, a_2, 11); /* substring, line 63 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 63 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 67 */ if (ret < 0) return ret; } break; } return 1; } extern int norwegian_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 74 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 74 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 75 */ { int m2 = z->l - z->c; (void)m2; /* do, line 76 */ { int ret = r_main_suffix(z); if (ret == 0) goto lab1; /* call main_suffix, line 76 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 77 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab2; /* call consonant_pair, line 77 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 78 */ { int ret = r_other_suffix(z); if (ret == 0) goto lab3; /* call other_suffix, line 78 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } z->c = z->lb; return 1; } extern struct SN_env * norwegian_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 0); } extern void norwegian_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h000066400000000000000000000005131456444476200317560ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* norwegian_ISO_8859_1_create_env(void); extern void norwegian_ISO_8859_1_close_env(struct SN_env* z); extern int norwegian_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } #endif 
LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_porter.c000066400000000000000000000605561456444476200313100ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int porter_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_Step_5b(struct SN_env * z); static int r_Step_5a(struct SN_env * z); static int r_Step_4(struct SN_env * z); static int r_Step_3(struct SN_env * z); static int r_Step_2(struct SN_env * z); static int r_Step_1c(struct SN_env * z); static int r_Step_1b(struct SN_env * z); static int r_Step_1a(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_shortv(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * porter_ISO_8859_1_create_env(void); extern void porter_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[1] = { 's' }; static const symbol s_0_1[3] = { 'i', 'e', 's' }; static const symbol s_0_2[4] = { 's', 's', 'e', 's' }; static const symbol s_0_3[2] = { 's', 's' }; static const struct among a_0[4] = { /* 0 */ { 1, s_0_0, -1, 3, 0}, /* 1 */ { 3, s_0_1, 0, 2, 0}, /* 2 */ { 4, s_0_2, 0, 1, 0}, /* 3 */ { 2, s_0_3, 0, -1, 0} }; static const symbol s_1_1[2] = { 'b', 'b' }; static const symbol s_1_2[2] = { 'd', 'd' }; static const symbol s_1_3[2] = { 'f', 'f' }; static const symbol s_1_4[2] = { 'g', 'g' }; static const symbol s_1_5[2] = { 'b', 'l' }; static const symbol s_1_6[2] = { 'm', 'm' }; static const symbol s_1_7[2] = { 'n', 'n' }; static const symbol s_1_8[2] = { 'p', 'p' }; static const symbol s_1_9[2] = { 'r', 'r' }; static const symbol s_1_10[2] = { 'a', 't' }; static const symbol s_1_11[2] = { 't', 't' }; static const symbol s_1_12[2] = { 'i', 'z' }; static const struct among a_1[13] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 2, 
s_1_1, 0, 2, 0}, /* 2 */ { 2, s_1_2, 0, 2, 0}, /* 3 */ { 2, s_1_3, 0, 2, 0}, /* 4 */ { 2, s_1_4, 0, 2, 0}, /* 5 */ { 2, s_1_5, 0, 1, 0}, /* 6 */ { 2, s_1_6, 0, 2, 0}, /* 7 */ { 2, s_1_7, 0, 2, 0}, /* 8 */ { 2, s_1_8, 0, 2, 0}, /* 9 */ { 2, s_1_9, 0, 2, 0}, /* 10 */ { 2, s_1_10, 0, 1, 0}, /* 11 */ { 2, s_1_11, 0, 2, 0}, /* 12 */ { 2, s_1_12, 0, 1, 0} }; static const symbol s_2_0[2] = { 'e', 'd' }; static const symbol s_2_1[3] = { 'e', 'e', 'd' }; static const symbol s_2_2[3] = { 'i', 'n', 'g' }; static const struct among a_2[3] = { /* 0 */ { 2, s_2_0, -1, 2, 0}, /* 1 */ { 3, s_2_1, 0, 1, 0}, /* 2 */ { 3, s_2_2, -1, 2, 0} }; static const symbol s_3_0[4] = { 'a', 'n', 'c', 'i' }; static const symbol s_3_1[4] = { 'e', 'n', 'c', 'i' }; static const symbol s_3_2[4] = { 'a', 'b', 'l', 'i' }; static const symbol s_3_3[3] = { 'e', 'l', 'i' }; static const symbol s_3_4[4] = { 'a', 'l', 'l', 'i' }; static const symbol s_3_5[5] = { 'o', 'u', 's', 'l', 'i' }; static const symbol s_3_6[5] = { 'e', 'n', 't', 'l', 'i' }; static const symbol s_3_7[5] = { 'a', 'l', 'i', 't', 'i' }; static const symbol s_3_8[6] = { 'b', 'i', 'l', 'i', 't', 'i' }; static const symbol s_3_9[5] = { 'i', 'v', 'i', 't', 'i' }; static const symbol s_3_10[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_3_11[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_3_12[5] = { 'a', 'l', 'i', 's', 'm' }; static const symbol s_3_13[5] = { 'a', 't', 'i', 'o', 'n' }; static const symbol s_3_14[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' }; static const symbol s_3_15[4] = { 'i', 'z', 'e', 'r' }; static const symbol s_3_16[4] = { 'a', 't', 'o', 'r' }; static const symbol s_3_17[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' }; static const symbol s_3_18[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' }; static const symbol s_3_19[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' }; static const struct among a_3[20] = { /* 0 */ { 4, s_3_0, -1, 3, 0}, /* 1 */ { 4, s_3_1, -1, 2, 0}, /* 2 */ { 4, s_3_2, -1, 4, 0}, /* 3 */ 
{ 3, s_3_3, -1, 6, 0}, /* 4 */ { 4, s_3_4, -1, 9, 0}, /* 5 */ { 5, s_3_5, -1, 12, 0}, /* 6 */ { 5, s_3_6, -1, 5, 0}, /* 7 */ { 5, s_3_7, -1, 10, 0}, /* 8 */ { 6, s_3_8, -1, 14, 0}, /* 9 */ { 5, s_3_9, -1, 13, 0}, /* 10 */ { 6, s_3_10, -1, 1, 0}, /* 11 */ { 7, s_3_11, 10, 8, 0}, /* 12 */ { 5, s_3_12, -1, 10, 0}, /* 13 */ { 5, s_3_13, -1, 8, 0}, /* 14 */ { 7, s_3_14, 13, 7, 0}, /* 15 */ { 4, s_3_15, -1, 7, 0}, /* 16 */ { 4, s_3_16, -1, 8, 0}, /* 17 */ { 7, s_3_17, -1, 13, 0}, /* 18 */ { 7, s_3_18, -1, 11, 0}, /* 19 */ { 7, s_3_19, -1, 12, 0} }; static const symbol s_4_0[5] = { 'i', 'c', 'a', 't', 'e' }; static const symbol s_4_1[5] = { 'a', 't', 'i', 'v', 'e' }; static const symbol s_4_2[5] = { 'a', 'l', 'i', 'z', 'e' }; static const symbol s_4_3[5] = { 'i', 'c', 'i', 't', 'i' }; static const symbol s_4_4[4] = { 'i', 'c', 'a', 'l' }; static const symbol s_4_5[3] = { 'f', 'u', 'l' }; static const symbol s_4_6[4] = { 'n', 'e', 's', 's' }; static const struct among a_4[7] = { /* 0 */ { 5, s_4_0, -1, 2, 0}, /* 1 */ { 5, s_4_1, -1, 3, 0}, /* 2 */ { 5, s_4_2, -1, 1, 0}, /* 3 */ { 5, s_4_3, -1, 2, 0}, /* 4 */ { 4, s_4_4, -1, 2, 0}, /* 5 */ { 3, s_4_5, -1, 3, 0}, /* 6 */ { 4, s_4_6, -1, 3, 0} }; static const symbol s_5_0[2] = { 'i', 'c' }; static const symbol s_5_1[4] = { 'a', 'n', 'c', 'e' }; static const symbol s_5_2[4] = { 'e', 'n', 'c', 'e' }; static const symbol s_5_3[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_5_4[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_5_5[3] = { 'a', 't', 'e' }; static const symbol s_5_6[3] = { 'i', 'v', 'e' }; static const symbol s_5_7[3] = { 'i', 'z', 'e' }; static const symbol s_5_8[3] = { 'i', 't', 'i' }; static const symbol s_5_9[2] = { 'a', 'l' }; static const symbol s_5_10[3] = { 'i', 's', 'm' }; static const symbol s_5_11[3] = { 'i', 'o', 'n' }; static const symbol s_5_12[2] = { 'e', 'r' }; static const symbol s_5_13[3] = { 'o', 'u', 's' }; static const symbol s_5_14[3] = { 'a', 'n', 't' }; static const symbol s_5_15[3] = { 
'e', 'n', 't' }; static const symbol s_5_16[4] = { 'm', 'e', 'n', 't' }; static const symbol s_5_17[5] = { 'e', 'm', 'e', 'n', 't' }; static const symbol s_5_18[2] = { 'o', 'u' }; static const struct among a_5[19] = { /* 0 */ { 2, s_5_0, -1, 1, 0}, /* 1 */ { 4, s_5_1, -1, 1, 0}, /* 2 */ { 4, s_5_2, -1, 1, 0}, /* 3 */ { 4, s_5_3, -1, 1, 0}, /* 4 */ { 4, s_5_4, -1, 1, 0}, /* 5 */ { 3, s_5_5, -1, 1, 0}, /* 6 */ { 3, s_5_6, -1, 1, 0}, /* 7 */ { 3, s_5_7, -1, 1, 0}, /* 8 */ { 3, s_5_8, -1, 1, 0}, /* 9 */ { 2, s_5_9, -1, 1, 0}, /* 10 */ { 3, s_5_10, -1, 1, 0}, /* 11 */ { 3, s_5_11, -1, 2, 0}, /* 12 */ { 2, s_5_12, -1, 1, 0}, /* 13 */ { 3, s_5_13, -1, 1, 0}, /* 14 */ { 3, s_5_14, -1, 1, 0}, /* 15 */ { 3, s_5_15, -1, 1, 0}, /* 16 */ { 4, s_5_16, 15, 1, 0}, /* 17 */ { 5, s_5_17, 16, 1, 0}, /* 18 */ { 2, s_5_18, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1 }; static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; static const symbol s_0[] = { 's', 's' }; static const symbol s_1[] = { 'i' }; static const symbol s_2[] = { 'e', 'e' }; static const symbol s_3[] = { 'e' }; static const symbol s_4[] = { 'e' }; static const symbol s_5[] = { 'y' }; static const symbol s_6[] = { 'Y' }; static const symbol s_7[] = { 'i' }; static const symbol s_8[] = { 't', 'i', 'o', 'n' }; static const symbol s_9[] = { 'e', 'n', 'c', 'e' }; static const symbol s_10[] = { 'a', 'n', 'c', 'e' }; static const symbol s_11[] = { 'a', 'b', 'l', 'e' }; static const symbol s_12[] = { 'e', 'n', 't' }; static const symbol s_13[] = { 'e' }; static const symbol s_14[] = { 'i', 'z', 'e' }; static const symbol s_15[] = { 'a', 't', 'e' }; static const symbol s_16[] = { 'a', 'l' }; static const symbol s_17[] = { 'a', 'l' }; static const symbol s_18[] = { 'f', 'u', 'l' }; static const symbol s_19[] = { 'o', 'u', 's' }; static const symbol s_20[] = { 'i', 'v', 'e' }; static const symbol s_21[] = { 'b', 'l', 'e' }; static const symbol s_22[] = { 'a', 'l' }; static const symbol s_23[] = { 'i', 
'c' }; static const symbol s_24[] = { 's' }; static const symbol s_25[] = { 't' }; static const symbol s_26[] = { 'e' }; static const symbol s_27[] = { 'l' }; static const symbol s_28[] = { 'l' }; static const symbol s_29[] = { 'y' }; static const symbol s_30[] = { 'Y' }; static const symbol s_31[] = { 'y' }; static const symbol s_32[] = { 'Y' }; static const symbol s_33[] = { 'Y' }; static const symbol s_34[] = { 'y' }; static int r_shortv(struct SN_env * z) { if (out_grouping_b(z, g_v_WXY, 89, 121, 0)) return 0; if (in_grouping_b(z, g_v, 97, 121, 0)) return 0; if (out_grouping_b(z, g_v, 97, 121, 0)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_Step_1a(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 25 */ if (z->c <= z->lb || z->p[z->c - 1] != 115) return 0; among_var = find_among_b(z, a_0, 4); /* substring, line 25 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 25 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 2, s_0); /* <-, line 26 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 27 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 29 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_1b(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 34 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 103)) return 0; among_var = find_among_b(z, a_2, 3); /* substring, line 34 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 34 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 35 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 2, s_2); /* <-, line 35 */ if (ret < 0) return ret; } break; case 2: { int m_test = z->l - z->c; /* test, line 38 */ { /* gopast */ /* 
grouping v, line 38 */ int ret = out_grouping_b(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 38 */ if (ret < 0) return ret; } { int m_test = z->l - z->c; /* test, line 39 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68514004 >> (z->p[z->c - 1] & 0x1f)) & 1)) among_var = 3; else among_var = find_among_b(z, a_1, 13); /* substring, line 39 */ if (!(among_var)) return 0; z->c = z->l - m_test; } switch(among_var) { case 0: return 0; case 1: { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_3); /* <+, line 41 */ z->c = c_keep; if (ret < 0) return ret; } break; case 2: z->ket = z->c; /* [, line 44 */ if (z->c <= z->lb) return 0; z->c--; /* next, line 44 */ z->bra = z->c; /* ], line 44 */ { int ret = slice_del(z); /* delete, line 44 */ if (ret < 0) return ret; } break; case 3: if (z->c != z->I[0]) return 0; /* atmark, line 45 */ { int m_test = z->l - z->c; /* test, line 45 */ { int ret = r_shortv(z); if (ret == 0) return 0; /* call shortv, line 45 */ if (ret < 0) return ret; } z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_4); /* <+, line 45 */ z->c = c_keep; if (ret < 0) return ret; } break; } break; } return 1; } static int r_Step_1c(struct SN_env * z) { z->ket = z->c; /* [, line 52 */ { int m1 = z->l - z->c; (void)m1; /* or, line 52 */ if (!(eq_s_b(z, 1, s_5))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_6))) return 0; } lab0: z->bra = z->c; /* ], line 52 */ { /* gopast */ /* grouping v, line 53 */ int ret = out_grouping_b(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; } { int ret = slice_from_s(z, 1, s_7); /* <-, line 54 */ if (ret < 0) return ret; } return 1; } static int r_Step_2(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 58 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_3, 20); /* 
substring, line 58 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 58 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 58 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_8); /* <-, line 59 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 4, s_9); /* <-, line 60 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 4, s_10); /* <-, line 61 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 4, s_11); /* <-, line 62 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 3, s_12); /* <-, line 63 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 1, s_13); /* <-, line 64 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 3, s_14); /* <-, line 66 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 3, s_15); /* <-, line 68 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_from_s(z, 2, s_16); /* <-, line 69 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 2, s_17); /* <-, line 71 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_from_s(z, 3, s_18); /* <-, line 72 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_from_s(z, 3, s_19); /* <-, line 74 */ if (ret < 0) return ret; } break; case 13: { int ret = slice_from_s(z, 3, s_20); /* <-, line 76 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_from_s(z, 3, s_21); /* <-, line 77 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_3(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 82 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((528928 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_4, 7); /* substring, line 82 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 82 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 82 */ if (ret < 
0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 2, s_22); /* <-, line 83 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 2, s_23); /* <-, line 85 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 87 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_4(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 92 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((3961384 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_5, 19); /* substring, line 92 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 92 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 92 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 95 */ if (ret < 0) return ret; } break; case 2: { int m1 = z->l - z->c; (void)m1; /* or, line 96 */ if (!(eq_s_b(z, 1, s_24))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_25))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_5a(struct SN_env * z) { z->ket = z->c; /* [, line 101 */ if (!(eq_s_b(z, 1, s_26))) return 0; z->bra = z->c; /* ], line 101 */ { int m1 = z->l - z->c; (void)m1; /* or, line 102 */ { int ret = r_R2(z); if (ret == 0) goto lab1; /* call R2, line 102 */ if (ret < 0) return ret; } goto lab0; lab1: z->c = z->l - m1; { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 102 */ if (ret < 0) return ret; } { int m2 = z->l - z->c; (void)m2; /* not, line 102 */ { int ret = r_shortv(z); if (ret == 0) goto lab2; /* call shortv, line 102 */ if (ret < 0) return ret; } return 0; lab2: z->c = z->l - m2; } } lab0: { int ret = slice_del(z); /* delete, line 103 */ if (ret < 0) return ret; } return 1; } static int r_Step_5b(struct SN_env * z) { z->ket = z->c; /* [, line 107 */ if (!(eq_s_b(z, 1, s_27))) return 0; 
z->bra = z->c; /* ], line 107 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 108 */ if (ret < 0) return ret; } if (!(eq_s_b(z, 1, s_28))) return 0; { int ret = slice_del(z); /* delete, line 109 */ if (ret < 0) return ret; } return 1; } extern int porter_ISO_8859_1_stem(struct SN_env * z) { z->B[0] = 0; /* unset Y_found, line 115 */ { int c1 = z->c; /* do, line 116 */ z->bra = z->c; /* [, line 116 */ if (!(eq_s(z, 1, s_29))) goto lab0; z->ket = z->c; /* ], line 116 */ { int ret = slice_from_s(z, 1, s_30); /* <-, line 116 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 116 */ lab0: z->c = c1; } { int c2 = z->c; /* do, line 117 */ while(1) { /* repeat, line 117 */ int c3 = z->c; while(1) { /* goto, line 117 */ int c4 = z->c; if (in_grouping(z, g_v, 97, 121, 0)) goto lab3; z->bra = z->c; /* [, line 117 */ if (!(eq_s(z, 1, s_31))) goto lab3; z->ket = z->c; /* ], line 117 */ z->c = c4; break; lab3: z->c = c4; if (z->c >= z->l) goto lab2; z->c++; /* goto, line 117 */ } { int ret = slice_from_s(z, 1, s_32); /* <-, line 117 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 117 */ continue; lab2: z->c = c3; break; } z->c = c2; } z->I[0] = z->l; z->I[1] = z->l; { int c5 = z->c; /* do, line 121 */ { /* gopast */ /* grouping v, line 122 */ int ret = out_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 122 */ int ret = in_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 122 */ { /* gopast */ /* grouping v, line 123 */ int ret = out_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 123 */ int ret = in_grouping(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 123 */ lab4: z->c = c5; } z->lb = z->c; z->c = z->l; /* backwards, line 126 */ { int m6 = z->l - z->c; (void)m6; /* do, line 127 */ { int ret = r_Step_1a(z); if (ret == 0) goto 
lab5; /* call Step_1a, line 127 */ if (ret < 0) return ret; } lab5: z->c = z->l - m6; } { int m7 = z->l - z->c; (void)m7; /* do, line 128 */ { int ret = r_Step_1b(z); if (ret == 0) goto lab6; /* call Step_1b, line 128 */ if (ret < 0) return ret; } lab6: z->c = z->l - m7; } { int m8 = z->l - z->c; (void)m8; /* do, line 129 */ { int ret = r_Step_1c(z); if (ret == 0) goto lab7; /* call Step_1c, line 129 */ if (ret < 0) return ret; } lab7: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 130 */ { int ret = r_Step_2(z); if (ret == 0) goto lab8; /* call Step_2, line 130 */ if (ret < 0) return ret; } lab8: z->c = z->l - m9; } { int m10 = z->l - z->c; (void)m10; /* do, line 131 */ { int ret = r_Step_3(z); if (ret == 0) goto lab9; /* call Step_3, line 131 */ if (ret < 0) return ret; } lab9: z->c = z->l - m10; } { int m11 = z->l - z->c; (void)m11; /* do, line 132 */ { int ret = r_Step_4(z); if (ret == 0) goto lab10; /* call Step_4, line 132 */ if (ret < 0) return ret; } lab10: z->c = z->l - m11; } { int m12 = z->l - z->c; (void)m12; /* do, line 133 */ { int ret = r_Step_5a(z); if (ret == 0) goto lab11; /* call Step_5a, line 133 */ if (ret < 0) return ret; } lab11: z->c = z->l - m12; } { int m13 = z->l - z->c; (void)m13; /* do, line 134 */ { int ret = r_Step_5b(z); if (ret == 0) goto lab12; /* call Step_5b, line 134 */ if (ret < 0) return ret; } lab12: z->c = z->l - m13; } z->c = z->lb; { int c14 = z->c; /* do, line 137 */ if (!(z->B[0])) goto lab13; /* Boolean test Y_found, line 137 */ while(1) { /* repeat, line 137 */ int c15 = z->c; while(1) { /* goto, line 137 */ int c16 = z->c; z->bra = z->c; /* [, line 137 */ if (!(eq_s(z, 1, s_33))) goto lab15; z->ket = z->c; /* ], line 137 */ z->c = c16; break; lab15: z->c = c16; if (z->c >= z->l) goto lab14; z->c++; /* goto, line 137 */ } { int ret = slice_from_s(z, 1, s_34); /* <-, line 137 */ if (ret < 0) return ret; } continue; lab14: z->c = c15; break; } lab13: z->c = c14; } return 1; } extern struct SN_env * 
porter_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 1); } extern void porter_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_porter.h000066400000000000000000000005021456444476200312760ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* porter_ISO_8859_1_create_env(void); extern void porter_ISO_8859_1_close_env(struct SN_env* z); extern int porter_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.c000066400000000000000000001127461456444476200321760ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int portuguese_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_residual_form(struct SN_env * z); static int r_residual_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * portuguese_ISO_8859_1_create_env(void); extern void portuguese_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[1] = { 0xE3 }; static const symbol s_0_2[1] = { 0xF5 }; static const struct among a_0[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 1, s_0_1, 0, 1, 0}, /* 2 */ { 1, s_0_2, 0, 2, 0} }; static const symbol s_1_1[2] = { 'a', '~' }; static const symbol s_1_2[2] = { 'o', '~' }; static const struct among a_1[3] 
= { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 2, s_1_1, 0, 1, 0}, /* 2 */ { 2, s_1_2, 0, 2, 0} }; static const symbol s_2_0[2] = { 'i', 'c' }; static const symbol s_2_1[2] = { 'a', 'd' }; static const symbol s_2_2[2] = { 'o', 's' }; static const symbol s_2_3[2] = { 'i', 'v' }; static const struct among a_2[4] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 2, s_2_2, -1, -1, 0}, /* 3 */ { 2, s_2_3, -1, 1, 0} }; static const symbol s_3_0[4] = { 'a', 'n', 't', 'e' }; static const symbol s_3_1[4] = { 'a', 'v', 'e', 'l' }; static const symbol s_3_2[4] = { 0xED, 'v', 'e', 'l' }; static const struct among a_3[3] = { /* 0 */ { 4, s_3_0, -1, 1, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0}, /* 2 */ { 4, s_3_2, -1, 1, 0} }; static const symbol s_4_0[2] = { 'i', 'c' }; static const symbol s_4_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_4_2[2] = { 'i', 'v' }; static const struct among a_4[3] = { /* 0 */ { 2, s_4_0, -1, 1, 0}, /* 1 */ { 4, s_4_1, -1, 1, 0}, /* 2 */ { 2, s_4_2, -1, 1, 0} }; static const symbol s_5_0[3] = { 'i', 'c', 'a' }; static const symbol s_5_1[5] = { 0xE2, 'n', 'c', 'i', 'a' }; static const symbol s_5_2[5] = { 0xEA, 'n', 'c', 'i', 'a' }; static const symbol s_5_3[3] = { 'i', 'r', 'a' }; static const symbol s_5_4[5] = { 'a', 'd', 'o', 'r', 'a' }; static const symbol s_5_5[3] = { 'o', 's', 'a' }; static const symbol s_5_6[4] = { 'i', 's', 't', 'a' }; static const symbol s_5_7[3] = { 'i', 'v', 'a' }; static const symbol s_5_8[3] = { 'e', 'z', 'a' }; static const symbol s_5_9[5] = { 'l', 'o', 'g', 0xED, 'a' }; static const symbol s_5_10[5] = { 'i', 'd', 'a', 'd', 'e' }; static const symbol s_5_11[4] = { 'a', 'n', 't', 'e' }; static const symbol s_5_12[5] = { 'm', 'e', 'n', 't', 'e' }; static const symbol s_5_13[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; static const symbol s_5_14[4] = { 0xE1, 'v', 'e', 'l' }; static const symbol s_5_15[4] = { 0xED, 'v', 'e', 'l' }; static const symbol s_5_16[5] = { 'u', 'c', 'i', 0xF3, 'n' }; static const 
symbol s_5_17[3] = { 'i', 'c', 'o' }; static const symbol s_5_18[4] = { 'i', 's', 'm', 'o' }; static const symbol s_5_19[3] = { 'o', 's', 'o' }; static const symbol s_5_20[6] = { 'a', 'm', 'e', 'n', 't', 'o' }; static const symbol s_5_21[6] = { 'i', 'm', 'e', 'n', 't', 'o' }; static const symbol s_5_22[3] = { 'i', 'v', 'o' }; static const symbol s_5_23[5] = { 'a', 0xE7, 'a', '~', 'o' }; static const symbol s_5_24[4] = { 'a', 'd', 'o', 'r' }; static const symbol s_5_25[4] = { 'i', 'c', 'a', 's' }; static const symbol s_5_26[6] = { 0xEA, 'n', 'c', 'i', 'a', 's' }; static const symbol s_5_27[4] = { 'i', 'r', 'a', 's' }; static const symbol s_5_28[6] = { 'a', 'd', 'o', 'r', 'a', 's' }; static const symbol s_5_29[4] = { 'o', 's', 'a', 's' }; static const symbol s_5_30[5] = { 'i', 's', 't', 'a', 's' }; static const symbol s_5_31[4] = { 'i', 'v', 'a', 's' }; static const symbol s_5_32[4] = { 'e', 'z', 'a', 's' }; static const symbol s_5_33[6] = { 'l', 'o', 'g', 0xED, 'a', 's' }; static const symbol s_5_34[6] = { 'i', 'd', 'a', 'd', 'e', 's' }; static const symbol s_5_35[7] = { 'u', 'c', 'i', 'o', 'n', 'e', 's' }; static const symbol s_5_36[6] = { 'a', 'd', 'o', 'r', 'e', 's' }; static const symbol s_5_37[5] = { 'a', 'n', 't', 'e', 's' }; static const symbol s_5_38[6] = { 'a', 0xE7, 'o', '~', 'e', 's' }; static const symbol s_5_39[4] = { 'i', 'c', 'o', 's' }; static const symbol s_5_40[5] = { 'i', 's', 'm', 'o', 's' }; static const symbol s_5_41[4] = { 'o', 's', 'o', 's' }; static const symbol s_5_42[7] = { 'a', 'm', 'e', 'n', 't', 'o', 's' }; static const symbol s_5_43[7] = { 'i', 'm', 'e', 'n', 't', 'o', 's' }; static const symbol s_5_44[4] = { 'i', 'v', 'o', 's' }; static const struct among a_5[45] = { /* 0 */ { 3, s_5_0, -1, 1, 0}, /* 1 */ { 5, s_5_1, -1, 1, 0}, /* 2 */ { 5, s_5_2, -1, 4, 0}, /* 3 */ { 3, s_5_3, -1, 9, 0}, /* 4 */ { 5, s_5_4, -1, 1, 0}, /* 5 */ { 3, s_5_5, -1, 1, 0}, /* 6 */ { 4, s_5_6, -1, 1, 0}, /* 7 */ { 3, s_5_7, -1, 8, 0}, /* 8 */ { 3, s_5_8, -1, 
1, 0}, /* 9 */ { 5, s_5_9, -1, 2, 0}, /* 10 */ { 5, s_5_10, -1, 7, 0}, /* 11 */ { 4, s_5_11, -1, 1, 0}, /* 12 */ { 5, s_5_12, -1, 6, 0}, /* 13 */ { 6, s_5_13, 12, 5, 0}, /* 14 */ { 4, s_5_14, -1, 1, 0}, /* 15 */ { 4, s_5_15, -1, 1, 0}, /* 16 */ { 5, s_5_16, -1, 3, 0}, /* 17 */ { 3, s_5_17, -1, 1, 0}, /* 18 */ { 4, s_5_18, -1, 1, 0}, /* 19 */ { 3, s_5_19, -1, 1, 0}, /* 20 */ { 6, s_5_20, -1, 1, 0}, /* 21 */ { 6, s_5_21, -1, 1, 0}, /* 22 */ { 3, s_5_22, -1, 8, 0}, /* 23 */ { 5, s_5_23, -1, 1, 0}, /* 24 */ { 4, s_5_24, -1, 1, 0}, /* 25 */ { 4, s_5_25, -1, 1, 0}, /* 26 */ { 6, s_5_26, -1, 4, 0}, /* 27 */ { 4, s_5_27, -1, 9, 0}, /* 28 */ { 6, s_5_28, -1, 1, 0}, /* 29 */ { 4, s_5_29, -1, 1, 0}, /* 30 */ { 5, s_5_30, -1, 1, 0}, /* 31 */ { 4, s_5_31, -1, 8, 0}, /* 32 */ { 4, s_5_32, -1, 1, 0}, /* 33 */ { 6, s_5_33, -1, 2, 0}, /* 34 */ { 6, s_5_34, -1, 7, 0}, /* 35 */ { 7, s_5_35, -1, 3, 0}, /* 36 */ { 6, s_5_36, -1, 1, 0}, /* 37 */ { 5, s_5_37, -1, 1, 0}, /* 38 */ { 6, s_5_38, -1, 1, 0}, /* 39 */ { 4, s_5_39, -1, 1, 0}, /* 40 */ { 5, s_5_40, -1, 1, 0}, /* 41 */ { 4, s_5_41, -1, 1, 0}, /* 42 */ { 7, s_5_42, -1, 1, 0}, /* 43 */ { 7, s_5_43, -1, 1, 0}, /* 44 */ { 4, s_5_44, -1, 8, 0} }; static const symbol s_6_0[3] = { 'a', 'd', 'a' }; static const symbol s_6_1[3] = { 'i', 'd', 'a' }; static const symbol s_6_2[2] = { 'i', 'a' }; static const symbol s_6_3[4] = { 'a', 'r', 'i', 'a' }; static const symbol s_6_4[4] = { 'e', 'r', 'i', 'a' }; static const symbol s_6_5[4] = { 'i', 'r', 'i', 'a' }; static const symbol s_6_6[3] = { 'a', 'r', 'a' }; static const symbol s_6_7[3] = { 'e', 'r', 'a' }; static const symbol s_6_8[3] = { 'i', 'r', 'a' }; static const symbol s_6_9[3] = { 'a', 'v', 'a' }; static const symbol s_6_10[4] = { 'a', 's', 's', 'e' }; static const symbol s_6_11[4] = { 'e', 's', 's', 'e' }; static const symbol s_6_12[4] = { 'i', 's', 's', 'e' }; static const symbol s_6_13[4] = { 'a', 's', 't', 'e' }; static const symbol s_6_14[4] = { 'e', 's', 't', 'e' }; static const 
symbol s_6_15[4] = { 'i', 's', 't', 'e' }; static const symbol s_6_16[2] = { 'e', 'i' }; static const symbol s_6_17[4] = { 'a', 'r', 'e', 'i' }; static const symbol s_6_18[4] = { 'e', 'r', 'e', 'i' }; static const symbol s_6_19[4] = { 'i', 'r', 'e', 'i' }; static const symbol s_6_20[2] = { 'a', 'm' }; static const symbol s_6_21[3] = { 'i', 'a', 'm' }; static const symbol s_6_22[5] = { 'a', 'r', 'i', 'a', 'm' }; static const symbol s_6_23[5] = { 'e', 'r', 'i', 'a', 'm' }; static const symbol s_6_24[5] = { 'i', 'r', 'i', 'a', 'm' }; static const symbol s_6_25[4] = { 'a', 'r', 'a', 'm' }; static const symbol s_6_26[4] = { 'e', 'r', 'a', 'm' }; static const symbol s_6_27[4] = { 'i', 'r', 'a', 'm' }; static const symbol s_6_28[4] = { 'a', 'v', 'a', 'm' }; static const symbol s_6_29[2] = { 'e', 'm' }; static const symbol s_6_30[4] = { 'a', 'r', 'e', 'm' }; static const symbol s_6_31[4] = { 'e', 'r', 'e', 'm' }; static const symbol s_6_32[4] = { 'i', 'r', 'e', 'm' }; static const symbol s_6_33[5] = { 'a', 's', 's', 'e', 'm' }; static const symbol s_6_34[5] = { 'e', 's', 's', 'e', 'm' }; static const symbol s_6_35[5] = { 'i', 's', 's', 'e', 'm' }; static const symbol s_6_36[3] = { 'a', 'd', 'o' }; static const symbol s_6_37[3] = { 'i', 'd', 'o' }; static const symbol s_6_38[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_6_39[4] = { 'e', 'n', 'd', 'o' }; static const symbol s_6_40[4] = { 'i', 'n', 'd', 'o' }; static const symbol s_6_41[5] = { 'a', 'r', 'a', '~', 'o' }; static const symbol s_6_42[5] = { 'e', 'r', 'a', '~', 'o' }; static const symbol s_6_43[5] = { 'i', 'r', 'a', '~', 'o' }; static const symbol s_6_44[2] = { 'a', 'r' }; static const symbol s_6_45[2] = { 'e', 'r' }; static const symbol s_6_46[2] = { 'i', 'r' }; static const symbol s_6_47[2] = { 'a', 's' }; static const symbol s_6_48[4] = { 'a', 'd', 'a', 's' }; static const symbol s_6_49[4] = { 'i', 'd', 'a', 's' }; static const symbol s_6_50[3] = { 'i', 'a', 's' }; static const symbol s_6_51[5] = { 'a', 
'r', 'i', 'a', 's' }; static const symbol s_6_52[5] = { 'e', 'r', 'i', 'a', 's' }; static const symbol s_6_53[5] = { 'i', 'r', 'i', 'a', 's' }; static const symbol s_6_54[4] = { 'a', 'r', 'a', 's' }; static const symbol s_6_55[4] = { 'e', 'r', 'a', 's' }; static const symbol s_6_56[4] = { 'i', 'r', 'a', 's' }; static const symbol s_6_57[4] = { 'a', 'v', 'a', 's' }; static const symbol s_6_58[2] = { 'e', 's' }; static const symbol s_6_59[5] = { 'a', 'r', 'd', 'e', 's' }; static const symbol s_6_60[5] = { 'e', 'r', 'd', 'e', 's' }; static const symbol s_6_61[5] = { 'i', 'r', 'd', 'e', 's' }; static const symbol s_6_62[4] = { 'a', 'r', 'e', 's' }; static const symbol s_6_63[4] = { 'e', 'r', 'e', 's' }; static const symbol s_6_64[4] = { 'i', 'r', 'e', 's' }; static const symbol s_6_65[5] = { 'a', 's', 's', 'e', 's' }; static const symbol s_6_66[5] = { 'e', 's', 's', 'e', 's' }; static const symbol s_6_67[5] = { 'i', 's', 's', 'e', 's' }; static const symbol s_6_68[5] = { 'a', 's', 't', 'e', 's' }; static const symbol s_6_69[5] = { 'e', 's', 't', 'e', 's' }; static const symbol s_6_70[5] = { 'i', 's', 't', 'e', 's' }; static const symbol s_6_71[2] = { 'i', 's' }; static const symbol s_6_72[3] = { 'a', 'i', 's' }; static const symbol s_6_73[3] = { 'e', 'i', 's' }; static const symbol s_6_74[5] = { 'a', 'r', 'e', 'i', 's' }; static const symbol s_6_75[5] = { 'e', 'r', 'e', 'i', 's' }; static const symbol s_6_76[5] = { 'i', 'r', 'e', 'i', 's' }; static const symbol s_6_77[5] = { 0xE1, 'r', 'e', 'i', 's' }; static const symbol s_6_78[5] = { 0xE9, 'r', 'e', 'i', 's' }; static const symbol s_6_79[5] = { 0xED, 'r', 'e', 'i', 's' }; static const symbol s_6_80[6] = { 0xE1, 's', 's', 'e', 'i', 's' }; static const symbol s_6_81[6] = { 0xE9, 's', 's', 'e', 'i', 's' }; static const symbol s_6_82[6] = { 0xED, 's', 's', 'e', 'i', 's' }; static const symbol s_6_83[5] = { 0xE1, 'v', 'e', 'i', 's' }; static const symbol s_6_84[4] = { 0xED, 'e', 'i', 's' }; static const symbol s_6_85[6] = 
{ 'a', 'r', 0xED, 'e', 'i', 's' }; static const symbol s_6_86[6] = { 'e', 'r', 0xED, 'e', 'i', 's' }; static const symbol s_6_87[6] = { 'i', 'r', 0xED, 'e', 'i', 's' }; static const symbol s_6_88[4] = { 'a', 'd', 'o', 's' }; static const symbol s_6_89[4] = { 'i', 'd', 'o', 's' }; static const symbol s_6_90[4] = { 'a', 'm', 'o', 's' }; static const symbol s_6_91[6] = { 0xE1, 'r', 'a', 'm', 'o', 's' }; static const symbol s_6_92[6] = { 0xE9, 'r', 'a', 'm', 'o', 's' }; static const symbol s_6_93[6] = { 0xED, 'r', 'a', 'm', 'o', 's' }; static const symbol s_6_94[6] = { 0xE1, 'v', 'a', 'm', 'o', 's' }; static const symbol s_6_95[5] = { 0xED, 'a', 'm', 'o', 's' }; static const symbol s_6_96[7] = { 'a', 'r', 0xED, 'a', 'm', 'o', 's' }; static const symbol s_6_97[7] = { 'e', 'r', 0xED, 'a', 'm', 'o', 's' }; static const symbol s_6_98[7] = { 'i', 'r', 0xED, 'a', 'm', 'o', 's' }; static const symbol s_6_99[4] = { 'e', 'm', 'o', 's' }; static const symbol s_6_100[6] = { 'a', 'r', 'e', 'm', 'o', 's' }; static const symbol s_6_101[6] = { 'e', 'r', 'e', 'm', 'o', 's' }; static const symbol s_6_102[6] = { 'i', 'r', 'e', 'm', 'o', 's' }; static const symbol s_6_103[7] = { 0xE1, 's', 's', 'e', 'm', 'o', 's' }; static const symbol s_6_104[7] = { 0xEA, 's', 's', 'e', 'm', 'o', 's' }; static const symbol s_6_105[7] = { 0xED, 's', 's', 'e', 'm', 'o', 's' }; static const symbol s_6_106[4] = { 'i', 'm', 'o', 's' }; static const symbol s_6_107[5] = { 'a', 'r', 'm', 'o', 's' }; static const symbol s_6_108[5] = { 'e', 'r', 'm', 'o', 's' }; static const symbol s_6_109[5] = { 'i', 'r', 'm', 'o', 's' }; static const symbol s_6_110[4] = { 0xE1, 'm', 'o', 's' }; static const symbol s_6_111[4] = { 'a', 'r', 0xE1, 's' }; static const symbol s_6_112[4] = { 'e', 'r', 0xE1, 's' }; static const symbol s_6_113[4] = { 'i', 'r', 0xE1, 's' }; static const symbol s_6_114[2] = { 'e', 'u' }; static const symbol s_6_115[2] = { 'i', 'u' }; static const symbol s_6_116[2] = { 'o', 'u' }; static const symbol 
s_6_117[3] = { 'a', 'r', 0xE1 }; static const symbol s_6_118[3] = { 'e', 'r', 0xE1 }; static const symbol s_6_119[3] = { 'i', 'r', 0xE1 }; static const struct among a_6[120] = { /* 0 */ { 3, s_6_0, -1, 1, 0}, /* 1 */ { 3, s_6_1, -1, 1, 0}, /* 2 */ { 2, s_6_2, -1, 1, 0}, /* 3 */ { 4, s_6_3, 2, 1, 0}, /* 4 */ { 4, s_6_4, 2, 1, 0}, /* 5 */ { 4, s_6_5, 2, 1, 0}, /* 6 */ { 3, s_6_6, -1, 1, 0}, /* 7 */ { 3, s_6_7, -1, 1, 0}, /* 8 */ { 3, s_6_8, -1, 1, 0}, /* 9 */ { 3, s_6_9, -1, 1, 0}, /* 10 */ { 4, s_6_10, -1, 1, 0}, /* 11 */ { 4, s_6_11, -1, 1, 0}, /* 12 */ { 4, s_6_12, -1, 1, 0}, /* 13 */ { 4, s_6_13, -1, 1, 0}, /* 14 */ { 4, s_6_14, -1, 1, 0}, /* 15 */ { 4, s_6_15, -1, 1, 0}, /* 16 */ { 2, s_6_16, -1, 1, 0}, /* 17 */ { 4, s_6_17, 16, 1, 0}, /* 18 */ { 4, s_6_18, 16, 1, 0}, /* 19 */ { 4, s_6_19, 16, 1, 0}, /* 20 */ { 2, s_6_20, -1, 1, 0}, /* 21 */ { 3, s_6_21, 20, 1, 0}, /* 22 */ { 5, s_6_22, 21, 1, 0}, /* 23 */ { 5, s_6_23, 21, 1, 0}, /* 24 */ { 5, s_6_24, 21, 1, 0}, /* 25 */ { 4, s_6_25, 20, 1, 0}, /* 26 */ { 4, s_6_26, 20, 1, 0}, /* 27 */ { 4, s_6_27, 20, 1, 0}, /* 28 */ { 4, s_6_28, 20, 1, 0}, /* 29 */ { 2, s_6_29, -1, 1, 0}, /* 30 */ { 4, s_6_30, 29, 1, 0}, /* 31 */ { 4, s_6_31, 29, 1, 0}, /* 32 */ { 4, s_6_32, 29, 1, 0}, /* 33 */ { 5, s_6_33, 29, 1, 0}, /* 34 */ { 5, s_6_34, 29, 1, 0}, /* 35 */ { 5, s_6_35, 29, 1, 0}, /* 36 */ { 3, s_6_36, -1, 1, 0}, /* 37 */ { 3, s_6_37, -1, 1, 0}, /* 38 */ { 4, s_6_38, -1, 1, 0}, /* 39 */ { 4, s_6_39, -1, 1, 0}, /* 40 */ { 4, s_6_40, -1, 1, 0}, /* 41 */ { 5, s_6_41, -1, 1, 0}, /* 42 */ { 5, s_6_42, -1, 1, 0}, /* 43 */ { 5, s_6_43, -1, 1, 0}, /* 44 */ { 2, s_6_44, -1, 1, 0}, /* 45 */ { 2, s_6_45, -1, 1, 0}, /* 46 */ { 2, s_6_46, -1, 1, 0}, /* 47 */ { 2, s_6_47, -1, 1, 0}, /* 48 */ { 4, s_6_48, 47, 1, 0}, /* 49 */ { 4, s_6_49, 47, 1, 0}, /* 50 */ { 3, s_6_50, 47, 1, 0}, /* 51 */ { 5, s_6_51, 50, 1, 0}, /* 52 */ { 5, s_6_52, 50, 1, 0}, /* 53 */ { 5, s_6_53, 50, 1, 0}, /* 54 */ { 4, s_6_54, 47, 1, 0}, /* 55 */ { 4, s_6_55, 47, 1, 
0}, /* 56 */ { 4, s_6_56, 47, 1, 0}, /* 57 */ { 4, s_6_57, 47, 1, 0}, /* 58 */ { 2, s_6_58, -1, 1, 0}, /* 59 */ { 5, s_6_59, 58, 1, 0}, /* 60 */ { 5, s_6_60, 58, 1, 0}, /* 61 */ { 5, s_6_61, 58, 1, 0}, /* 62 */ { 4, s_6_62, 58, 1, 0}, /* 63 */ { 4, s_6_63, 58, 1, 0}, /* 64 */ { 4, s_6_64, 58, 1, 0}, /* 65 */ { 5, s_6_65, 58, 1, 0}, /* 66 */ { 5, s_6_66, 58, 1, 0}, /* 67 */ { 5, s_6_67, 58, 1, 0}, /* 68 */ { 5, s_6_68, 58, 1, 0}, /* 69 */ { 5, s_6_69, 58, 1, 0}, /* 70 */ { 5, s_6_70, 58, 1, 0}, /* 71 */ { 2, s_6_71, -1, 1, 0}, /* 72 */ { 3, s_6_72, 71, 1, 0}, /* 73 */ { 3, s_6_73, 71, 1, 0}, /* 74 */ { 5, s_6_74, 73, 1, 0}, /* 75 */ { 5, s_6_75, 73, 1, 0}, /* 76 */ { 5, s_6_76, 73, 1, 0}, /* 77 */ { 5, s_6_77, 73, 1, 0}, /* 78 */ { 5, s_6_78, 73, 1, 0}, /* 79 */ { 5, s_6_79, 73, 1, 0}, /* 80 */ { 6, s_6_80, 73, 1, 0}, /* 81 */ { 6, s_6_81, 73, 1, 0}, /* 82 */ { 6, s_6_82, 73, 1, 0}, /* 83 */ { 5, s_6_83, 73, 1, 0}, /* 84 */ { 4, s_6_84, 73, 1, 0}, /* 85 */ { 6, s_6_85, 84, 1, 0}, /* 86 */ { 6, s_6_86, 84, 1, 0}, /* 87 */ { 6, s_6_87, 84, 1, 0}, /* 88 */ { 4, s_6_88, -1, 1, 0}, /* 89 */ { 4, s_6_89, -1, 1, 0}, /* 90 */ { 4, s_6_90, -1, 1, 0}, /* 91 */ { 6, s_6_91, 90, 1, 0}, /* 92 */ { 6, s_6_92, 90, 1, 0}, /* 93 */ { 6, s_6_93, 90, 1, 0}, /* 94 */ { 6, s_6_94, 90, 1, 0}, /* 95 */ { 5, s_6_95, 90, 1, 0}, /* 96 */ { 7, s_6_96, 95, 1, 0}, /* 97 */ { 7, s_6_97, 95, 1, 0}, /* 98 */ { 7, s_6_98, 95, 1, 0}, /* 99 */ { 4, s_6_99, -1, 1, 0}, /*100 */ { 6, s_6_100, 99, 1, 0}, /*101 */ { 6, s_6_101, 99, 1, 0}, /*102 */ { 6, s_6_102, 99, 1, 0}, /*103 */ { 7, s_6_103, 99, 1, 0}, /*104 */ { 7, s_6_104, 99, 1, 0}, /*105 */ { 7, s_6_105, 99, 1, 0}, /*106 */ { 4, s_6_106, -1, 1, 0}, /*107 */ { 5, s_6_107, -1, 1, 0}, /*108 */ { 5, s_6_108, -1, 1, 0}, /*109 */ { 5, s_6_109, -1, 1, 0}, /*110 */ { 4, s_6_110, -1, 1, 0}, /*111 */ { 4, s_6_111, -1, 1, 0}, /*112 */ { 4, s_6_112, -1, 1, 0}, /*113 */ { 4, s_6_113, -1, 1, 0}, /*114 */ { 2, s_6_114, -1, 1, 0}, /*115 */ { 2, s_6_115, -1, 1, 0}, 
/*116 */ { 2, s_6_116, -1, 1, 0}, /*117 */ { 3, s_6_117, -1, 1, 0}, /*118 */ { 3, s_6_118, -1, 1, 0}, /*119 */ { 3, s_6_119, -1, 1, 0} }; static const symbol s_7_0[1] = { 'a' }; static const symbol s_7_1[1] = { 'i' }; static const symbol s_7_2[1] = { 'o' }; static const symbol s_7_3[2] = { 'o', 's' }; static const symbol s_7_4[1] = { 0xE1 }; static const symbol s_7_5[1] = { 0xED }; static const symbol s_7_6[1] = { 0xF3 }; static const struct among a_7[7] = { /* 0 */ { 1, s_7_0, -1, 1, 0}, /* 1 */ { 1, s_7_1, -1, 1, 0}, /* 2 */ { 1, s_7_2, -1, 1, 0}, /* 3 */ { 2, s_7_3, -1, 1, 0}, /* 4 */ { 1, s_7_4, -1, 1, 0}, /* 5 */ { 1, s_7_5, -1, 1, 0}, /* 6 */ { 1, s_7_6, -1, 1, 0} }; static const symbol s_8_0[1] = { 'e' }; static const symbol s_8_1[1] = { 0xE7 }; static const symbol s_8_2[1] = { 0xE9 }; static const symbol s_8_3[1] = { 0xEA }; static const struct among a_8[4] = { /* 0 */ { 1, s_8_0, -1, 1, 0}, /* 1 */ { 1, s_8_1, -1, 2, 0}, /* 2 */ { 1, s_8_2, -1, 1, 0}, /* 3 */ { 1, s_8_3, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 19, 12, 2 }; static const symbol s_0[] = { 'a', '~' }; static const symbol s_1[] = { 'o', '~' }; static const symbol s_2[] = { 0xE3 }; static const symbol s_3[] = { 0xF5 }; static const symbol s_4[] = { 'l', 'o', 'g' }; static const symbol s_5[] = { 'u' }; static const symbol s_6[] = { 'e', 'n', 't', 'e' }; static const symbol s_7[] = { 'a', 't' }; static const symbol s_8[] = { 'a', 't' }; static const symbol s_9[] = { 'e' }; static const symbol s_10[] = { 'i', 'r' }; static const symbol s_11[] = { 'u' }; static const symbol s_12[] = { 'g' }; static const symbol s_13[] = { 'i' }; static const symbol s_14[] = { 'c' }; static const symbol s_15[] = { 'c' }; static const symbol s_16[] = { 'i' }; static const symbol s_17[] = { 'c' }; static int r_prelude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 36 */ int c1 = z->c; z->bra = z->c; /* [, line 37 */ if (z->c >= z->l || 
(z->p[z->c + 0] != 227 && z->p[z->c + 0] != 245)) among_var = 3; else among_var = find_among(z, a_0, 3); /* substring, line 37 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 37 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 2, s_0); /* <-, line 38 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 2, s_1); /* <-, line 39 */ if (ret < 0) return ret; } break; case 3: if (z->c >= z->l) goto lab0; z->c++; /* next, line 40 */ break; } continue; lab0: z->c = c1; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 50 */ { int c2 = z->c; /* or, line 52 */ if (in_grouping(z, g_v, 97, 250, 0)) goto lab2; { int c3 = z->c; /* or, line 51 */ if (out_grouping(z, g_v, 97, 250, 0)) goto lab4; { /* gopast */ /* grouping v, line 51 */ int ret = out_grouping(z, g_v, 97, 250, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping(z, g_v, 97, 250, 0)) goto lab2; { /* gopast */ /* non v, line 51 */ int ret = in_grouping(z, g_v, 97, 250, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping(z, g_v, 97, 250, 0)) goto lab0; { int c4 = z->c; /* or, line 53 */ if (out_grouping(z, g_v, 97, 250, 0)) goto lab6; { /* gopast */ /* grouping v, line 53 */ int ret = out_grouping(z, g_v, 97, 250, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping(z, g_v, 97, 250, 0)) goto lab0; if (z->c >= z->l) goto lab0; z->c++; /* next, line 53 */ } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 54 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 56 */ { /* gopast */ /* grouping v, line 57 */ int ret = out_grouping(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 57 */ int ret = in_grouping(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 57 */ { /* gopast */ 
/* grouping v, line 58 */ int ret = out_grouping(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 58 */ int ret = in_grouping(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 58 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 62 */ int c1 = z->c; z->bra = z->c; /* [, line 63 */ if (z->c + 1 >= z->l || z->p[z->c + 1] != 126) among_var = 3; else among_var = find_among(z, a_1, 3); /* substring, line 63 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 63 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_2); /* <-, line 64 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_3); /* <-, line 65 */ if (ret < 0) return ret; } break; case 3: if (z->c >= z->l) goto lab0; z->c++; /* next, line 66 */ break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 77 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((839714 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_5, 45); /* substring, line 77 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 77 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 93 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 93 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 98 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_4); /* <-, line 98 */ if (ret < 0) return ret; } break; case 3: { int ret = 
r_R2(z); if (ret == 0) return 0; /* call R2, line 102 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_5); /* <-, line 102 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 106 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_6); /* <-, line 106 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 110 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 110 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ z->ket = z->c; /* [, line 112 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718616 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab0; } among_var = find_among_b(z, a_2, 4); /* substring, line 112 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 112 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 112 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 112 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab0; } case 1: z->ket = z->c; /* [, line 113 */ if (!(eq_s_b(z, 2, s_7))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 113 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 113 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 113 */ if (ret < 0) return ret; } break; } lab0: ; } break; case 6: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 122 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 122 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 123 */ z->ket = z->c; /* [, line 124 */ if (z->c - 3 <= z->lb || (z->p[z->c - 1] != 101 && z->p[z->c - 1] != 108)) { z->c = z->l - m_keep; goto lab1; } among_var = find_among_b(z, a_3, 3); /* 
substring, line 124 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 124 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab1; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 127 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 127 */ if (ret < 0) return ret; } break; } lab1: ; } break; case 7: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 134 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 135 */ z->ket = z->c; /* [, line 136 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab2; } among_var = find_among_b(z, a_4, 3); /* substring, line 136 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } z->bra = z->c; /* ], line 136 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab2; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 139 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 139 */ if (ret < 0) return ret; } break; } lab2: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 146 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 146 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 147 */ z->ket = z->c; /* [, line 148 */ if (!(eq_s_b(z, 2, s_8))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 148 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 148 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 148 */ if (ret < 0) return ret; } lab3: ; } break; case 9: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 153 */ if (ret < 0) return ret; } if (!(eq_s_b(z, 1, s_9))) 
return 0; { int ret = slice_from_s(z, 2, s_10); /* <-, line 154 */ if (ret < 0) return ret; } break; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 159 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 159 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 160 */ among_var = find_among_b(z, a_6, 120); /* substring, line 160 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 160 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = slice_del(z); /* delete, line 179 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_residual_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 184 */ among_var = find_among_b(z, a_7, 7); /* substring, line 184 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 184 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 187 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 187 */ if (ret < 0) return ret; } break; } return 1; } static int r_residual_form(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 192 */ among_var = find_among_b(z, a_8, 4); /* substring, line 192 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 192 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 194 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 194 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 194 */ { int m1 = z->l - z->c; (void)m1; /* or, line 194 */ if (!(eq_s_b(z, 1, s_11))) goto lab1; z->bra = z->c; /* ], line 194 */ { int m_test = z->l - z->c; /* test, line 194 */ if (!(eq_s_b(z, 1, s_12))) goto lab1; z->c = z->l - m_test; } goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_13))) return 0; z->bra = z->c; /* ], line 195 */ { 
int m_test = z->l - z->c; /* test, line 195 */ if (!(eq_s_b(z, 1, s_14))) return 0; z->c = z->l - m_test; } } lab0: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 195 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 195 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_15); /* <-, line 196 */ if (ret < 0) return ret; } break; } return 1; } extern int portuguese_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 202 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 202 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 203 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 203 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 204 */ { int m3 = z->l - z->c; (void)m3; /* do, line 205 */ { int m4 = z->l - z->c; (void)m4; /* or, line 209 */ { int m5 = z->l - z->c; (void)m5; /* and, line 207 */ { int m6 = z->l - z->c; (void)m6; /* or, line 206 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab6; /* call standard_suffix, line 206 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m6; { int ret = r_verb_suffix(z); if (ret == 0) goto lab4; /* call verb_suffix, line 206 */ if (ret < 0) return ret; } } lab5: z->c = z->l - m5; { int m7 = z->l - z->c; (void)m7; /* do, line 207 */ z->ket = z->c; /* [, line 207 */ if (!(eq_s_b(z, 1, s_16))) goto lab7; z->bra = z->c; /* ], line 207 */ { int m_test = z->l - z->c; /* test, line 207 */ if (!(eq_s_b(z, 1, s_17))) goto lab7; z->c = z->l - m_test; } { int ret = r_RV(z); if (ret == 0) goto lab7; /* call RV, line 207 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 207 */ if (ret < 0) return ret; } lab7: z->c = z->l - m7; } } goto lab3; lab4: z->c = z->l - m4; { int ret = r_residual_suffix(z); if (ret == 0) goto lab2; /* call residual_suffix, line 209 */ if (ret < 0) return ret; } } 
lab3: lab2: z->c = z->l - m3; } { int m8 = z->l - z->c; (void)m8; /* do, line 211 */ { int ret = r_residual_form(z); if (ret == 0) goto lab8; /* call residual_form, line 211 */ if (ret < 0) return ret; } lab8: z->c = z->l - m8; } z->c = z->lb; { int c9 = z->c; /* do, line 213 */ { int ret = r_postlude(z); if (ret == 0) goto lab9; /* call postlude, line 213 */ if (ret < 0) return ret; } lab9: z->c = c9; } return 1; } extern struct SN_env * portuguese_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } extern void portuguese_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h000066400000000000000000000005161456444476200321720ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* portuguese_ISO_8859_1_create_env(void); extern void portuguese_ISO_8859_1_close_env(struct SN_env* z); extern int portuguese_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_spanish.c000066400000000000000000001201461456444476200314320ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int spanish_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_residual_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_y_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_attached_pronoun(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct 
SN_env * spanish_ISO_8859_1_create_env(void); extern void spanish_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[1] = { 0xE1 }; static const symbol s_0_2[1] = { 0xE9 }; static const symbol s_0_3[1] = { 0xED }; static const symbol s_0_4[1] = { 0xF3 }; static const symbol s_0_5[1] = { 0xFA }; static const struct among a_0[6] = { /* 0 */ { 0, 0, -1, 6, 0}, /* 1 */ { 1, s_0_1, 0, 1, 0}, /* 2 */ { 1, s_0_2, 0, 2, 0}, /* 3 */ { 1, s_0_3, 0, 3, 0}, /* 4 */ { 1, s_0_4, 0, 4, 0}, /* 5 */ { 1, s_0_5, 0, 5, 0} }; static const symbol s_1_0[2] = { 'l', 'a' }; static const symbol s_1_1[4] = { 's', 'e', 'l', 'a' }; static const symbol s_1_2[2] = { 'l', 'e' }; static const symbol s_1_3[2] = { 'm', 'e' }; static const symbol s_1_4[2] = { 's', 'e' }; static const symbol s_1_5[2] = { 'l', 'o' }; static const symbol s_1_6[4] = { 's', 'e', 'l', 'o' }; static const symbol s_1_7[3] = { 'l', 'a', 's' }; static const symbol s_1_8[5] = { 's', 'e', 'l', 'a', 's' }; static const symbol s_1_9[3] = { 'l', 'e', 's' }; static const symbol s_1_10[3] = { 'l', 'o', 's' }; static const symbol s_1_11[5] = { 's', 'e', 'l', 'o', 's' }; static const symbol s_1_12[3] = { 'n', 'o', 's' }; static const struct among a_1[13] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 4, s_1_1, 0, -1, 0}, /* 2 */ { 2, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0}, /* 4 */ { 2, s_1_4, -1, -1, 0}, /* 5 */ { 2, s_1_5, -1, -1, 0}, /* 6 */ { 4, s_1_6, 5, -1, 0}, /* 7 */ { 3, s_1_7, -1, -1, 0}, /* 8 */ { 5, s_1_8, 7, -1, 0}, /* 9 */ { 3, s_1_9, -1, -1, 0}, /* 10 */ { 3, s_1_10, -1, -1, 0}, /* 11 */ { 5, s_1_11, 10, -1, 0}, /* 12 */ { 3, s_1_12, -1, -1, 0} }; static const symbol s_2_0[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_2_1[5] = { 'i', 'e', 'n', 'd', 'o' }; static const symbol s_2_2[5] = { 'y', 'e', 'n', 'd', 'o' }; static const symbol s_2_3[4] = { 0xE1, 'n', 'd', 'o' }; static const symbol s_2_4[5] = { 'i', 0xE9, 'n', 'd', 'o' }; static const symbol s_2_5[2] = { 
'a', 'r' }; static const symbol s_2_6[2] = { 'e', 'r' }; static const symbol s_2_7[2] = { 'i', 'r' }; static const symbol s_2_8[2] = { 0xE1, 'r' }; static const symbol s_2_9[2] = { 0xE9, 'r' }; static const symbol s_2_10[2] = { 0xED, 'r' }; static const struct among a_2[11] = { /* 0 */ { 4, s_2_0, -1, 6, 0}, /* 1 */ { 5, s_2_1, -1, 6, 0}, /* 2 */ { 5, s_2_2, -1, 7, 0}, /* 3 */ { 4, s_2_3, -1, 2, 0}, /* 4 */ { 5, s_2_4, -1, 1, 0}, /* 5 */ { 2, s_2_5, -1, 6, 0}, /* 6 */ { 2, s_2_6, -1, 6, 0}, /* 7 */ { 2, s_2_7, -1, 6, 0}, /* 8 */ { 2, s_2_8, -1, 3, 0}, /* 9 */ { 2, s_2_9, -1, 4, 0}, /* 10 */ { 2, s_2_10, -1, 5, 0} }; static const symbol s_3_0[2] = { 'i', 'c' }; static const symbol s_3_1[2] = { 'a', 'd' }; static const symbol s_3_2[2] = { 'o', 's' }; static const symbol s_3_3[2] = { 'i', 'v' }; static const struct among a_3[4] = { /* 0 */ { 2, s_3_0, -1, -1, 0}, /* 1 */ { 2, s_3_1, -1, -1, 0}, /* 2 */ { 2, s_3_2, -1, -1, 0}, /* 3 */ { 2, s_3_3, -1, 1, 0} }; static const symbol s_4_0[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_4_1[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_4_2[4] = { 'a', 'n', 't', 'e' }; static const struct among a_4[3] = { /* 0 */ { 4, s_4_0, -1, 1, 0}, /* 1 */ { 4, s_4_1, -1, 1, 0}, /* 2 */ { 4, s_4_2, -1, 1, 0} }; static const symbol s_5_0[2] = { 'i', 'c' }; static const symbol s_5_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_5_2[2] = { 'i', 'v' }; static const struct among a_5[3] = { /* 0 */ { 2, s_5_0, -1, 1, 0}, /* 1 */ { 4, s_5_1, -1, 1, 0}, /* 2 */ { 2, s_5_2, -1, 1, 0} }; static const symbol s_6_0[3] = { 'i', 'c', 'a' }; static const symbol s_6_1[5] = { 'a', 'n', 'c', 'i', 'a' }; static const symbol s_6_2[5] = { 'e', 'n', 'c', 'i', 'a' }; static const symbol s_6_3[5] = { 'a', 'd', 'o', 'r', 'a' }; static const symbol s_6_4[3] = { 'o', 's', 'a' }; static const symbol s_6_5[4] = { 'i', 's', 't', 'a' }; static const symbol s_6_6[3] = { 'i', 'v', 'a' }; static const symbol s_6_7[4] = { 'a', 'n', 'z', 'a' }; static const 
symbol s_6_8[5] = { 'l', 'o', 'g', 0xED, 'a' }; static const symbol s_6_9[4] = { 'i', 'd', 'a', 'd' }; static const symbol s_6_10[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_6_11[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_6_12[4] = { 'a', 'n', 't', 'e' }; static const symbol s_6_13[5] = { 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_14[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_15[5] = { 'a', 'c', 'i', 0xF3, 'n' }; static const symbol s_6_16[5] = { 'u', 'c', 'i', 0xF3, 'n' }; static const symbol s_6_17[3] = { 'i', 'c', 'o' }; static const symbol s_6_18[4] = { 'i', 's', 'm', 'o' }; static const symbol s_6_19[3] = { 'o', 's', 'o' }; static const symbol s_6_20[7] = { 'a', 'm', 'i', 'e', 'n', 't', 'o' }; static const symbol s_6_21[7] = { 'i', 'm', 'i', 'e', 'n', 't', 'o' }; static const symbol s_6_22[3] = { 'i', 'v', 'o' }; static const symbol s_6_23[4] = { 'a', 'd', 'o', 'r' }; static const symbol s_6_24[4] = { 'i', 'c', 'a', 's' }; static const symbol s_6_25[6] = { 'a', 'n', 'c', 'i', 'a', 's' }; static const symbol s_6_26[6] = { 'e', 'n', 'c', 'i', 'a', 's' }; static const symbol s_6_27[6] = { 'a', 'd', 'o', 'r', 'a', 's' }; static const symbol s_6_28[4] = { 'o', 's', 'a', 's' }; static const symbol s_6_29[5] = { 'i', 's', 't', 'a', 's' }; static const symbol s_6_30[4] = { 'i', 'v', 'a', 's' }; static const symbol s_6_31[5] = { 'a', 'n', 'z', 'a', 's' }; static const symbol s_6_32[6] = { 'l', 'o', 'g', 0xED, 'a', 's' }; static const symbol s_6_33[6] = { 'i', 'd', 'a', 'd', 'e', 's' }; static const symbol s_6_34[5] = { 'a', 'b', 'l', 'e', 's' }; static const symbol s_6_35[5] = { 'i', 'b', 'l', 'e', 's' }; static const symbol s_6_36[7] = { 'a', 'c', 'i', 'o', 'n', 'e', 's' }; static const symbol s_6_37[7] = { 'u', 'c', 'i', 'o', 'n', 'e', 's' }; static const symbol s_6_38[6] = { 'a', 'd', 'o', 'r', 'e', 's' }; static const symbol s_6_39[5] = { 'a', 'n', 't', 'e', 's' }; static const symbol s_6_40[4] = { 'i', 'c', 'o', 's' }; 
static const symbol s_6_41[5] = { 'i', 's', 'm', 'o', 's' }; static const symbol s_6_42[4] = { 'o', 's', 'o', 's' }; static const symbol s_6_43[8] = { 'a', 'm', 'i', 'e', 'n', 't', 'o', 's' }; static const symbol s_6_44[8] = { 'i', 'm', 'i', 'e', 'n', 't', 'o', 's' }; static const symbol s_6_45[4] = { 'i', 'v', 'o', 's' }; static const struct among a_6[46] = { /* 0 */ { 3, s_6_0, -1, 1, 0}, /* 1 */ { 5, s_6_1, -1, 2, 0}, /* 2 */ { 5, s_6_2, -1, 5, 0}, /* 3 */ { 5, s_6_3, -1, 2, 0}, /* 4 */ { 3, s_6_4, -1, 1, 0}, /* 5 */ { 4, s_6_5, -1, 1, 0}, /* 6 */ { 3, s_6_6, -1, 9, 0}, /* 7 */ { 4, s_6_7, -1, 1, 0}, /* 8 */ { 5, s_6_8, -1, 3, 0}, /* 9 */ { 4, s_6_9, -1, 8, 0}, /* 10 */ { 4, s_6_10, -1, 1, 0}, /* 11 */ { 4, s_6_11, -1, 1, 0}, /* 12 */ { 4, s_6_12, -1, 2, 0}, /* 13 */ { 5, s_6_13, -1, 7, 0}, /* 14 */ { 6, s_6_14, 13, 6, 0}, /* 15 */ { 5, s_6_15, -1, 2, 0}, /* 16 */ { 5, s_6_16, -1, 4, 0}, /* 17 */ { 3, s_6_17, -1, 1, 0}, /* 18 */ { 4, s_6_18, -1, 1, 0}, /* 19 */ { 3, s_6_19, -1, 1, 0}, /* 20 */ { 7, s_6_20, -1, 1, 0}, /* 21 */ { 7, s_6_21, -1, 1, 0}, /* 22 */ { 3, s_6_22, -1, 9, 0}, /* 23 */ { 4, s_6_23, -1, 2, 0}, /* 24 */ { 4, s_6_24, -1, 1, 0}, /* 25 */ { 6, s_6_25, -1, 2, 0}, /* 26 */ { 6, s_6_26, -1, 5, 0}, /* 27 */ { 6, s_6_27, -1, 2, 0}, /* 28 */ { 4, s_6_28, -1, 1, 0}, /* 29 */ { 5, s_6_29, -1, 1, 0}, /* 30 */ { 4, s_6_30, -1, 9, 0}, /* 31 */ { 5, s_6_31, -1, 1, 0}, /* 32 */ { 6, s_6_32, -1, 3, 0}, /* 33 */ { 6, s_6_33, -1, 8, 0}, /* 34 */ { 5, s_6_34, -1, 1, 0}, /* 35 */ { 5, s_6_35, -1, 1, 0}, /* 36 */ { 7, s_6_36, -1, 2, 0}, /* 37 */ { 7, s_6_37, -1, 4, 0}, /* 38 */ { 6, s_6_38, -1, 2, 0}, /* 39 */ { 5, s_6_39, -1, 2, 0}, /* 40 */ { 4, s_6_40, -1, 1, 0}, /* 41 */ { 5, s_6_41, -1, 1, 0}, /* 42 */ { 4, s_6_42, -1, 1, 0}, /* 43 */ { 8, s_6_43, -1, 1, 0}, /* 44 */ { 8, s_6_44, -1, 1, 0}, /* 45 */ { 4, s_6_45, -1, 9, 0} }; static const symbol s_7_0[2] = { 'y', 'a' }; static const symbol s_7_1[2] = { 'y', 'e' }; static const symbol s_7_2[3] = { 'y', 'a', 'n' 
}; static const symbol s_7_3[3] = { 'y', 'e', 'n' }; static const symbol s_7_4[5] = { 'y', 'e', 'r', 'o', 'n' }; static const symbol s_7_5[5] = { 'y', 'e', 'n', 'd', 'o' }; static const symbol s_7_6[2] = { 'y', 'o' }; static const symbol s_7_7[3] = { 'y', 'a', 's' }; static const symbol s_7_8[3] = { 'y', 'e', 's' }; static const symbol s_7_9[4] = { 'y', 'a', 'i', 's' }; static const symbol s_7_10[5] = { 'y', 'a', 'm', 'o', 's' }; static const symbol s_7_11[2] = { 'y', 0xF3 }; static const struct among a_7[12] = { /* 0 */ { 2, s_7_0, -1, 1, 0}, /* 1 */ { 2, s_7_1, -1, 1, 0}, /* 2 */ { 3, s_7_2, -1, 1, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 5, s_7_4, -1, 1, 0}, /* 5 */ { 5, s_7_5, -1, 1, 0}, /* 6 */ { 2, s_7_6, -1, 1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 3, s_7_8, -1, 1, 0}, /* 9 */ { 4, s_7_9, -1, 1, 0}, /* 10 */ { 5, s_7_10, -1, 1, 0}, /* 11 */ { 2, s_7_11, -1, 1, 0} }; static const symbol s_8_0[3] = { 'a', 'b', 'a' }; static const symbol s_8_1[3] = { 'a', 'd', 'a' }; static const symbol s_8_2[3] = { 'i', 'd', 'a' }; static const symbol s_8_3[3] = { 'a', 'r', 'a' }; static const symbol s_8_4[4] = { 'i', 'e', 'r', 'a' }; static const symbol s_8_5[2] = { 0xED, 'a' }; static const symbol s_8_6[4] = { 'a', 'r', 0xED, 'a' }; static const symbol s_8_7[4] = { 'e', 'r', 0xED, 'a' }; static const symbol s_8_8[4] = { 'i', 'r', 0xED, 'a' }; static const symbol s_8_9[2] = { 'a', 'd' }; static const symbol s_8_10[2] = { 'e', 'd' }; static const symbol s_8_11[2] = { 'i', 'd' }; static const symbol s_8_12[3] = { 'a', 's', 'e' }; static const symbol s_8_13[4] = { 'i', 'e', 's', 'e' }; static const symbol s_8_14[4] = { 'a', 's', 't', 'e' }; static const symbol s_8_15[4] = { 'i', 's', 't', 'e' }; static const symbol s_8_16[2] = { 'a', 'n' }; static const symbol s_8_17[4] = { 'a', 'b', 'a', 'n' }; static const symbol s_8_18[4] = { 'a', 'r', 'a', 'n' }; static const symbol s_8_19[5] = { 'i', 'e', 'r', 'a', 'n' }; static const symbol s_8_20[3] = { 0xED, 'a', 'n' }; static 
const symbol s_8_21[5] = { 'a', 'r', 0xED, 'a', 'n' }; static const symbol s_8_22[5] = { 'e', 'r', 0xED, 'a', 'n' }; static const symbol s_8_23[5] = { 'i', 'r', 0xED, 'a', 'n' }; static const symbol s_8_24[2] = { 'e', 'n' }; static const symbol s_8_25[4] = { 'a', 's', 'e', 'n' }; static const symbol s_8_26[5] = { 'i', 'e', 's', 'e', 'n' }; static const symbol s_8_27[4] = { 'a', 'r', 'o', 'n' }; static const symbol s_8_28[5] = { 'i', 'e', 'r', 'o', 'n' }; static const symbol s_8_29[4] = { 'a', 'r', 0xE1, 'n' }; static const symbol s_8_30[4] = { 'e', 'r', 0xE1, 'n' }; static const symbol s_8_31[4] = { 'i', 'r', 0xE1, 'n' }; static const symbol s_8_32[3] = { 'a', 'd', 'o' }; static const symbol s_8_33[3] = { 'i', 'd', 'o' }; static const symbol s_8_34[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_8_35[5] = { 'i', 'e', 'n', 'd', 'o' }; static const symbol s_8_36[2] = { 'a', 'r' }; static const symbol s_8_37[2] = { 'e', 'r' }; static const symbol s_8_38[2] = { 'i', 'r' }; static const symbol s_8_39[2] = { 'a', 's' }; static const symbol s_8_40[4] = { 'a', 'b', 'a', 's' }; static const symbol s_8_41[4] = { 'a', 'd', 'a', 's' }; static const symbol s_8_42[4] = { 'i', 'd', 'a', 's' }; static const symbol s_8_43[4] = { 'a', 'r', 'a', 's' }; static const symbol s_8_44[5] = { 'i', 'e', 'r', 'a', 's' }; static const symbol s_8_45[3] = { 0xED, 'a', 's' }; static const symbol s_8_46[5] = { 'a', 'r', 0xED, 'a', 's' }; static const symbol s_8_47[5] = { 'e', 'r', 0xED, 'a', 's' }; static const symbol s_8_48[5] = { 'i', 'r', 0xED, 'a', 's' }; static const symbol s_8_49[2] = { 'e', 's' }; static const symbol s_8_50[4] = { 'a', 's', 'e', 's' }; static const symbol s_8_51[5] = { 'i', 'e', 's', 'e', 's' }; static const symbol s_8_52[5] = { 'a', 'b', 'a', 'i', 's' }; static const symbol s_8_53[5] = { 'a', 'r', 'a', 'i', 's' }; static const symbol s_8_54[6] = { 'i', 'e', 'r', 'a', 'i', 's' }; static const symbol s_8_55[4] = { 0xED, 'a', 'i', 's' }; static const symbol s_8_56[6] = { 
'a', 'r', 0xED, 'a', 'i', 's' }; static const symbol s_8_57[6] = { 'e', 'r', 0xED, 'a', 'i', 's' }; static const symbol s_8_58[6] = { 'i', 'r', 0xED, 'a', 'i', 's' }; static const symbol s_8_59[5] = { 'a', 's', 'e', 'i', 's' }; static const symbol s_8_60[6] = { 'i', 'e', 's', 'e', 'i', 's' }; static const symbol s_8_61[6] = { 'a', 's', 't', 'e', 'i', 's' }; static const symbol s_8_62[6] = { 'i', 's', 't', 'e', 'i', 's' }; static const symbol s_8_63[3] = { 0xE1, 'i', 's' }; static const symbol s_8_64[3] = { 0xE9, 'i', 's' }; static const symbol s_8_65[5] = { 'a', 'r', 0xE9, 'i', 's' }; static const symbol s_8_66[5] = { 'e', 'r', 0xE9, 'i', 's' }; static const symbol s_8_67[5] = { 'i', 'r', 0xE9, 'i', 's' }; static const symbol s_8_68[4] = { 'a', 'd', 'o', 's' }; static const symbol s_8_69[4] = { 'i', 'd', 'o', 's' }; static const symbol s_8_70[4] = { 'a', 'm', 'o', 's' }; static const symbol s_8_71[6] = { 0xE1, 'b', 'a', 'm', 'o', 's' }; static const symbol s_8_72[6] = { 0xE1, 'r', 'a', 'm', 'o', 's' }; static const symbol s_8_73[7] = { 'i', 0xE9, 'r', 'a', 'm', 'o', 's' }; static const symbol s_8_74[5] = { 0xED, 'a', 'm', 'o', 's' }; static const symbol s_8_75[7] = { 'a', 'r', 0xED, 'a', 'm', 'o', 's' }; static const symbol s_8_76[7] = { 'e', 'r', 0xED, 'a', 'm', 'o', 's' }; static const symbol s_8_77[7] = { 'i', 'r', 0xED, 'a', 'm', 'o', 's' }; static const symbol s_8_78[4] = { 'e', 'm', 'o', 's' }; static const symbol s_8_79[6] = { 'a', 'r', 'e', 'm', 'o', 's' }; static const symbol s_8_80[6] = { 'e', 'r', 'e', 'm', 'o', 's' }; static const symbol s_8_81[6] = { 'i', 'r', 'e', 'm', 'o', 's' }; static const symbol s_8_82[6] = { 0xE1, 's', 'e', 'm', 'o', 's' }; static const symbol s_8_83[7] = { 'i', 0xE9, 's', 'e', 'm', 'o', 's' }; static const symbol s_8_84[4] = { 'i', 'm', 'o', 's' }; static const symbol s_8_85[4] = { 'a', 'r', 0xE1, 's' }; static const symbol s_8_86[4] = { 'e', 'r', 0xE1, 's' }; static const symbol s_8_87[4] = { 'i', 'r', 0xE1, 's' }; static 
const symbol s_8_88[2] = { 0xED, 's' }; static const symbol s_8_89[3] = { 'a', 'r', 0xE1 }; static const symbol s_8_90[3] = { 'e', 'r', 0xE1 }; static const symbol s_8_91[3] = { 'i', 'r', 0xE1 }; static const symbol s_8_92[3] = { 'a', 'r', 0xE9 }; static const symbol s_8_93[3] = { 'e', 'r', 0xE9 }; static const symbol s_8_94[3] = { 'i', 'r', 0xE9 }; static const symbol s_8_95[2] = { 'i', 0xF3 }; static const struct among a_8[96] = { /* 0 */ { 3, s_8_0, -1, 2, 0}, /* 1 */ { 3, s_8_1, -1, 2, 0}, /* 2 */ { 3, s_8_2, -1, 2, 0}, /* 3 */ { 3, s_8_3, -1, 2, 0}, /* 4 */ { 4, s_8_4, -1, 2, 0}, /* 5 */ { 2, s_8_5, -1, 2, 0}, /* 6 */ { 4, s_8_6, 5, 2, 0}, /* 7 */ { 4, s_8_7, 5, 2, 0}, /* 8 */ { 4, s_8_8, 5, 2, 0}, /* 9 */ { 2, s_8_9, -1, 2, 0}, /* 10 */ { 2, s_8_10, -1, 2, 0}, /* 11 */ { 2, s_8_11, -1, 2, 0}, /* 12 */ { 3, s_8_12, -1, 2, 0}, /* 13 */ { 4, s_8_13, -1, 2, 0}, /* 14 */ { 4, s_8_14, -1, 2, 0}, /* 15 */ { 4, s_8_15, -1, 2, 0}, /* 16 */ { 2, s_8_16, -1, 2, 0}, /* 17 */ { 4, s_8_17, 16, 2, 0}, /* 18 */ { 4, s_8_18, 16, 2, 0}, /* 19 */ { 5, s_8_19, 16, 2, 0}, /* 20 */ { 3, s_8_20, 16, 2, 0}, /* 21 */ { 5, s_8_21, 20, 2, 0}, /* 22 */ { 5, s_8_22, 20, 2, 0}, /* 23 */ { 5, s_8_23, 20, 2, 0}, /* 24 */ { 2, s_8_24, -1, 1, 0}, /* 25 */ { 4, s_8_25, 24, 2, 0}, /* 26 */ { 5, s_8_26, 24, 2, 0}, /* 27 */ { 4, s_8_27, -1, 2, 0}, /* 28 */ { 5, s_8_28, -1, 2, 0}, /* 29 */ { 4, s_8_29, -1, 2, 0}, /* 30 */ { 4, s_8_30, -1, 2, 0}, /* 31 */ { 4, s_8_31, -1, 2, 0}, /* 32 */ { 3, s_8_32, -1, 2, 0}, /* 33 */ { 3, s_8_33, -1, 2, 0}, /* 34 */ { 4, s_8_34, -1, 2, 0}, /* 35 */ { 5, s_8_35, -1, 2, 0}, /* 36 */ { 2, s_8_36, -1, 2, 0}, /* 37 */ { 2, s_8_37, -1, 2, 0}, /* 38 */ { 2, s_8_38, -1, 2, 0}, /* 39 */ { 2, s_8_39, -1, 2, 0}, /* 40 */ { 4, s_8_40, 39, 2, 0}, /* 41 */ { 4, s_8_41, 39, 2, 0}, /* 42 */ { 4, s_8_42, 39, 2, 0}, /* 43 */ { 4, s_8_43, 39, 2, 0}, /* 44 */ { 5, s_8_44, 39, 2, 0}, /* 45 */ { 3, s_8_45, 39, 2, 0}, /* 46 */ { 5, s_8_46, 45, 2, 0}, /* 47 */ { 5, s_8_47, 45, 2, 0}, 
/* 48 */ { 5, s_8_48, 45, 2, 0}, /* 49 */ { 2, s_8_49, -1, 1, 0}, /* 50 */ { 4, s_8_50, 49, 2, 0}, /* 51 */ { 5, s_8_51, 49, 2, 0}, /* 52 */ { 5, s_8_52, -1, 2, 0}, /* 53 */ { 5, s_8_53, -1, 2, 0}, /* 54 */ { 6, s_8_54, -1, 2, 0}, /* 55 */ { 4, s_8_55, -1, 2, 0}, /* 56 */ { 6, s_8_56, 55, 2, 0}, /* 57 */ { 6, s_8_57, 55, 2, 0}, /* 58 */ { 6, s_8_58, 55, 2, 0}, /* 59 */ { 5, s_8_59, -1, 2, 0}, /* 60 */ { 6, s_8_60, -1, 2, 0}, /* 61 */ { 6, s_8_61, -1, 2, 0}, /* 62 */ { 6, s_8_62, -1, 2, 0}, /* 63 */ { 3, s_8_63, -1, 2, 0}, /* 64 */ { 3, s_8_64, -1, 1, 0}, /* 65 */ { 5, s_8_65, 64, 2, 0}, /* 66 */ { 5, s_8_66, 64, 2, 0}, /* 67 */ { 5, s_8_67, 64, 2, 0}, /* 68 */ { 4, s_8_68, -1, 2, 0}, /* 69 */ { 4, s_8_69, -1, 2, 0}, /* 70 */ { 4, s_8_70, -1, 2, 0}, /* 71 */ { 6, s_8_71, 70, 2, 0}, /* 72 */ { 6, s_8_72, 70, 2, 0}, /* 73 */ { 7, s_8_73, 70, 2, 0}, /* 74 */ { 5, s_8_74, 70, 2, 0}, /* 75 */ { 7, s_8_75, 74, 2, 0}, /* 76 */ { 7, s_8_76, 74, 2, 0}, /* 77 */ { 7, s_8_77, 74, 2, 0}, /* 78 */ { 4, s_8_78, -1, 1, 0}, /* 79 */ { 6, s_8_79, 78, 2, 0}, /* 80 */ { 6, s_8_80, 78, 2, 0}, /* 81 */ { 6, s_8_81, 78, 2, 0}, /* 82 */ { 6, s_8_82, 78, 2, 0}, /* 83 */ { 7, s_8_83, 78, 2, 0}, /* 84 */ { 4, s_8_84, -1, 2, 0}, /* 85 */ { 4, s_8_85, -1, 2, 0}, /* 86 */ { 4, s_8_86, -1, 2, 0}, /* 87 */ { 4, s_8_87, -1, 2, 0}, /* 88 */ { 2, s_8_88, -1, 2, 0}, /* 89 */ { 3, s_8_89, -1, 2, 0}, /* 90 */ { 3, s_8_90, -1, 2, 0}, /* 91 */ { 3, s_8_91, -1, 2, 0}, /* 92 */ { 3, s_8_92, -1, 2, 0}, /* 93 */ { 3, s_8_93, -1, 2, 0}, /* 94 */ { 3, s_8_94, -1, 2, 0}, /* 95 */ { 2, s_8_95, -1, 2, 0} }; static const symbol s_9_0[1] = { 'a' }; static const symbol s_9_1[1] = { 'e' }; static const symbol s_9_2[1] = { 'o' }; static const symbol s_9_3[2] = { 'o', 's' }; static const symbol s_9_4[1] = { 0xE1 }; static const symbol s_9_5[1] = { 0xE9 }; static const symbol s_9_6[1] = { 0xED }; static const symbol s_9_7[1] = { 0xF3 }; static const struct among a_9[8] = { /* 0 */ { 1, s_9_0, -1, 1, 0}, /* 1 */ { 1, 
s_9_1, -1, 2, 0}, /* 2 */ { 1, s_9_2, -1, 1, 0}, /* 3 */ { 2, s_9_3, -1, 1, 0}, /* 4 */ { 1, s_9_4, -1, 1, 0}, /* 5 */ { 1, s_9_5, -1, 2, 0}, /* 6 */ { 1, s_9_6, -1, 1, 0}, /* 7 */ { 1, s_9_7, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 10 }; static const symbol s_0[] = { 'a' }; static const symbol s_1[] = { 'e' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'o' }; static const symbol s_4[] = { 'u' }; static const symbol s_5[] = { 'i', 'e', 'n', 'd', 'o' }; static const symbol s_6[] = { 'a', 'n', 'd', 'o' }; static const symbol s_7[] = { 'a', 'r' }; static const symbol s_8[] = { 'e', 'r' }; static const symbol s_9[] = { 'i', 'r' }; static const symbol s_10[] = { 'u' }; static const symbol s_11[] = { 'i', 'c' }; static const symbol s_12[] = { 'l', 'o', 'g' }; static const symbol s_13[] = { 'u' }; static const symbol s_14[] = { 'e', 'n', 't', 'e' }; static const symbol s_15[] = { 'a', 't' }; static const symbol s_16[] = { 'a', 't' }; static const symbol s_17[] = { 'u' }; static const symbol s_18[] = { 'u' }; static const symbol s_19[] = { 'g' }; static const symbol s_20[] = { 'u' }; static const symbol s_21[] = { 'g' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 37 */ { int c2 = z->c; /* or, line 39 */ if (in_grouping(z, g_v, 97, 252, 0)) goto lab2; { int c3 = z->c; /* or, line 38 */ if (out_grouping(z, g_v, 97, 252, 0)) goto lab4; { /* gopast */ /* grouping v, line 38 */ int ret = out_grouping(z, g_v, 97, 252, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping(z, g_v, 97, 252, 0)) goto lab2; { /* gopast */ /* non v, line 38 */ int ret = in_grouping(z, g_v, 97, 252, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping(z, g_v, 97, 252, 0)) goto lab0; { int c4 = z->c; /* or, line 40 */ if (out_grouping(z, g_v, 97, 252, 0)) 
goto lab6; { /* gopast */ /* grouping v, line 40 */ int ret = out_grouping(z, g_v, 97, 252, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping(z, g_v, 97, 252, 0)) goto lab0; if (z->c >= z->l) goto lab0; z->c++; /* next, line 40 */ } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 41 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 43 */ { /* gopast */ /* grouping v, line 44 */ int ret = out_grouping(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 44 */ int ret = in_grouping(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 44 */ { /* gopast */ /* grouping v, line 45 */ int ret = out_grouping(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 45 */ int ret = in_grouping(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 45 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 49 */ int c1 = z->c; z->bra = z->c; /* [, line 50 */ if (z->c >= z->l || z->p[z->c + 0] >> 5 != 7 || !((67641858 >> (z->p[z->c + 0] & 0x1f)) & 1)) among_var = 6; else among_var = find_among(z, a_0, 6); /* substring, line 50 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 50 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_0); /* <-, line 51 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 52 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_2); /* <-, line 53 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_3); /* <-, line 54 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_4); /* <-, line 55 */ if (ret < 0) return ret; } break; case 6: if (z->c >= z->l) goto lab0; z->c++; /* next, line 57 */ break; } continue; lab0: z->c = c1; break; } return 1; } 
static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_attached_pronoun(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 68 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((557090 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_1, 13))) return 0; /* substring, line 68 */ z->bra = z->c; /* ], line 68 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 111 && z->p[z->c - 1] != 114)) return 0; among_var = find_among_b(z, a_2, 11); /* substring, line 72 */ if (!(among_var)) return 0; { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 72 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: z->bra = z->c; /* ], line 73 */ { int ret = slice_from_s(z, 5, s_5); /* <-, line 73 */ if (ret < 0) return ret; } break; case 2: z->bra = z->c; /* ], line 74 */ { int ret = slice_from_s(z, 4, s_6); /* <-, line 74 */ if (ret < 0) return ret; } break; case 3: z->bra = z->c; /* ], line 75 */ { int ret = slice_from_s(z, 2, s_7); /* <-, line 75 */ if (ret < 0) return ret; } break; case 4: z->bra = z->c; /* ], line 76 */ { int ret = slice_from_s(z, 2, s_8); /* <-, line 76 */ if (ret < 0) return ret; } break; case 5: z->bra = z->c; /* ], line 77 */ { int ret = slice_from_s(z, 2, s_9); /* <-, line 77 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_del(z); /* delete, line 81 */ if (ret < 0) return ret; } break; case 7: if (!(eq_s_b(z, 1, s_10))) return 0; { int ret = slice_del(z); /* delete, line 82 */ if (ret < 0) return ret; } break; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 87 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((835634 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_6, 46); /* substring, line 
87 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 87 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 99 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 99 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 105 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 105 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 106 */ z->ket = z->c; /* [, line 106 */ if (!(eq_s_b(z, 2, s_11))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 106 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 106 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 106 */ if (ret < 0) return ret; } lab0: ; } break; case 3: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 111 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_12); /* <-, line 111 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 115 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_13); /* <-, line 115 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 119 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_14); /* <-, line 119 */ if (ret < 0) return ret; } break; case 6: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 123 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 123 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 124 */ z->ket = z->c; /* [, line 125 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718616 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab1; } among_var = find_among_b(z, a_3, 4); /* substring, line 125 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } 
z->bra = z->c; /* ], line 125 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 125 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 125 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab1; } case 1: z->ket = z->c; /* [, line 126 */ if (!(eq_s_b(z, 2, s_15))) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 126 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 126 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 126 */ if (ret < 0) return ret; } break; } lab1: ; } break; case 7: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 135 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 135 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 136 */ z->ket = z->c; /* [, line 137 */ if (z->c - 3 <= z->lb || z->p[z->c - 1] != 101) { z->c = z->l - m_keep; goto lab2; } among_var = find_among_b(z, a_4, 3); /* substring, line 137 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } z->bra = z->c; /* ], line 137 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab2; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 140 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 140 */ if (ret < 0) return ret; } break; } lab2: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 147 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 147 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 148 */ z->ket = z->c; /* [, line 149 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab3; } among_var = find_among_b(z, a_5, 3); /* substring, line 149 */ if (!(among_var)) { z->c = z->l - m_keep; goto 
lab3; } z->bra = z->c; /* ], line 149 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab3; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 152 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 152 */ if (ret < 0) return ret; } break; } lab3: ; } break; case 9: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 159 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 159 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 160 */ z->ket = z->c; /* [, line 161 */ if (!(eq_s_b(z, 2, s_16))) { z->c = z->l - m_keep; goto lab4; } z->bra = z->c; /* ], line 161 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call R2, line 161 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 161 */ if (ret < 0) return ret; } lab4: ; } break; } return 1; } static int r_y_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 168 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 168 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 168 */ among_var = find_among_b(z, a_7, 12); /* substring, line 168 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 168 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: if (!(eq_s_b(z, 1, s_17))) return 0; { int ret = slice_del(z); /* delete, line 171 */ if (ret < 0) return ret; } break; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 176 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 176 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 176 */ among_var = find_among_b(z, a_8, 96); /* substring, line 176 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 176 */ 
z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 179 */ if (!(eq_s_b(z, 1, s_18))) { z->c = z->l - m_keep; goto lab0; } { int m_test = z->l - z->c; /* test, line 179 */ if (!(eq_s_b(z, 1, s_19))) { z->c = z->l - m_keep; goto lab0; } z->c = z->l - m_test; } lab0: ; } z->bra = z->c; /* ], line 179 */ { int ret = slice_del(z); /* delete, line 179 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 200 */ if (ret < 0) return ret; } break; } return 1; } static int r_residual_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 205 */ among_var = find_among_b(z, a_9, 8); /* substring, line 205 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 205 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 208 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 208 */ if (ret < 0) return ret; } break; case 2: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 210 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 210 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 210 */ z->ket = z->c; /* [, line 210 */ if (!(eq_s_b(z, 1, s_20))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 210 */ { int m_test = z->l - z->c; /* test, line 210 */ if (!(eq_s_b(z, 1, s_21))) { z->c = z->l - m_keep; goto lab0; } z->c = z->l - m_test; } { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 210 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 210 */ if (ret < 0) return ret; } lab0: ; } break; } return 1; } extern int spanish_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 216 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 216 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; 
z->c = z->l; /* backwards, line 217 */ { int m2 = z->l - z->c; (void)m2; /* do, line 218 */ { int ret = r_attached_pronoun(z); if (ret == 0) goto lab1; /* call attached_pronoun, line 218 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 219 */ { int m4 = z->l - z->c; (void)m4; /* or, line 219 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab4; /* call standard_suffix, line 219 */ if (ret < 0) return ret; } goto lab3; lab4: z->c = z->l - m4; { int ret = r_y_verb_suffix(z); if (ret == 0) goto lab5; /* call y_verb_suffix, line 220 */ if (ret < 0) return ret; } goto lab3; lab5: z->c = z->l - m4; { int ret = r_verb_suffix(z); if (ret == 0) goto lab2; /* call verb_suffix, line 221 */ if (ret < 0) return ret; } } lab3: lab2: z->c = z->l - m3; } { int m5 = z->l - z->c; (void)m5; /* do, line 223 */ { int ret = r_residual_suffix(z); if (ret == 0) goto lab6; /* call residual_suffix, line 223 */ if (ret < 0) return ret; } lab6: z->c = z->l - m5; } z->c = z->lb; { int c6 = z->c; /* do, line 225 */ { int ret = r_postlude(z); if (ret == 0) goto lab7; /* call postlude, line 225 */ if (ret < 0) return ret; } lab7: z->c = c6; } return 1; } extern struct SN_env * spanish_ISO_8859_1_create_env(void) { return SN_create_env(0, 3, 0); } extern void spanish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h000066400000000000000000000005051456444476200314330ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* spanish_ISO_8859_1_create_env(void); extern void spanish_ISO_8859_1_close_env(struct SN_env* z); extern int spanish_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } #endif 
LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_swedish.c000066400000000000000000000245501456444476200314350ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int swedish_ISO_8859_1_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_other_suffix(struct SN_env * z); static int r_consonant_pair(struct SN_env * z); static int r_main_suffix(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * swedish_ISO_8859_1_create_env(void); extern void swedish_ISO_8859_1_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[1] = { 'a' }; static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' }; static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' }; static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' }; static const symbol s_0_4[4] = { 'o', 'r', 'n', 'a' }; static const symbol s_0_5[2] = { 'a', 'd' }; static const symbol s_0_6[1] = { 'e' }; static const symbol s_0_7[3] = { 'a', 'd', 'e' }; static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' }; static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' }; static const symbol s_0_10[3] = { 'a', 'r', 'e' }; static const symbol s_0_11[4] = { 'a', 's', 't', 'e' }; static const symbol s_0_12[2] = { 'e', 'n' }; static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' }; static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' }; static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' }; static const symbol s_0_16[3] = { 'e', 'r', 'n' }; static const symbol s_0_17[2] = { 'a', 'r' }; static const symbol s_0_18[2] = { 'e', 'r' }; static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' }; static const symbol s_0_20[2] = { 'o', 'r' }; static const symbol s_0_21[1] = { 's' }; static const symbol s_0_22[2] = { 'a', 's' }; static const symbol s_0_23[5] = { 
'a', 'r', 'n', 'a', 's' }; static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' }; static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' }; static const symbol s_0_26[2] = { 'e', 's' }; static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' }; static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' }; static const symbol s_0_29[3] = { 'e', 'n', 's' }; static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' }; static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' }; static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' }; static const symbol s_0_33[2] = { 'a', 't' }; static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' }; static const symbol s_0_35[3] = { 'h', 'e', 't' }; static const symbol s_0_36[3] = { 'a', 's', 't' }; static const struct among a_0[37] = { /* 0 */ { 1, s_0_0, -1, 1, 0}, /* 1 */ { 4, s_0_1, 0, 1, 0}, /* 2 */ { 4, s_0_2, 0, 1, 0}, /* 3 */ { 7, s_0_3, 2, 1, 0}, /* 4 */ { 4, s_0_4, 0, 1, 0}, /* 5 */ { 2, s_0_5, -1, 1, 0}, /* 6 */ { 1, s_0_6, -1, 1, 0}, /* 7 */ { 3, s_0_7, 6, 1, 0}, /* 8 */ { 4, s_0_8, 6, 1, 0}, /* 9 */ { 4, s_0_9, 6, 1, 0}, /* 10 */ { 3, s_0_10, 6, 1, 0}, /* 11 */ { 4, s_0_11, 6, 1, 0}, /* 12 */ { 2, s_0_12, -1, 1, 0}, /* 13 */ { 5, s_0_13, 12, 1, 0}, /* 14 */ { 4, s_0_14, 12, 1, 0}, /* 15 */ { 5, s_0_15, 12, 1, 0}, /* 16 */ { 3, s_0_16, -1, 1, 0}, /* 17 */ { 2, s_0_17, -1, 1, 0}, /* 18 */ { 2, s_0_18, -1, 1, 0}, /* 19 */ { 5, s_0_19, 18, 1, 0}, /* 20 */ { 2, s_0_20, -1, 1, 0}, /* 21 */ { 1, s_0_21, -1, 2, 0}, /* 22 */ { 2, s_0_22, 21, 1, 0}, /* 23 */ { 5, s_0_23, 22, 1, 0}, /* 24 */ { 5, s_0_24, 22, 1, 0}, /* 25 */ { 5, s_0_25, 22, 1, 0}, /* 26 */ { 2, s_0_26, 21, 1, 0}, /* 27 */ { 4, s_0_27, 26, 1, 0}, /* 28 */ { 5, s_0_28, 26, 1, 0}, /* 29 */ { 3, s_0_29, 21, 1, 0}, /* 30 */ { 5, s_0_30, 29, 1, 0}, /* 31 */ { 6, s_0_31, 29, 1, 0}, /* 32 */ { 4, s_0_32, 21, 1, 0}, /* 33 */ { 2, s_0_33, -1, 1, 0}, /* 34 */ { 5, s_0_34, -1, 1, 0}, /* 35 */ { 3, s_0_35, -1, 1, 0}, /* 36 */ { 3, s_0_36, -1, 1, 0} }; static const 
symbol s_1_0[2] = { 'd', 'd' }; static const symbol s_1_1[2] = { 'g', 'd' }; static const symbol s_1_2[2] = { 'n', 'n' }; static const symbol s_1_3[2] = { 'd', 't' }; static const symbol s_1_4[2] = { 'g', 't' }; static const symbol s_1_5[2] = { 'k', 't' }; static const symbol s_1_6[2] = { 't', 't' }; static const struct among a_1[7] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0}, /* 2 */ { 2, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0}, /* 4 */ { 2, s_1_4, -1, -1, 0}, /* 5 */ { 2, s_1_5, -1, -1, 0}, /* 6 */ { 2, s_1_6, -1, -1, 0} }; static const symbol s_2_0[2] = { 'i', 'g' }; static const symbol s_2_1[3] = { 'l', 'i', 'g' }; static const symbol s_2_2[3] = { 'e', 'l', 's' }; static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' }; static const symbol s_2_4[4] = { 'l', 0xF6, 's', 't' }; static const struct among a_2[5] = { /* 0 */ { 2, s_2_0, -1, 1, 0}, /* 1 */ { 3, s_2_1, 0, 1, 0}, /* 2 */ { 3, s_2_2, -1, 1, 0}, /* 3 */ { 5, s_2_3, -1, 3, 0}, /* 4 */ { 4, s_2_4, -1, 2, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; static const unsigned char g_s_ending[] = { 119, 127, 149 }; static const symbol s_0[] = { 'l', 0xF6, 's' }; static const symbol s_1[] = { 'f', 'u', 'l', 'l' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c_test = z->c; /* test, line 29 */ { int ret = z->c + 3; if (0 > ret || ret > z->l) return 0; z->c = ret; /* hop, line 29 */ } z->I[1] = z->c; /* setmark x, line 29 */ z->c = c_test; } if (out_grouping(z, g_v, 97, 246, 1) < 0) return 0; /* goto */ /* grouping v, line 30 */ { /* gopast */ /* non v, line 30 */ int ret = in_grouping(z, g_v, 97, 246, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 30 */ /* try, line 31 */ if (!(z->I[0] < z->I[1])) goto lab0; z->I[0] = z->I[1]; lab0: return 1; } static int r_main_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 37 */ int m1 = z->l - z->c; 
(void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 37 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 37 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_0, 37); /* substring, line 37 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 37 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 44 */ if (ret < 0) return ret; } break; case 2: if (in_grouping_b(z, g_s_ending, 98, 121, 0)) return 0; { int ret = slice_del(z); /* delete, line 46 */ if (ret < 0) return ret; } break; } return 1; } static int r_consonant_pair(struct SN_env * z) { { int mlimit; /* setlimit, line 50 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 50 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* and, line 52 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit; return 0; } /* among, line 51 */ z->c = z->l - m2; z->ket = z->c; /* [, line 52 */ if (z->c <= z->lb) { z->lb = mlimit; return 0; } z->c--; /* next, line 52 */ z->bra = z->c; /* ], line 52 */ { int ret = slice_del(z); /* delete, line 52 */ if (ret < 0) return ret; } } z->lb = mlimit; } return 1; } static int r_other_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 55 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 55 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 56 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_2, 5); /* substring, line 56 */ if (!(among_var)) { z->lb = 
mlimit; return 0; } z->bra = z->c; /* ], line 56 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = slice_del(z); /* delete, line 57 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 3, s_0); /* <-, line 58 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 4, s_1); /* <-, line 59 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } extern int swedish_ISO_8859_1_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 66 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 66 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 67 */ { int m2 = z->l - z->c; (void)m2; /* do, line 68 */ { int ret = r_main_suffix(z); if (ret == 0) goto lab1; /* call main_suffix, line 68 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 69 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab2; /* call consonant_pair, line 69 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 70 */ { int ret = r_other_suffix(z); if (ret == 0) goto lab3; /* call other_suffix, line 70 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } z->c = z->lb; return 1; } extern struct SN_env * swedish_ISO_8859_1_create_env(void) { return SN_create_env(0, 2, 0); } extern void swedish_ISO_8859_1_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h000066400000000000000000000005051456444476200314340ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* swedish_ISO_8859_1_create_env(void); extern void swedish_ISO_8859_1_close_env(struct SN_env* z); extern int swedish_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } #endif 
LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_2_romanian.c000066400000000000000000001100011456444476200315570ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int romanian_ISO_8859_2_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_vowel_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_combo_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_step_0(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * romanian_ISO_8859_2_create_env(void); extern void romanian_ISO_8859_2_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[1] = { 'I' }; static const symbol s_0_2[1] = { 'U' }; static const struct among a_0[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 1, s_0_1, 0, 1, 0}, /* 2 */ { 1, s_0_2, 0, 2, 0} }; static const symbol s_1_0[2] = { 'e', 'a' }; static const symbol s_1_1[4] = { 'a', 0xFE, 'i', 'a' }; static const symbol s_1_2[3] = { 'a', 'u', 'a' }; static const symbol s_1_3[3] = { 'i', 'u', 'a' }; static const symbol s_1_4[4] = { 'a', 0xFE, 'i', 'e' }; static const symbol s_1_5[3] = { 'e', 'l', 'e' }; static const symbol s_1_6[3] = { 'i', 'l', 'e' }; static const symbol s_1_7[4] = { 'i', 'i', 'l', 'e' }; static const symbol s_1_8[3] = { 'i', 'e', 'i' }; static const symbol s_1_9[4] = { 'a', 't', 'e', 'i' }; static const symbol s_1_10[2] = { 'i', 'i' }; static const symbol s_1_11[4] = { 'u', 'l', 'u', 'i' }; static const symbol s_1_12[2] = { 'u', 'l' }; static const symbol s_1_13[4] = { 'e', 'l', 'o', 'r' }; static const 
symbol s_1_14[4] = { 'i', 'l', 'o', 'r' }; static const symbol s_1_15[5] = { 'i', 'i', 'l', 'o', 'r' }; static const struct among a_1[16] = { /* 0 */ { 2, s_1_0, -1, 3, 0}, /* 1 */ { 4, s_1_1, -1, 7, 0}, /* 2 */ { 3, s_1_2, -1, 2, 0}, /* 3 */ { 3, s_1_3, -1, 4, 0}, /* 4 */ { 4, s_1_4, -1, 7, 0}, /* 5 */ { 3, s_1_5, -1, 3, 0}, /* 6 */ { 3, s_1_6, -1, 5, 0}, /* 7 */ { 4, s_1_7, 6, 4, 0}, /* 8 */ { 3, s_1_8, -1, 4, 0}, /* 9 */ { 4, s_1_9, -1, 6, 0}, /* 10 */ { 2, s_1_10, -1, 4, 0}, /* 11 */ { 4, s_1_11, -1, 1, 0}, /* 12 */ { 2, s_1_12, -1, 1, 0}, /* 13 */ { 4, s_1_13, -1, 3, 0}, /* 14 */ { 4, s_1_14, -1, 4, 0}, /* 15 */ { 5, s_1_15, 14, 4, 0} }; static const symbol s_2_0[5] = { 'i', 'c', 'a', 'l', 'a' }; static const symbol s_2_1[5] = { 'i', 'c', 'i', 'v', 'a' }; static const symbol s_2_2[5] = { 'a', 't', 'i', 'v', 'a' }; static const symbol s_2_3[5] = { 'i', 't', 'i', 'v', 'a' }; static const symbol s_2_4[5] = { 'i', 'c', 'a', 'l', 'e' }; static const symbol s_2_5[6] = { 'a', 0xFE, 'i', 'u', 'n', 'e' }; static const symbol s_2_6[6] = { 'i', 0xFE, 'i', 'u', 'n', 'e' }; static const symbol s_2_7[6] = { 'a', 't', 'o', 'a', 'r', 'e' }; static const symbol s_2_8[6] = { 'i', 't', 'o', 'a', 'r', 'e' }; static const symbol s_2_9[6] = { 0xE3, 't', 'o', 'a', 'r', 'e' }; static const symbol s_2_10[7] = { 'i', 'c', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_11[9] = { 'a', 'b', 'i', 'l', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_12[9] = { 'i', 'b', 'i', 'l', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_13[7] = { 'i', 'v', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_14[5] = { 'i', 'c', 'i', 'v', 'e' }; static const symbol s_2_15[5] = { 'a', 't', 'i', 'v', 'e' }; static const symbol s_2_16[5] = { 'i', 't', 'i', 'v', 'e' }; static const symbol s_2_17[5] = { 'i', 'c', 'a', 'l', 'i' }; static const symbol s_2_18[5] = { 'a', 't', 'o', 'r', 'i' }; static const symbol s_2_19[7] = { 'i', 'c', 'a', 't', 'o', 'r', 'i' }; static const symbol s_2_20[5] = { 'i', 
't', 'o', 'r', 'i' }; static const symbol s_2_21[5] = { 0xE3, 't', 'o', 'r', 'i' }; static const symbol s_2_22[7] = { 'i', 'c', 'i', 't', 'a', 't', 'i' }; static const symbol s_2_23[9] = { 'a', 'b', 'i', 'l', 'i', 't', 'a', 't', 'i' }; static const symbol s_2_24[7] = { 'i', 'v', 'i', 't', 'a', 't', 'i' }; static const symbol s_2_25[5] = { 'i', 'c', 'i', 'v', 'i' }; static const symbol s_2_26[5] = { 'a', 't', 'i', 'v', 'i' }; static const symbol s_2_27[5] = { 'i', 't', 'i', 'v', 'i' }; static const symbol s_2_28[6] = { 'i', 'c', 'i', 't', 0xE3, 'i' }; static const symbol s_2_29[8] = { 'a', 'b', 'i', 'l', 'i', 't', 0xE3, 'i' }; static const symbol s_2_30[6] = { 'i', 'v', 'i', 't', 0xE3, 'i' }; static const symbol s_2_31[7] = { 'i', 'c', 'i', 't', 0xE3, 0xFE, 'i' }; static const symbol s_2_32[9] = { 'a', 'b', 'i', 'l', 'i', 't', 0xE3, 0xFE, 'i' }; static const symbol s_2_33[7] = { 'i', 'v', 'i', 't', 0xE3, 0xFE, 'i' }; static const symbol s_2_34[4] = { 'i', 'c', 'a', 'l' }; static const symbol s_2_35[4] = { 'a', 't', 'o', 'r' }; static const symbol s_2_36[6] = { 'i', 'c', 'a', 't', 'o', 'r' }; static const symbol s_2_37[4] = { 'i', 't', 'o', 'r' }; static const symbol s_2_38[4] = { 0xE3, 't', 'o', 'r' }; static const symbol s_2_39[4] = { 'i', 'c', 'i', 'v' }; static const symbol s_2_40[4] = { 'a', 't', 'i', 'v' }; static const symbol s_2_41[4] = { 'i', 't', 'i', 'v' }; static const symbol s_2_42[5] = { 'i', 'c', 'a', 'l', 0xE3 }; static const symbol s_2_43[5] = { 'i', 'c', 'i', 'v', 0xE3 }; static const symbol s_2_44[5] = { 'a', 't', 'i', 'v', 0xE3 }; static const symbol s_2_45[5] = { 'i', 't', 'i', 'v', 0xE3 }; static const struct among a_2[46] = { /* 0 */ { 5, s_2_0, -1, 4, 0}, /* 1 */ { 5, s_2_1, -1, 4, 0}, /* 2 */ { 5, s_2_2, -1, 5, 0}, /* 3 */ { 5, s_2_3, -1, 6, 0}, /* 4 */ { 5, s_2_4, -1, 4, 0}, /* 5 */ { 6, s_2_5, -1, 5, 0}, /* 6 */ { 6, s_2_6, -1, 6, 0}, /* 7 */ { 6, s_2_7, -1, 5, 0}, /* 8 */ { 6, s_2_8, -1, 6, 0}, /* 9 */ { 6, s_2_9, -1, 5, 0}, /* 10 */ { 7, 
s_2_10, -1, 4, 0}, /* 11 */ { 9, s_2_11, -1, 1, 0}, /* 12 */ { 9, s_2_12, -1, 2, 0}, /* 13 */ { 7, s_2_13, -1, 3, 0}, /* 14 */ { 5, s_2_14, -1, 4, 0}, /* 15 */ { 5, s_2_15, -1, 5, 0}, /* 16 */ { 5, s_2_16, -1, 6, 0}, /* 17 */ { 5, s_2_17, -1, 4, 0}, /* 18 */ { 5, s_2_18, -1, 5, 0}, /* 19 */ { 7, s_2_19, 18, 4, 0}, /* 20 */ { 5, s_2_20, -1, 6, 0}, /* 21 */ { 5, s_2_21, -1, 5, 0}, /* 22 */ { 7, s_2_22, -1, 4, 0}, /* 23 */ { 9, s_2_23, -1, 1, 0}, /* 24 */ { 7, s_2_24, -1, 3, 0}, /* 25 */ { 5, s_2_25, -1, 4, 0}, /* 26 */ { 5, s_2_26, -1, 5, 0}, /* 27 */ { 5, s_2_27, -1, 6, 0}, /* 28 */ { 6, s_2_28, -1, 4, 0}, /* 29 */ { 8, s_2_29, -1, 1, 0}, /* 30 */ { 6, s_2_30, -1, 3, 0}, /* 31 */ { 7, s_2_31, -1, 4, 0}, /* 32 */ { 9, s_2_32, -1, 1, 0}, /* 33 */ { 7, s_2_33, -1, 3, 0}, /* 34 */ { 4, s_2_34, -1, 4, 0}, /* 35 */ { 4, s_2_35, -1, 5, 0}, /* 36 */ { 6, s_2_36, 35, 4, 0}, /* 37 */ { 4, s_2_37, -1, 6, 0}, /* 38 */ { 4, s_2_38, -1, 5, 0}, /* 39 */ { 4, s_2_39, -1, 4, 0}, /* 40 */ { 4, s_2_40, -1, 5, 0}, /* 41 */ { 4, s_2_41, -1, 6, 0}, /* 42 */ { 5, s_2_42, -1, 4, 0}, /* 43 */ { 5, s_2_43, -1, 4, 0}, /* 44 */ { 5, s_2_44, -1, 5, 0}, /* 45 */ { 5, s_2_45, -1, 6, 0} }; static const symbol s_3_0[3] = { 'i', 'c', 'a' }; static const symbol s_3_1[5] = { 'a', 'b', 'i', 'l', 'a' }; static const symbol s_3_2[5] = { 'i', 'b', 'i', 'l', 'a' }; static const symbol s_3_3[4] = { 'o', 'a', 's', 'a' }; static const symbol s_3_4[3] = { 'a', 't', 'a' }; static const symbol s_3_5[3] = { 'i', 't', 'a' }; static const symbol s_3_6[4] = { 'a', 'n', 't', 'a' }; static const symbol s_3_7[4] = { 'i', 's', 't', 'a' }; static const symbol s_3_8[3] = { 'u', 't', 'a' }; static const symbol s_3_9[3] = { 'i', 'v', 'a' }; static const symbol s_3_10[2] = { 'i', 'c' }; static const symbol s_3_11[3] = { 'i', 'c', 'e' }; static const symbol s_3_12[5] = { 'a', 'b', 'i', 'l', 'e' }; static const symbol s_3_13[5] = { 'i', 'b', 'i', 'l', 'e' }; static const symbol s_3_14[4] = { 'i', 's', 'm', 'e' }; static const 
symbol s_3_15[4] = { 'i', 'u', 'n', 'e' }; static const symbol s_3_16[4] = { 'o', 'a', 's', 'e' }; static const symbol s_3_17[3] = { 'a', 't', 'e' }; static const symbol s_3_18[5] = { 'i', 't', 'a', 't', 'e' }; static const symbol s_3_19[3] = { 'i', 't', 'e' }; static const symbol s_3_20[4] = { 'a', 'n', 't', 'e' }; static const symbol s_3_21[4] = { 'i', 's', 't', 'e' }; static const symbol s_3_22[3] = { 'u', 't', 'e' }; static const symbol s_3_23[3] = { 'i', 'v', 'e' }; static const symbol s_3_24[3] = { 'i', 'c', 'i' }; static const symbol s_3_25[5] = { 'a', 'b', 'i', 'l', 'i' }; static const symbol s_3_26[5] = { 'i', 'b', 'i', 'l', 'i' }; static const symbol s_3_27[4] = { 'i', 'u', 'n', 'i' }; static const symbol s_3_28[5] = { 'a', 't', 'o', 'r', 'i' }; static const symbol s_3_29[3] = { 'o', 's', 'i' }; static const symbol s_3_30[3] = { 'a', 't', 'i' }; static const symbol s_3_31[5] = { 'i', 't', 'a', 't', 'i' }; static const symbol s_3_32[3] = { 'i', 't', 'i' }; static const symbol s_3_33[4] = { 'a', 'n', 't', 'i' }; static const symbol s_3_34[4] = { 'i', 's', 't', 'i' }; static const symbol s_3_35[3] = { 'u', 't', 'i' }; static const symbol s_3_36[4] = { 'i', 0xBA, 't', 'i' }; static const symbol s_3_37[3] = { 'i', 'v', 'i' }; static const symbol s_3_38[3] = { 'o', 0xBA, 'i' }; static const symbol s_3_39[4] = { 'i', 't', 0xE3, 'i' }; static const symbol s_3_40[5] = { 'i', 't', 0xE3, 0xFE, 'i' }; static const symbol s_3_41[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_3_42[4] = { 'i', 'b', 'i', 'l' }; static const symbol s_3_43[3] = { 'i', 's', 'm' }; static const symbol s_3_44[4] = { 'a', 't', 'o', 'r' }; static const symbol s_3_45[2] = { 'o', 's' }; static const symbol s_3_46[2] = { 'a', 't' }; static const symbol s_3_47[2] = { 'i', 't' }; static const symbol s_3_48[3] = { 'a', 'n', 't' }; static const symbol s_3_49[3] = { 'i', 's', 't' }; static const symbol s_3_50[2] = { 'u', 't' }; static const symbol s_3_51[2] = { 'i', 'v' }; static const symbol 
s_3_52[3] = { 'i', 'c', 0xE3 }; static const symbol s_3_53[5] = { 'a', 'b', 'i', 'l', 0xE3 }; static const symbol s_3_54[5] = { 'i', 'b', 'i', 'l', 0xE3 }; static const symbol s_3_55[4] = { 'o', 'a', 's', 0xE3 }; static const symbol s_3_56[3] = { 'a', 't', 0xE3 }; static const symbol s_3_57[3] = { 'i', 't', 0xE3 }; static const symbol s_3_58[4] = { 'a', 'n', 't', 0xE3 }; static const symbol s_3_59[4] = { 'i', 's', 't', 0xE3 }; static const symbol s_3_60[3] = { 'u', 't', 0xE3 }; static const symbol s_3_61[3] = { 'i', 'v', 0xE3 }; static const struct among a_3[62] = { /* 0 */ { 3, s_3_0, -1, 1, 0}, /* 1 */ { 5, s_3_1, -1, 1, 0}, /* 2 */ { 5, s_3_2, -1, 1, 0}, /* 3 */ { 4, s_3_3, -1, 1, 0}, /* 4 */ { 3, s_3_4, -1, 1, 0}, /* 5 */ { 3, s_3_5, -1, 1, 0}, /* 6 */ { 4, s_3_6, -1, 1, 0}, /* 7 */ { 4, s_3_7, -1, 3, 0}, /* 8 */ { 3, s_3_8, -1, 1, 0}, /* 9 */ { 3, s_3_9, -1, 1, 0}, /* 10 */ { 2, s_3_10, -1, 1, 0}, /* 11 */ { 3, s_3_11, -1, 1, 0}, /* 12 */ { 5, s_3_12, -1, 1, 0}, /* 13 */ { 5, s_3_13, -1, 1, 0}, /* 14 */ { 4, s_3_14, -1, 3, 0}, /* 15 */ { 4, s_3_15, -1, 2, 0}, /* 16 */ { 4, s_3_16, -1, 1, 0}, /* 17 */ { 3, s_3_17, -1, 1, 0}, /* 18 */ { 5, s_3_18, 17, 1, 0}, /* 19 */ { 3, s_3_19, -1, 1, 0}, /* 20 */ { 4, s_3_20, -1, 1, 0}, /* 21 */ { 4, s_3_21, -1, 3, 0}, /* 22 */ { 3, s_3_22, -1, 1, 0}, /* 23 */ { 3, s_3_23, -1, 1, 0}, /* 24 */ { 3, s_3_24, -1, 1, 0}, /* 25 */ { 5, s_3_25, -1, 1, 0}, /* 26 */ { 5, s_3_26, -1, 1, 0}, /* 27 */ { 4, s_3_27, -1, 2, 0}, /* 28 */ { 5, s_3_28, -1, 1, 0}, /* 29 */ { 3, s_3_29, -1, 1, 0}, /* 30 */ { 3, s_3_30, -1, 1, 0}, /* 31 */ { 5, s_3_31, 30, 1, 0}, /* 32 */ { 3, s_3_32, -1, 1, 0}, /* 33 */ { 4, s_3_33, -1, 1, 0}, /* 34 */ { 4, s_3_34, -1, 3, 0}, /* 35 */ { 3, s_3_35, -1, 1, 0}, /* 36 */ { 4, s_3_36, -1, 3, 0}, /* 37 */ { 3, s_3_37, -1, 1, 0}, /* 38 */ { 3, s_3_38, -1, 1, 0}, /* 39 */ { 4, s_3_39, -1, 1, 0}, /* 40 */ { 5, s_3_40, -1, 1, 0}, /* 41 */ { 4, s_3_41, -1, 1, 0}, /* 42 */ { 4, s_3_42, -1, 1, 0}, /* 43 */ { 3, s_3_43, -1, 
3, 0}, /* 44 */ { 4, s_3_44, -1, 1, 0}, /* 45 */ { 2, s_3_45, -1, 1, 0}, /* 46 */ { 2, s_3_46, -1, 1, 0}, /* 47 */ { 2, s_3_47, -1, 1, 0}, /* 48 */ { 3, s_3_48, -1, 1, 0}, /* 49 */ { 3, s_3_49, -1, 3, 0}, /* 50 */ { 2, s_3_50, -1, 1, 0}, /* 51 */ { 2, s_3_51, -1, 1, 0}, /* 52 */ { 3, s_3_52, -1, 1, 0}, /* 53 */ { 5, s_3_53, -1, 1, 0}, /* 54 */ { 5, s_3_54, -1, 1, 0}, /* 55 */ { 4, s_3_55, -1, 1, 0}, /* 56 */ { 3, s_3_56, -1, 1, 0}, /* 57 */ { 3, s_3_57, -1, 1, 0}, /* 58 */ { 4, s_3_58, -1, 1, 0}, /* 59 */ { 4, s_3_59, -1, 3, 0}, /* 60 */ { 3, s_3_60, -1, 1, 0}, /* 61 */ { 3, s_3_61, -1, 1, 0} }; static const symbol s_4_0[2] = { 'e', 'a' }; static const symbol s_4_1[2] = { 'i', 'a' }; static const symbol s_4_2[3] = { 'e', 's', 'c' }; static const symbol s_4_3[3] = { 0xE3, 's', 'c' }; static const symbol s_4_4[3] = { 'i', 'n', 'd' }; static const symbol s_4_5[3] = { 0xE2, 'n', 'd' }; static const symbol s_4_6[3] = { 'a', 'r', 'e' }; static const symbol s_4_7[3] = { 'e', 'r', 'e' }; static const symbol s_4_8[3] = { 'i', 'r', 'e' }; static const symbol s_4_9[3] = { 0xE2, 'r', 'e' }; static const symbol s_4_10[2] = { 's', 'e' }; static const symbol s_4_11[3] = { 'a', 's', 'e' }; static const symbol s_4_12[4] = { 's', 'e', 's', 'e' }; static const symbol s_4_13[3] = { 'i', 's', 'e' }; static const symbol s_4_14[3] = { 'u', 's', 'e' }; static const symbol s_4_15[3] = { 0xE2, 's', 'e' }; static const symbol s_4_16[4] = { 'e', 0xBA, 't', 'e' }; static const symbol s_4_17[4] = { 0xE3, 0xBA, 't', 'e' }; static const symbol s_4_18[3] = { 'e', 'z', 'e' }; static const symbol s_4_19[2] = { 'a', 'i' }; static const symbol s_4_20[3] = { 'e', 'a', 'i' }; static const symbol s_4_21[3] = { 'i', 'a', 'i' }; static const symbol s_4_22[3] = { 's', 'e', 'i' }; static const symbol s_4_23[4] = { 'e', 0xBA, 't', 'i' }; static const symbol s_4_24[4] = { 0xE3, 0xBA, 't', 'i' }; static const symbol s_4_25[2] = { 'u', 'i' }; static const symbol s_4_26[3] = { 'e', 'z', 'i' }; static const symbol 
s_4_27[3] = { 'a', 0xBA, 'i' }; static const symbol s_4_28[4] = { 's', 'e', 0xBA, 'i' }; static const symbol s_4_29[5] = { 'a', 's', 'e', 0xBA, 'i' }; static const symbol s_4_30[6] = { 's', 'e', 's', 'e', 0xBA, 'i' }; static const symbol s_4_31[5] = { 'i', 's', 'e', 0xBA, 'i' }; static const symbol s_4_32[5] = { 'u', 's', 'e', 0xBA, 'i' }; static const symbol s_4_33[5] = { 0xE2, 's', 'e', 0xBA, 'i' }; static const symbol s_4_34[3] = { 'i', 0xBA, 'i' }; static const symbol s_4_35[3] = { 'u', 0xBA, 'i' }; static const symbol s_4_36[3] = { 0xE2, 0xBA, 'i' }; static const symbol s_4_37[2] = { 0xE2, 'i' }; static const symbol s_4_38[3] = { 'a', 0xFE, 'i' }; static const symbol s_4_39[4] = { 'e', 'a', 0xFE, 'i' }; static const symbol s_4_40[4] = { 'i', 'a', 0xFE, 'i' }; static const symbol s_4_41[3] = { 'e', 0xFE, 'i' }; static const symbol s_4_42[3] = { 'i', 0xFE, 'i' }; static const symbol s_4_43[3] = { 0xE2, 0xFE, 'i' }; static const symbol s_4_44[5] = { 'a', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_45[6] = { 's', 'e', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_46[7] = { 'a', 's', 'e', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_47[8] = { 's', 'e', 's', 'e', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_48[7] = { 'i', 's', 'e', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_49[7] = { 'u', 's', 'e', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_50[7] = { 0xE2, 's', 'e', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_51[5] = { 'i', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_52[5] = { 'u', 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_53[5] = { 0xE2, 'r', 0xE3, 0xFE, 'i' }; static const symbol s_4_54[2] = { 'a', 'm' }; static const symbol s_4_55[3] = { 'e', 'a', 'm' }; static const symbol s_4_56[3] = { 'i', 'a', 'm' }; static const symbol s_4_57[2] = { 'e', 'm' }; static const symbol s_4_58[4] = { 'a', 's', 'e', 'm' }; static const symbol s_4_59[5] = { 's', 'e', 's', 'e', 'm' }; static const symbol s_4_60[4] = { 'i', 's', 'e', 'm' }; static 
const symbol s_4_61[4] = { 'u', 's', 'e', 'm' }; static const symbol s_4_62[4] = { 0xE2, 's', 'e', 'm' }; static const symbol s_4_63[2] = { 'i', 'm' }; static const symbol s_4_64[2] = { 0xE2, 'm' }; static const symbol s_4_65[2] = { 0xE3, 'm' }; static const symbol s_4_66[4] = { 'a', 'r', 0xE3, 'm' }; static const symbol s_4_67[5] = { 's', 'e', 'r', 0xE3, 'm' }; static const symbol s_4_68[6] = { 'a', 's', 'e', 'r', 0xE3, 'm' }; static const symbol s_4_69[7] = { 's', 'e', 's', 'e', 'r', 0xE3, 'm' }; static const symbol s_4_70[6] = { 'i', 's', 'e', 'r', 0xE3, 'm' }; static const symbol s_4_71[6] = { 'u', 's', 'e', 'r', 0xE3, 'm' }; static const symbol s_4_72[6] = { 0xE2, 's', 'e', 'r', 0xE3, 'm' }; static const symbol s_4_73[4] = { 'i', 'r', 0xE3, 'm' }; static const symbol s_4_74[4] = { 'u', 'r', 0xE3, 'm' }; static const symbol s_4_75[4] = { 0xE2, 'r', 0xE3, 'm' }; static const symbol s_4_76[2] = { 'a', 'u' }; static const symbol s_4_77[3] = { 'e', 'a', 'u' }; static const symbol s_4_78[3] = { 'i', 'a', 'u' }; static const symbol s_4_79[4] = { 'i', 'n', 'd', 'u' }; static const symbol s_4_80[4] = { 0xE2, 'n', 'd', 'u' }; static const symbol s_4_81[2] = { 'e', 'z' }; static const symbol s_4_82[5] = { 'e', 'a', 's', 'c', 0xE3 }; static const symbol s_4_83[3] = { 'a', 'r', 0xE3 }; static const symbol s_4_84[4] = { 's', 'e', 'r', 0xE3 }; static const symbol s_4_85[5] = { 'a', 's', 'e', 'r', 0xE3 }; static const symbol s_4_86[6] = { 's', 'e', 's', 'e', 'r', 0xE3 }; static const symbol s_4_87[5] = { 'i', 's', 'e', 'r', 0xE3 }; static const symbol s_4_88[5] = { 'u', 's', 'e', 'r', 0xE3 }; static const symbol s_4_89[5] = { 0xE2, 's', 'e', 'r', 0xE3 }; static const symbol s_4_90[3] = { 'i', 'r', 0xE3 }; static const symbol s_4_91[3] = { 'u', 'r', 0xE3 }; static const symbol s_4_92[3] = { 0xE2, 'r', 0xE3 }; static const symbol s_4_93[4] = { 'e', 'a', 'z', 0xE3 }; static const struct among a_4[94] = { /* 0 */ { 2, s_4_0, -1, 1, 0}, /* 1 */ { 2, s_4_1, -1, 1, 0}, /* 2 */ { 3, 
s_4_2, -1, 1, 0}, /* 3 */ { 3, s_4_3, -1, 1, 0}, /* 4 */ { 3, s_4_4, -1, 1, 0}, /* 5 */ { 3, s_4_5, -1, 1, 0}, /* 6 */ { 3, s_4_6, -1, 1, 0}, /* 7 */ { 3, s_4_7, -1, 1, 0}, /* 8 */ { 3, s_4_8, -1, 1, 0}, /* 9 */ { 3, s_4_9, -1, 1, 0}, /* 10 */ { 2, s_4_10, -1, 2, 0}, /* 11 */ { 3, s_4_11, 10, 1, 0}, /* 12 */ { 4, s_4_12, 10, 2, 0}, /* 13 */ { 3, s_4_13, 10, 1, 0}, /* 14 */ { 3, s_4_14, 10, 1, 0}, /* 15 */ { 3, s_4_15, 10, 1, 0}, /* 16 */ { 4, s_4_16, -1, 1, 0}, /* 17 */ { 4, s_4_17, -1, 1, 0}, /* 18 */ { 3, s_4_18, -1, 1, 0}, /* 19 */ { 2, s_4_19, -1, 1, 0}, /* 20 */ { 3, s_4_20, 19, 1, 0}, /* 21 */ { 3, s_4_21, 19, 1, 0}, /* 22 */ { 3, s_4_22, -1, 2, 0}, /* 23 */ { 4, s_4_23, -1, 1, 0}, /* 24 */ { 4, s_4_24, -1, 1, 0}, /* 25 */ { 2, s_4_25, -1, 1, 0}, /* 26 */ { 3, s_4_26, -1, 1, 0}, /* 27 */ { 3, s_4_27, -1, 1, 0}, /* 28 */ { 4, s_4_28, -1, 2, 0}, /* 29 */ { 5, s_4_29, 28, 1, 0}, /* 30 */ { 6, s_4_30, 28, 2, 0}, /* 31 */ { 5, s_4_31, 28, 1, 0}, /* 32 */ { 5, s_4_32, 28, 1, 0}, /* 33 */ { 5, s_4_33, 28, 1, 0}, /* 34 */ { 3, s_4_34, -1, 1, 0}, /* 35 */ { 3, s_4_35, -1, 1, 0}, /* 36 */ { 3, s_4_36, -1, 1, 0}, /* 37 */ { 2, s_4_37, -1, 1, 0}, /* 38 */ { 3, s_4_38, -1, 2, 0}, /* 39 */ { 4, s_4_39, 38, 1, 0}, /* 40 */ { 4, s_4_40, 38, 1, 0}, /* 41 */ { 3, s_4_41, -1, 2, 0}, /* 42 */ { 3, s_4_42, -1, 2, 0}, /* 43 */ { 3, s_4_43, -1, 2, 0}, /* 44 */ { 5, s_4_44, -1, 1, 0}, /* 45 */ { 6, s_4_45, -1, 2, 0}, /* 46 */ { 7, s_4_46, 45, 1, 0}, /* 47 */ { 8, s_4_47, 45, 2, 0}, /* 48 */ { 7, s_4_48, 45, 1, 0}, /* 49 */ { 7, s_4_49, 45, 1, 0}, /* 50 */ { 7, s_4_50, 45, 1, 0}, /* 51 */ { 5, s_4_51, -1, 1, 0}, /* 52 */ { 5, s_4_52, -1, 1, 0}, /* 53 */ { 5, s_4_53, -1, 1, 0}, /* 54 */ { 2, s_4_54, -1, 1, 0}, /* 55 */ { 3, s_4_55, 54, 1, 0}, /* 56 */ { 3, s_4_56, 54, 1, 0}, /* 57 */ { 2, s_4_57, -1, 2, 0}, /* 58 */ { 4, s_4_58, 57, 1, 0}, /* 59 */ { 5, s_4_59, 57, 2, 0}, /* 60 */ { 4, s_4_60, 57, 1, 0}, /* 61 */ { 4, s_4_61, 57, 1, 0}, /* 62 */ { 4, s_4_62, 57, 1, 0}, /* 63 */ { 2, 
s_4_63, -1, 2, 0}, /* 64 */ { 2, s_4_64, -1, 2, 0}, /* 65 */ { 2, s_4_65, -1, 2, 0}, /* 66 */ { 4, s_4_66, 65, 1, 0}, /* 67 */ { 5, s_4_67, 65, 2, 0}, /* 68 */ { 6, s_4_68, 67, 1, 0}, /* 69 */ { 7, s_4_69, 67, 2, 0}, /* 70 */ { 6, s_4_70, 67, 1, 0}, /* 71 */ { 6, s_4_71, 67, 1, 0}, /* 72 */ { 6, s_4_72, 67, 1, 0}, /* 73 */ { 4, s_4_73, 65, 1, 0}, /* 74 */ { 4, s_4_74, 65, 1, 0}, /* 75 */ { 4, s_4_75, 65, 1, 0}, /* 76 */ { 2, s_4_76, -1, 1, 0}, /* 77 */ { 3, s_4_77, 76, 1, 0}, /* 78 */ { 3, s_4_78, 76, 1, 0}, /* 79 */ { 4, s_4_79, -1, 1, 0}, /* 80 */ { 4, s_4_80, -1, 1, 0}, /* 81 */ { 2, s_4_81, -1, 1, 0}, /* 82 */ { 5, s_4_82, -1, 1, 0}, /* 83 */ { 3, s_4_83, -1, 1, 0}, /* 84 */ { 4, s_4_84, -1, 2, 0}, /* 85 */ { 5, s_4_85, 84, 1, 0}, /* 86 */ { 6, s_4_86, 84, 2, 0}, /* 87 */ { 5, s_4_87, 84, 1, 0}, /* 88 */ { 5, s_4_88, 84, 1, 0}, /* 89 */ { 5, s_4_89, 84, 1, 0}, /* 90 */ { 3, s_4_90, -1, 1, 0}, /* 91 */ { 3, s_4_91, -1, 1, 0}, /* 92 */ { 3, s_4_92, -1, 1, 0}, /* 93 */ { 4, s_4_93, -1, 1, 0} }; static const symbol s_5_0[1] = { 'a' }; static const symbol s_5_1[1] = { 'e' }; static const symbol s_5_2[2] = { 'i', 'e' }; static const symbol s_5_3[1] = { 'i' }; static const symbol s_5_4[1] = { 0xE3 }; static const struct among a_5[5] = { /* 0 */ { 1, s_5_0, -1, 1, 0}, /* 1 */ { 1, s_5_1, -1, 1, 0}, /* 2 */ { 2, s_5_2, 1, 1, 0}, /* 3 */ { 1, s_5_3, -1, 1, 0}, /* 4 */ { 1, s_5_4, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 32 }; static const symbol s_0[] = { 'u' }; static const symbol s_1[] = { 'U' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'I' }; static const symbol s_4[] = { 'i' }; static const symbol s_5[] = { 'u' }; static const symbol s_6[] = { 'a' }; static const symbol s_7[] = { 'e' }; static const symbol s_8[] = { 'i' }; static const symbol s_9[] = { 'a', 'b' }; static const symbol s_10[] = { 'i' }; static const symbol s_11[] = { 'a', 't' }; static const symbol s_12[] = { 'a', 
0xFE, 'i' }; static const symbol s_13[] = { 'a', 'b', 'i', 'l' }; static const symbol s_14[] = { 'i', 'b', 'i', 'l' }; static const symbol s_15[] = { 'i', 'v' }; static const symbol s_16[] = { 'i', 'c' }; static const symbol s_17[] = { 'a', 't' }; static const symbol s_18[] = { 'i', 't' }; static const symbol s_19[] = { 0xFE }; static const symbol s_20[] = { 't' }; static const symbol s_21[] = { 'i', 's', 't' }; static const symbol s_22[] = { 'u' }; static int r_prelude(struct SN_env * z) { while(1) { /* repeat, line 32 */ int c1 = z->c; while(1) { /* goto, line 32 */ int c2 = z->c; if (in_grouping(z, g_v, 97, 238, 0)) goto lab1; z->bra = z->c; /* [, line 33 */ { int c3 = z->c; /* or, line 33 */ if (!(eq_s(z, 1, s_0))) goto lab3; z->ket = z->c; /* ], line 33 */ if (in_grouping(z, g_v, 97, 238, 0)) goto lab3; { int ret = slice_from_s(z, 1, s_1); /* <-, line 33 */ if (ret < 0) return ret; } goto lab2; lab3: z->c = c3; if (!(eq_s(z, 1, s_2))) goto lab1; z->ket = z->c; /* ], line 34 */ if (in_grouping(z, g_v, 97, 238, 0)) goto lab1; { int ret = slice_from_s(z, 1, s_3); /* <-, line 34 */ if (ret < 0) return ret; } } lab2: z->c = c2; break; lab1: z->c = c2; if (z->c >= z->l) goto lab0; z->c++; /* goto, line 32 */ } continue; lab0: z->c = c1; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 44 */ { int c2 = z->c; /* or, line 46 */ if (in_grouping(z, g_v, 97, 238, 0)) goto lab2; { int c3 = z->c; /* or, line 45 */ if (out_grouping(z, g_v, 97, 238, 0)) goto lab4; { /* gopast */ /* grouping v, line 45 */ int ret = out_grouping(z, g_v, 97, 238, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping(z, g_v, 97, 238, 0)) goto lab2; { /* gopast */ /* non v, line 45 */ int ret = in_grouping(z, g_v, 97, 238, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping(z, g_v, 97, 238, 0)) goto lab0; { int c4 = z->c; /* 
or, line 47 */ if (out_grouping(z, g_v, 97, 238, 0)) goto lab6; { /* gopast */ /* grouping v, line 47 */ int ret = out_grouping(z, g_v, 97, 238, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping(z, g_v, 97, 238, 0)) goto lab0; if (z->c >= z->l) goto lab0; z->c++; /* next, line 47 */ } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 48 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 50 */ { /* gopast */ /* grouping v, line 51 */ int ret = out_grouping(z, g_v, 97, 238, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 51 */ int ret = in_grouping(z, g_v, 97, 238, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 51 */ { /* gopast */ /* grouping v, line 52 */ int ret = out_grouping(z, g_v, 97, 238, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 52 */ int ret = in_grouping(z, g_v, 97, 238, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 52 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 56 */ int c1 = z->c; z->bra = z->c; /* [, line 58 */ if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 85)) among_var = 3; else among_var = find_among(z, a_0, 3); /* substring, line 58 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 58 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_4); /* <-, line 59 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_5); /* <-, line 60 */ if (ret < 0) return ret; } break; case 3: if (z->c >= z->l) goto lab0; z->c++; /* next, line 61 */ break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int 
r_step_0(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 73 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((266786 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_1, 16); /* substring, line 73 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 73 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 73 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 75 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_6); /* <-, line 77 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_7); /* <-, line 79 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_8); /* <-, line 81 */ if (ret < 0) return ret; } break; case 5: { int m1 = z->l - z->c; (void)m1; /* not, line 83 */ if (!(eq_s_b(z, 2, s_9))) goto lab0; return 0; lab0: z->c = z->l - m1; } { int ret = slice_from_s(z, 1, s_10); /* <-, line 83 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 2, s_11); /* <-, line 85 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 3, s_12); /* <-, line 87 */ if (ret < 0) return ret; } break; } return 1; } static int r_combo_suffix(struct SN_env * z) { int among_var; { int m_test = z->l - z->c; /* test, line 91 */ z->ket = z->c; /* [, line 92 */ among_var = find_among_b(z, a_2, 46); /* substring, line 92 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 92 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 92 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_13); /* <-, line 101 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 4, s_14); /* <-, line 104 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 2, s_15); /* <-, line 107 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 2, s_16); /* 
<-, line 113 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 2, s_17); /* <-, line 118 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 2, s_18); /* <-, line 122 */ if (ret < 0) return ret; } break; } z->B[0] = 1; /* set standard_suffix_removed, line 125 */ z->c = z->l - m_test; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->B[0] = 0; /* unset standard_suffix_removed, line 130 */ while(1) { /* repeat, line 131 */ int m1 = z->l - z->c; (void)m1; { int ret = r_combo_suffix(z); if (ret == 0) goto lab0; /* call combo_suffix, line 131 */ if (ret < 0) return ret; } continue; lab0: z->c = z->l - m1; break; } z->ket = z->c; /* [, line 132 */ among_var = find_among_b(z, a_3, 62); /* substring, line 132 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 132 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 132 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 149 */ if (ret < 0) return ret; } break; case 2: if (!(eq_s_b(z, 1, s_19))) return 0; z->bra = z->c; /* ], line 152 */ { int ret = slice_from_s(z, 1, s_20); /* <-, line 152 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 3, s_21); /* <-, line 156 */ if (ret < 0) return ret; } break; } z->B[0] = 1; /* set standard_suffix_removed, line 160 */ return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 164 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 164 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 165 */ among_var = find_among_b(z, a_4, 94); /* substring, line 165 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 165 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int m2 = z->l - z->c; (void)m2; /* or, line 200 */ if (out_grouping_b(z, g_v, 97, 238, 
0)) goto lab1; goto lab0; lab1: z->c = z->l - m2; if (!(eq_s_b(z, 1, s_22))) { z->lb = mlimit; return 0; } } lab0: { int ret = slice_del(z); /* delete, line 200 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 214 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_vowel_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 219 */ among_var = find_among_b(z, a_5, 5); /* substring, line 219 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 219 */ { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 219 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 220 */ if (ret < 0) return ret; } break; } return 1; } extern int romanian_ISO_8859_2_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 226 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 226 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 227 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 227 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 228 */ { int m3 = z->l - z->c; (void)m3; /* do, line 229 */ { int ret = r_step_0(z); if (ret == 0) goto lab2; /* call step_0, line 229 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 230 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab3; /* call standard_suffix, line 230 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 231 */ { int m6 = z->l - z->c; (void)m6; /* or, line 231 */ if (!(z->B[0])) goto lab6; /* Boolean test standard_suffix_removed, line 231 */ goto lab5; lab6: z->c = z->l - m6; { int ret = r_verb_suffix(z); if (ret == 0) goto lab4; /* call verb_suffix, line 231 */ if (ret < 0) return ret; } } lab5: lab4: z->c = z->l - m5; } { int m7 = z->l - 
z->c; (void)m7; /* do, line 232 */ { int ret = r_vowel_suffix(z); if (ret == 0) goto lab7; /* call vowel_suffix, line 232 */ if (ret < 0) return ret; } lab7: z->c = z->l - m7; } z->c = z->lb; { int c8 = z->c; /* do, line 234 */ { int ret = r_postlude(z); if (ret == 0) goto lab8; /* call postlude, line 234 */ if (ret < 0) return ret; } lab8: z->c = c8; } return 1; } extern struct SN_env * romanian_ISO_8859_2_create_env(void) { return SN_create_env(0, 3, 1); } extern void romanian_ISO_8859_2_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h000066400000000000000000000005101456444476200315670ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* romanian_ISO_8859_2_create_env(void); extern void romanian_ISO_8859_2_close_env(struct SN_env* z); extern int romanian_ISO_8859_2_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_KOI8_R_russian.c000066400000000000000000000572021456444476200310770ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int russian_KOI8_R_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_tidy_up(struct SN_env * z); static int r_derivational(struct SN_env * z); static int r_noun(struct SN_env * z); static int r_verb(struct SN_env * z); static int r_reflexive(struct SN_env * z); static int r_adjectival(struct SN_env * z); static int r_adjective(struct SN_env * z); static int r_perfective_gerund(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * russian_KOI8_R_create_env(void); extern void 
russian_KOI8_R_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[3] = { 0xD7, 0xDB, 0xC9 }; static const symbol s_0_1[4] = { 0xC9, 0xD7, 0xDB, 0xC9 }; static const symbol s_0_2[4] = { 0xD9, 0xD7, 0xDB, 0xC9 }; static const symbol s_0_3[1] = { 0xD7 }; static const symbol s_0_4[2] = { 0xC9, 0xD7 }; static const symbol s_0_5[2] = { 0xD9, 0xD7 }; static const symbol s_0_6[5] = { 0xD7, 0xDB, 0xC9, 0xD3, 0xD8 }; static const symbol s_0_7[6] = { 0xC9, 0xD7, 0xDB, 0xC9, 0xD3, 0xD8 }; static const symbol s_0_8[6] = { 0xD9, 0xD7, 0xDB, 0xC9, 0xD3, 0xD8 }; static const struct among a_0[9] = { /* 0 */ { 3, s_0_0, -1, 1, 0}, /* 1 */ { 4, s_0_1, 0, 2, 0}, /* 2 */ { 4, s_0_2, 0, 2, 0}, /* 3 */ { 1, s_0_3, -1, 1, 0}, /* 4 */ { 2, s_0_4, 3, 2, 0}, /* 5 */ { 2, s_0_5, 3, 2, 0}, /* 6 */ { 5, s_0_6, -1, 1, 0}, /* 7 */ { 6, s_0_7, 6, 2, 0}, /* 8 */ { 6, s_0_8, 6, 2, 0} }; static const symbol s_1_0[2] = { 0xC0, 0xC0 }; static const symbol s_1_1[2] = { 0xC5, 0xC0 }; static const symbol s_1_2[2] = { 0xCF, 0xC0 }; static const symbol s_1_3[2] = { 0xD5, 0xC0 }; static const symbol s_1_4[2] = { 0xC5, 0xC5 }; static const symbol s_1_5[2] = { 0xC9, 0xC5 }; static const symbol s_1_6[2] = { 0xCF, 0xC5 }; static const symbol s_1_7[2] = { 0xD9, 0xC5 }; static const symbol s_1_8[2] = { 0xC9, 0xC8 }; static const symbol s_1_9[2] = { 0xD9, 0xC8 }; static const symbol s_1_10[3] = { 0xC9, 0xCD, 0xC9 }; static const symbol s_1_11[3] = { 0xD9, 0xCD, 0xC9 }; static const symbol s_1_12[2] = { 0xC5, 0xCA }; static const symbol s_1_13[2] = { 0xC9, 0xCA }; static const symbol s_1_14[2] = { 0xCF, 0xCA }; static const symbol s_1_15[2] = { 0xD9, 0xCA }; static const symbol s_1_16[2] = { 0xC5, 0xCD }; static const symbol s_1_17[2] = { 0xC9, 0xCD }; static const symbol s_1_18[2] = { 0xCF, 0xCD }; static const symbol s_1_19[2] = { 0xD9, 0xCD }; static const symbol s_1_20[3] = { 0xC5, 0xC7, 0xCF }; static const symbol s_1_21[3] = { 0xCF, 0xC7, 0xCF }; static const symbol s_1_22[2] = 
{ 0xC1, 0xD1 }; static const symbol s_1_23[2] = { 0xD1, 0xD1 }; static const symbol s_1_24[3] = { 0xC5, 0xCD, 0xD5 }; static const symbol s_1_25[3] = { 0xCF, 0xCD, 0xD5 }; static const struct among a_1[26] = { /* 0 */ { 2, s_1_0, -1, 1, 0}, /* 1 */ { 2, s_1_1, -1, 1, 0}, /* 2 */ { 2, s_1_2, -1, 1, 0}, /* 3 */ { 2, s_1_3, -1, 1, 0}, /* 4 */ { 2, s_1_4, -1, 1, 0}, /* 5 */ { 2, s_1_5, -1, 1, 0}, /* 6 */ { 2, s_1_6, -1, 1, 0}, /* 7 */ { 2, s_1_7, -1, 1, 0}, /* 8 */ { 2, s_1_8, -1, 1, 0}, /* 9 */ { 2, s_1_9, -1, 1, 0}, /* 10 */ { 3, s_1_10, -1, 1, 0}, /* 11 */ { 3, s_1_11, -1, 1, 0}, /* 12 */ { 2, s_1_12, -1, 1, 0}, /* 13 */ { 2, s_1_13, -1, 1, 0}, /* 14 */ { 2, s_1_14, -1, 1, 0}, /* 15 */ { 2, s_1_15, -1, 1, 0}, /* 16 */ { 2, s_1_16, -1, 1, 0}, /* 17 */ { 2, s_1_17, -1, 1, 0}, /* 18 */ { 2, s_1_18, -1, 1, 0}, /* 19 */ { 2, s_1_19, -1, 1, 0}, /* 20 */ { 3, s_1_20, -1, 1, 0}, /* 21 */ { 3, s_1_21, -1, 1, 0}, /* 22 */ { 2, s_1_22, -1, 1, 0}, /* 23 */ { 2, s_1_23, -1, 1, 0}, /* 24 */ { 3, s_1_24, -1, 1, 0}, /* 25 */ { 3, s_1_25, -1, 1, 0} }; static const symbol s_2_0[2] = { 0xC5, 0xCD }; static const symbol s_2_1[2] = { 0xCE, 0xCE }; static const symbol s_2_2[2] = { 0xD7, 0xDB }; static const symbol s_2_3[3] = { 0xC9, 0xD7, 0xDB }; static const symbol s_2_4[3] = { 0xD9, 0xD7, 0xDB }; static const symbol s_2_5[1] = { 0xDD }; static const symbol s_2_6[2] = { 0xC0, 0xDD }; static const symbol s_2_7[3] = { 0xD5, 0xC0, 0xDD }; static const struct among a_2[8] = { /* 0 */ { 2, s_2_0, -1, 1, 0}, /* 1 */ { 2, s_2_1, -1, 1, 0}, /* 2 */ { 2, s_2_2, -1, 1, 0}, /* 3 */ { 3, s_2_3, 2, 2, 0}, /* 4 */ { 3, s_2_4, 2, 2, 0}, /* 5 */ { 1, s_2_5, -1, 1, 0}, /* 6 */ { 2, s_2_6, 5, 1, 0}, /* 7 */ { 3, s_2_7, 6, 2, 0} }; static const symbol s_3_0[2] = { 0xD3, 0xD1 }; static const symbol s_3_1[2] = { 0xD3, 0xD8 }; static const struct among a_3[2] = { /* 0 */ { 2, s_3_0, -1, 1, 0}, /* 1 */ { 2, s_3_1, -1, 1, 0} }; static const symbol s_4_0[1] = { 0xC0 }; static const symbol s_4_1[2] = { 0xD5, 
0xC0 }; static const symbol s_4_2[2] = { 0xCC, 0xC1 }; static const symbol s_4_3[3] = { 0xC9, 0xCC, 0xC1 }; static const symbol s_4_4[3] = { 0xD9, 0xCC, 0xC1 }; static const symbol s_4_5[2] = { 0xCE, 0xC1 }; static const symbol s_4_6[3] = { 0xC5, 0xCE, 0xC1 }; static const symbol s_4_7[3] = { 0xC5, 0xD4, 0xC5 }; static const symbol s_4_8[3] = { 0xC9, 0xD4, 0xC5 }; static const symbol s_4_9[3] = { 0xCA, 0xD4, 0xC5 }; static const symbol s_4_10[4] = { 0xC5, 0xCA, 0xD4, 0xC5 }; static const symbol s_4_11[4] = { 0xD5, 0xCA, 0xD4, 0xC5 }; static const symbol s_4_12[2] = { 0xCC, 0xC9 }; static const symbol s_4_13[3] = { 0xC9, 0xCC, 0xC9 }; static const symbol s_4_14[3] = { 0xD9, 0xCC, 0xC9 }; static const symbol s_4_15[1] = { 0xCA }; static const symbol s_4_16[2] = { 0xC5, 0xCA }; static const symbol s_4_17[2] = { 0xD5, 0xCA }; static const symbol s_4_18[1] = { 0xCC }; static const symbol s_4_19[2] = { 0xC9, 0xCC }; static const symbol s_4_20[2] = { 0xD9, 0xCC }; static const symbol s_4_21[2] = { 0xC5, 0xCD }; static const symbol s_4_22[2] = { 0xC9, 0xCD }; static const symbol s_4_23[2] = { 0xD9, 0xCD }; static const symbol s_4_24[1] = { 0xCE }; static const symbol s_4_25[2] = { 0xC5, 0xCE }; static const symbol s_4_26[2] = { 0xCC, 0xCF }; static const symbol s_4_27[3] = { 0xC9, 0xCC, 0xCF }; static const symbol s_4_28[3] = { 0xD9, 0xCC, 0xCF }; static const symbol s_4_29[2] = { 0xCE, 0xCF }; static const symbol s_4_30[3] = { 0xC5, 0xCE, 0xCF }; static const symbol s_4_31[3] = { 0xCE, 0xCE, 0xCF }; static const symbol s_4_32[2] = { 0xC0, 0xD4 }; static const symbol s_4_33[3] = { 0xD5, 0xC0, 0xD4 }; static const symbol s_4_34[2] = { 0xC5, 0xD4 }; static const symbol s_4_35[3] = { 0xD5, 0xC5, 0xD4 }; static const symbol s_4_36[2] = { 0xC9, 0xD4 }; static const symbol s_4_37[2] = { 0xD1, 0xD4 }; static const symbol s_4_38[2] = { 0xD9, 0xD4 }; static const symbol s_4_39[2] = { 0xD4, 0xD8 }; static const symbol s_4_40[3] = { 0xC9, 0xD4, 0xD8 }; static const symbol s_4_41[3] = 
{ 0xD9, 0xD4, 0xD8 }; static const symbol s_4_42[3] = { 0xC5, 0xDB, 0xD8 }; static const symbol s_4_43[3] = { 0xC9, 0xDB, 0xD8 }; static const symbol s_4_44[2] = { 0xCE, 0xD9 }; static const symbol s_4_45[3] = { 0xC5, 0xCE, 0xD9 }; static const struct among a_4[46] = { /* 0 */ { 1, s_4_0, -1, 2, 0}, /* 1 */ { 2, s_4_1, 0, 2, 0}, /* 2 */ { 2, s_4_2, -1, 1, 0}, /* 3 */ { 3, s_4_3, 2, 2, 0}, /* 4 */ { 3, s_4_4, 2, 2, 0}, /* 5 */ { 2, s_4_5, -1, 1, 0}, /* 6 */ { 3, s_4_6, 5, 2, 0}, /* 7 */ { 3, s_4_7, -1, 1, 0}, /* 8 */ { 3, s_4_8, -1, 2, 0}, /* 9 */ { 3, s_4_9, -1, 1, 0}, /* 10 */ { 4, s_4_10, 9, 2, 0}, /* 11 */ { 4, s_4_11, 9, 2, 0}, /* 12 */ { 2, s_4_12, -1, 1, 0}, /* 13 */ { 3, s_4_13, 12, 2, 0}, /* 14 */ { 3, s_4_14, 12, 2, 0}, /* 15 */ { 1, s_4_15, -1, 1, 0}, /* 16 */ { 2, s_4_16, 15, 2, 0}, /* 17 */ { 2, s_4_17, 15, 2, 0}, /* 18 */ { 1, s_4_18, -1, 1, 0}, /* 19 */ { 2, s_4_19, 18, 2, 0}, /* 20 */ { 2, s_4_20, 18, 2, 0}, /* 21 */ { 2, s_4_21, -1, 1, 0}, /* 22 */ { 2, s_4_22, -1, 2, 0}, /* 23 */ { 2, s_4_23, -1, 2, 0}, /* 24 */ { 1, s_4_24, -1, 1, 0}, /* 25 */ { 2, s_4_25, 24, 2, 0}, /* 26 */ { 2, s_4_26, -1, 1, 0}, /* 27 */ { 3, s_4_27, 26, 2, 0}, /* 28 */ { 3, s_4_28, 26, 2, 0}, /* 29 */ { 2, s_4_29, -1, 1, 0}, /* 30 */ { 3, s_4_30, 29, 2, 0}, /* 31 */ { 3, s_4_31, 29, 1, 0}, /* 32 */ { 2, s_4_32, -1, 1, 0}, /* 33 */ { 3, s_4_33, 32, 2, 0}, /* 34 */ { 2, s_4_34, -1, 1, 0}, /* 35 */ { 3, s_4_35, 34, 2, 0}, /* 36 */ { 2, s_4_36, -1, 2, 0}, /* 37 */ { 2, s_4_37, -1, 2, 0}, /* 38 */ { 2, s_4_38, -1, 2, 0}, /* 39 */ { 2, s_4_39, -1, 1, 0}, /* 40 */ { 3, s_4_40, 39, 2, 0}, /* 41 */ { 3, s_4_41, 39, 2, 0}, /* 42 */ { 3, s_4_42, -1, 1, 0}, /* 43 */ { 3, s_4_43, -1, 2, 0}, /* 44 */ { 2, s_4_44, -1, 1, 0}, /* 45 */ { 3, s_4_45, 44, 2, 0} }; static const symbol s_5_0[1] = { 0xC0 }; static const symbol s_5_1[2] = { 0xC9, 0xC0 }; static const symbol s_5_2[2] = { 0xD8, 0xC0 }; static const symbol s_5_3[1] = { 0xC1 }; static const symbol s_5_4[1] = { 0xC5 }; static const 
symbol s_5_5[2] = { 0xC9, 0xC5 }; static const symbol s_5_6[2] = { 0xD8, 0xC5 }; static const symbol s_5_7[2] = { 0xC1, 0xC8 }; static const symbol s_5_8[2] = { 0xD1, 0xC8 }; static const symbol s_5_9[3] = { 0xC9, 0xD1, 0xC8 }; static const symbol s_5_10[1] = { 0xC9 }; static const symbol s_5_11[2] = { 0xC5, 0xC9 }; static const symbol s_5_12[2] = { 0xC9, 0xC9 }; static const symbol s_5_13[3] = { 0xC1, 0xCD, 0xC9 }; static const symbol s_5_14[3] = { 0xD1, 0xCD, 0xC9 }; static const symbol s_5_15[4] = { 0xC9, 0xD1, 0xCD, 0xC9 }; static const symbol s_5_16[1] = { 0xCA }; static const symbol s_5_17[2] = { 0xC5, 0xCA }; static const symbol s_5_18[3] = { 0xC9, 0xC5, 0xCA }; static const symbol s_5_19[2] = { 0xC9, 0xCA }; static const symbol s_5_20[2] = { 0xCF, 0xCA }; static const symbol s_5_21[2] = { 0xC1, 0xCD }; static const symbol s_5_22[2] = { 0xC5, 0xCD }; static const symbol s_5_23[3] = { 0xC9, 0xC5, 0xCD }; static const symbol s_5_24[2] = { 0xCF, 0xCD }; static const symbol s_5_25[2] = { 0xD1, 0xCD }; static const symbol s_5_26[3] = { 0xC9, 0xD1, 0xCD }; static const symbol s_5_27[1] = { 0xCF }; static const symbol s_5_28[1] = { 0xD1 }; static const symbol s_5_29[2] = { 0xC9, 0xD1 }; static const symbol s_5_30[2] = { 0xD8, 0xD1 }; static const symbol s_5_31[1] = { 0xD5 }; static const symbol s_5_32[2] = { 0xC5, 0xD7 }; static const symbol s_5_33[2] = { 0xCF, 0xD7 }; static const symbol s_5_34[1] = { 0xD8 }; static const symbol s_5_35[1] = { 0xD9 }; static const struct among a_5[36] = { /* 0 */ { 1, s_5_0, -1, 1, 0}, /* 1 */ { 2, s_5_1, 0, 1, 0}, /* 2 */ { 2, s_5_2, 0, 1, 0}, /* 3 */ { 1, s_5_3, -1, 1, 0}, /* 4 */ { 1, s_5_4, -1, 1, 0}, /* 5 */ { 2, s_5_5, 4, 1, 0}, /* 6 */ { 2, s_5_6, 4, 1, 0}, /* 7 */ { 2, s_5_7, -1, 1, 0}, /* 8 */ { 2, s_5_8, -1, 1, 0}, /* 9 */ { 3, s_5_9, 8, 1, 0}, /* 10 */ { 1, s_5_10, -1, 1, 0}, /* 11 */ { 2, s_5_11, 10, 1, 0}, /* 12 */ { 2, s_5_12, 10, 1, 0}, /* 13 */ { 3, s_5_13, 10, 1, 0}, /* 14 */ { 3, s_5_14, 10, 1, 0}, /* 15 */ { 4, 
s_5_15, 14, 1, 0}, /* 16 */ { 1, s_5_16, -1, 1, 0}, /* 17 */ { 2, s_5_17, 16, 1, 0}, /* 18 */ { 3, s_5_18, 17, 1, 0}, /* 19 */ { 2, s_5_19, 16, 1, 0}, /* 20 */ { 2, s_5_20, 16, 1, 0}, /* 21 */ { 2, s_5_21, -1, 1, 0}, /* 22 */ { 2, s_5_22, -1, 1, 0}, /* 23 */ { 3, s_5_23, 22, 1, 0}, /* 24 */ { 2, s_5_24, -1, 1, 0}, /* 25 */ { 2, s_5_25, -1, 1, 0}, /* 26 */ { 3, s_5_26, 25, 1, 0}, /* 27 */ { 1, s_5_27, -1, 1, 0}, /* 28 */ { 1, s_5_28, -1, 1, 0}, /* 29 */ { 2, s_5_29, 28, 1, 0}, /* 30 */ { 2, s_5_30, 28, 1, 0}, /* 31 */ { 1, s_5_31, -1, 1, 0}, /* 32 */ { 2, s_5_32, -1, 1, 0}, /* 33 */ { 2, s_5_33, -1, 1, 0}, /* 34 */ { 1, s_5_34, -1, 1, 0}, /* 35 */ { 1, s_5_35, -1, 1, 0} }; static const symbol s_6_0[3] = { 0xCF, 0xD3, 0xD4 }; static const symbol s_6_1[4] = { 0xCF, 0xD3, 0xD4, 0xD8 }; static const struct among a_6[2] = { /* 0 */ { 3, s_6_0, -1, 1, 0}, /* 1 */ { 4, s_6_1, -1, 1, 0} }; static const symbol s_7_0[4] = { 0xC5, 0xCA, 0xDB, 0xC5 }; static const symbol s_7_1[1] = { 0xCE }; static const symbol s_7_2[1] = { 0xD8 }; static const symbol s_7_3[3] = { 0xC5, 0xCA, 0xDB }; static const struct among a_7[4] = { /* 0 */ { 4, s_7_0, -1, 1, 0}, /* 1 */ { 1, s_7_1, -1, 2, 0}, /* 2 */ { 1, s_7_2, -1, 3, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0} }; static const unsigned char g_v[] = { 35, 130, 34, 18 }; static const symbol s_0[] = { 0xC1 }; static const symbol s_1[] = { 0xD1 }; static const symbol s_2[] = { 0xC1 }; static const symbol s_3[] = { 0xD1 }; static const symbol s_4[] = { 0xC1 }; static const symbol s_5[] = { 0xD1 }; static const symbol s_6[] = { 0xCE }; static const symbol s_7[] = { 0xCE }; static const symbol s_8[] = { 0xCE }; static const symbol s_9[] = { 0xC9 }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { int c1 = z->c; /* do, line 63 */ { /* gopast */ /* grouping v, line 64 */ int ret = out_grouping(z, g_v, 192, 220, 1); if (ret < 0) goto lab0; z->c += ret; } z->I[0] = z->c; /* setmark pV, line 64 */ { /* gopast */ /* non v, line 
64 */ int ret = in_grouping(z, g_v, 192, 220, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* grouping v, line 65 */ int ret = out_grouping(z, g_v, 192, 220, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* non v, line 65 */ int ret = in_grouping(z, g_v, 192, 220, 1); if (ret < 0) goto lab0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 65 */ lab0: z->c = c1; } return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_perfective_gerund(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 74 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((25166336 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_0, 9); /* substring, line 74 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 74 */ switch(among_var) { case 0: return 0; case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 78 */ if (!(eq_s_b(z, 1, s_0))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_1))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 78 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 85 */ if (ret < 0) return ret; } break; } return 1; } static int r_adjective(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 90 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((2271009 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_1, 26); /* substring, line 90 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 90 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 99 */ if (ret < 0) return ret; } break; } return 1; } static int r_adjectival(struct SN_env * z) { int among_var; { int ret = r_adjective(z); if (ret == 0) return 0; /* call adjective, line 104 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ z->ket = z->c; /* [, line 112 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || 
!((671113216 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab0; } among_var = find_among_b(z, a_2, 8); /* substring, line 112 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 112 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab0; } case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 117 */ if (!(eq_s_b(z, 1, s_2))) goto lab2; goto lab1; lab2: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m_keep; goto lab0; } } lab1: { int ret = slice_del(z); /* delete, line 117 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 124 */ if (ret < 0) return ret; } break; } lab0: ; } return 1; } static int r_reflexive(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 131 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 209 && z->p[z->c - 1] != 216)) return 0; among_var = find_among_b(z, a_3, 2); /* substring, line 131 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 131 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } break; } return 1; } static int r_verb(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 139 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((51443235 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_4, 46); /* substring, line 139 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 139 */ switch(among_var) { case 0: return 0; case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 145 */ if (!(eq_s_b(z, 1, s_4))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_5))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 145 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 153 */ if (ret < 0) return ret; } break; } return 1; } static int r_noun(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 162 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 
|| !((60991267 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_5, 36); /* substring, line 162 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 162 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 169 */ if (ret < 0) return ret; } break; } return 1; } static int r_derivational(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 178 */ if (z->c - 2 <= z->lb || (z->p[z->c - 1] != 212 && z->p[z->c - 1] != 216)) return 0; among_var = find_among_b(z, a_6, 2); /* substring, line 178 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 178 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 178 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 181 */ if (ret < 0) return ret; } break; } return 1; } static int r_tidy_up(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 186 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 6 || !((151011360 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_7, 4); /* substring, line 186 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 186 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 190 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 191 */ if (!(eq_s_b(z, 1, s_6))) return 0; z->bra = z->c; /* ], line 191 */ if (!(eq_s_b(z, 1, s_7))) return 0; { int ret = slice_del(z); /* delete, line 191 */ if (ret < 0) return ret; } break; case 2: if (!(eq_s_b(z, 1, s_8))) return 0; { int ret = slice_del(z); /* delete, line 194 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 196 */ if (ret < 0) return ret; } break; } return 1; } extern int russian_KOI8_R_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 203 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 203 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = 
z->c; z->c = z->l; /* backwards, line 204 */ { int mlimit; /* setlimit, line 204 */ int m2 = z->l - z->c; (void)m2; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 204 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m2; { int m3 = z->l - z->c; (void)m3; /* do, line 205 */ { int m4 = z->l - z->c; (void)m4; /* or, line 206 */ { int ret = r_perfective_gerund(z); if (ret == 0) goto lab3; /* call perfective_gerund, line 206 */ if (ret < 0) return ret; } goto lab2; lab3: z->c = z->l - m4; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 207 */ { int ret = r_reflexive(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call reflexive, line 207 */ if (ret < 0) return ret; } lab4: ; } { int m5 = z->l - z->c; (void)m5; /* or, line 208 */ { int ret = r_adjectival(z); if (ret == 0) goto lab6; /* call adjectival, line 208 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m5; { int ret = r_verb(z); if (ret == 0) goto lab7; /* call verb, line 208 */ if (ret < 0) return ret; } goto lab5; lab7: z->c = z->l - m5; { int ret = r_noun(z); if (ret == 0) goto lab1; /* call noun, line 208 */ if (ret < 0) return ret; } } lab5: ; } lab2: lab1: z->c = z->l - m3; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 211 */ z->ket = z->c; /* [, line 211 */ if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m_keep; goto lab8; } z->bra = z->c; /* ], line 211 */ { int ret = slice_del(z); /* delete, line 211 */ if (ret < 0) return ret; } lab8: ; } { int m6 = z->l - z->c; (void)m6; /* do, line 214 */ { int ret = r_derivational(z); if (ret == 0) goto lab9; /* call derivational, line 214 */ if (ret < 0) return ret; } lab9: z->c = z->l - m6; } { int m7 = z->l - z->c; (void)m7; /* do, line 215 */ { int ret = r_tidy_up(z); if (ret == 0) goto lab10; /* call tidy_up, line 215 */ if (ret < 0) return ret; } lab10: z->c = z->l - m7; } z->lb = mlimit; } z->c = z->lb; return 1; } extern struct SN_env * russian_KOI8_R_create_env(void) { return SN_create_env(0, 2, 0); } 
extern void russian_KOI8_R_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_KOI8_R_russian.h000066400000000000000000000004711456444476200311000ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* russian_KOI8_R_create_env(void); extern void russian_KOI8_R_close_env(struct SN_env* z); extern int russian_KOI8_R_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_danish.c000066400000000000000000000265061456444476200305560ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int danish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_undouble(struct SN_env * z); static int r_other_suffix(struct SN_env * z); static int r_consonant_pair(struct SN_env * z); static int r_main_suffix(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * danish_UTF_8_create_env(void); extern void danish_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[3] = { 'h', 'e', 'd' }; static const symbol s_0_1[5] = { 'e', 't', 'h', 'e', 'd' }; static const symbol s_0_2[4] = { 'e', 'r', 'e', 'd' }; static const symbol s_0_3[1] = { 'e' }; static const symbol s_0_4[5] = { 'e', 'r', 'e', 'd', 'e' }; static const symbol s_0_5[4] = { 'e', 'n', 'd', 'e' }; static const symbol s_0_6[6] = { 'e', 'r', 'e', 'n', 'd', 'e' }; static const symbol s_0_7[3] = { 'e', 'n', 'e' }; static const symbol s_0_8[4] = { 'e', 'r', 'n', 'e' }; static const symbol s_0_9[3] = { 'e', 'r', 'e' }; static const symbol s_0_10[2] = { 'e', 'n' }; static const symbol s_0_11[5] = { 'h', 'e', 'd', 'e', 'n' }; 
static const symbol s_0_12[4] = { 'e', 'r', 'e', 'n' }; static const symbol s_0_13[2] = { 'e', 'r' }; static const symbol s_0_14[5] = { 'h', 'e', 'd', 'e', 'r' }; static const symbol s_0_15[4] = { 'e', 'r', 'e', 'r' }; static const symbol s_0_16[1] = { 's' }; static const symbol s_0_17[4] = { 'h', 'e', 'd', 's' }; static const symbol s_0_18[2] = { 'e', 's' }; static const symbol s_0_19[5] = { 'e', 'n', 'd', 'e', 's' }; static const symbol s_0_20[7] = { 'e', 'r', 'e', 'n', 'd', 'e', 's' }; static const symbol s_0_21[4] = { 'e', 'n', 'e', 's' }; static const symbol s_0_22[5] = { 'e', 'r', 'n', 'e', 's' }; static const symbol s_0_23[4] = { 'e', 'r', 'e', 's' }; static const symbol s_0_24[3] = { 'e', 'n', 's' }; static const symbol s_0_25[6] = { 'h', 'e', 'd', 'e', 'n', 's' }; static const symbol s_0_26[5] = { 'e', 'r', 'e', 'n', 's' }; static const symbol s_0_27[3] = { 'e', 'r', 's' }; static const symbol s_0_28[3] = { 'e', 't', 's' }; static const symbol s_0_29[5] = { 'e', 'r', 'e', 't', 's' }; static const symbol s_0_30[2] = { 'e', 't' }; static const symbol s_0_31[4] = { 'e', 'r', 'e', 't' }; static const struct among a_0[32] = { /* 0 */ { 3, s_0_0, -1, 1, 0}, /* 1 */ { 5, s_0_1, 0, 1, 0}, /* 2 */ { 4, s_0_2, -1, 1, 0}, /* 3 */ { 1, s_0_3, -1, 1, 0}, /* 4 */ { 5, s_0_4, 3, 1, 0}, /* 5 */ { 4, s_0_5, 3, 1, 0}, /* 6 */ { 6, s_0_6, 5, 1, 0}, /* 7 */ { 3, s_0_7, 3, 1, 0}, /* 8 */ { 4, s_0_8, 3, 1, 0}, /* 9 */ { 3, s_0_9, 3, 1, 0}, /* 10 */ { 2, s_0_10, -1, 1, 0}, /* 11 */ { 5, s_0_11, 10, 1, 0}, /* 12 */ { 4, s_0_12, 10, 1, 0}, /* 13 */ { 2, s_0_13, -1, 1, 0}, /* 14 */ { 5, s_0_14, 13, 1, 0}, /* 15 */ { 4, s_0_15, 13, 1, 0}, /* 16 */ { 1, s_0_16, -1, 2, 0}, /* 17 */ { 4, s_0_17, 16, 1, 0}, /* 18 */ { 2, s_0_18, 16, 1, 0}, /* 19 */ { 5, s_0_19, 18, 1, 0}, /* 20 */ { 7, s_0_20, 19, 1, 0}, /* 21 */ { 4, s_0_21, 18, 1, 0}, /* 22 */ { 5, s_0_22, 18, 1, 0}, /* 23 */ { 4, s_0_23, 18, 1, 0}, /* 24 */ { 3, s_0_24, 16, 1, 0}, /* 25 */ { 6, s_0_25, 24, 1, 0}, /* 26 */ { 5, 
s_0_26, 24, 1, 0}, /* 27 */ { 3, s_0_27, 16, 1, 0}, /* 28 */ { 3, s_0_28, 16, 1, 0}, /* 29 */ { 5, s_0_29, 28, 1, 0}, /* 30 */ { 2, s_0_30, -1, 1, 0}, /* 31 */ { 4, s_0_31, 30, 1, 0} }; static const symbol s_1_0[2] = { 'g', 'd' }; static const symbol s_1_1[2] = { 'd', 't' }; static const symbol s_1_2[2] = { 'g', 't' }; static const symbol s_1_3[2] = { 'k', 't' }; static const struct among a_1[4] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0}, /* 2 */ { 2, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0} }; static const symbol s_2_0[2] = { 'i', 'g' }; static const symbol s_2_1[3] = { 'l', 'i', 'g' }; static const symbol s_2_2[4] = { 'e', 'l', 'i', 'g' }; static const symbol s_2_3[3] = { 'e', 'l', 's' }; static const symbol s_2_4[5] = { 'l', 0xC3, 0xB8, 's', 't' }; static const struct among a_2[5] = { /* 0 */ { 2, s_2_0, -1, 1, 0}, /* 1 */ { 3, s_2_1, 0, 1, 0}, /* 2 */ { 4, s_2_2, 1, 1, 0}, /* 3 */ { 3, s_2_3, -1, 1, 0}, /* 4 */ { 5, s_2_4, -1, 2, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; static const unsigned char g_s_ending[] = { 239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; static const symbol s_0[] = { 's', 't' }; static const symbol s_1[] = { 'i', 'g' }; static const symbol s_2[] = { 'l', 0xC3, 0xB8, 's' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c_test = z->c; /* test, line 33 */ { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); if (ret < 0) return 0; z->c = ret; /* hop, line 33 */ } z->I[1] = z->c; /* setmark x, line 33 */ z->c = c_test; } if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 34 */ { /* gopast */ /* non v, line 34 */ int ret = in_grouping_U(z, g_v, 97, 248, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 34 */ /* try, line 35 */ if (!(z->I[0] < z->I[1])) goto lab0; z->I[0] = z->I[1]; lab0: return 1; } static int r_main_suffix(struct SN_env * z) { int 
among_var; { int mlimit; /* setlimit, line 41 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 41 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 41 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851440 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_0, 32); /* substring, line 41 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 41 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 48 */ if (ret < 0) return ret; } break; case 2: if (in_grouping_b_U(z, g_s_ending, 97, 229, 0)) return 0; { int ret = slice_del(z); /* delete, line 50 */ if (ret < 0) return ret; } break; } return 1; } static int r_consonant_pair(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 55 */ { int mlimit; /* setlimit, line 56 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 56 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 56 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 116)) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_1, 4))) { z->lb = mlimit; return 0; } /* substring, line 56 */ z->bra = z->c; /* ], line 56 */ z->lb = mlimit; } z->c = z->l - m_test; } { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 62 */ } z->bra = z->c; /* ], line 62 */ { int ret = slice_del(z); /* delete, line 62 */ if (ret < 0) return ret; } return 1; } static int r_other_suffix(struct SN_env * z) { int among_var; { int m1 = z->l - z->c; (void)m1; /* do, line 66 */ z->ket = z->c; /* [, line 66 */ if (!(eq_s_b(z, 2, s_0))) goto lab0; z->bra = z->c; /* ], line 66 */ if (!(eq_s_b(z, 2, s_1))) goto lab0; { int ret = slice_del(z); /* delete, line 66 */ if (ret < 0) return ret; } lab0: z->c = z->l - m1; } { int mlimit; /* setlimit, line 67 
*/ int m2 = z->l - z->c; (void)m2; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 67 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m2; z->ket = z->c; /* [, line 67 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_2, 5); /* substring, line 67 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 67 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 70 */ if (ret < 0) return ret; } { int m3 = z->l - z->c; (void)m3; /* do, line 70 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab1; /* call consonant_pair, line 70 */ if (ret < 0) return ret; } lab1: z->c = z->l - m3; } break; case 2: { int ret = slice_from_s(z, 4, s_2); /* <-, line 72 */ if (ret < 0) return ret; } break; } return 1; } static int r_undouble(struct SN_env * z) { { int mlimit; /* setlimit, line 76 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 76 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 76 */ if (out_grouping_b_U(z, g_v, 97, 248, 0)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 76 */ z->S[0] = slice_to(z, z->S[0]); /* -> ch, line 76 */ if (z->S[0] == 0) return -1; /* -> ch, line 76 */ z->lb = mlimit; } if (!(eq_v_b(z, z->S[0]))) return 0; /* name ch, line 77 */ { int ret = slice_del(z); /* delete, line 78 */ if (ret < 0) return ret; } return 1; } extern int danish_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 84 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 84 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 85 */ { int m2 = z->l - z->c; (void)m2; /* do, line 86 */ { int ret = r_main_suffix(z); if (ret == 0) goto lab1; /* call main_suffix, line 86 */ if (ret < 0) return ret; } lab1: z->c = z->l - 
m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 87 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab2; /* call consonant_pair, line 87 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 88 */ { int ret = r_other_suffix(z); if (ret == 0) goto lab3; /* call other_suffix, line 88 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 89 */ { int ret = r_undouble(z); if (ret == 0) goto lab4; /* call undouble, line 89 */ if (ret < 0) return ret; } lab4: z->c = z->l - m5; } z->c = z->lb; return 1; } extern struct SN_env * danish_UTF_8_create_env(void) { return SN_create_env(1, 2, 0); } extern void danish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 1); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_danish.h000066400000000000000000000004631456444476200305550ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* danish_UTF_8_create_env(void); extern void danish_UTF_8_close_env(struct SN_env* z); extern int danish_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_dutch.c000066400000000000000000000514251456444476200304150ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int dutch_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_standard_suffix(struct SN_env * z); static int r_undouble(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_en_ending(struct SN_env * z); static int r_e_ending(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct 
SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * dutch_UTF_8_create_env(void); extern void dutch_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[2] = { 0xC3, 0xA1 }; static const symbol s_0_2[2] = { 0xC3, 0xA4 }; static const symbol s_0_3[2] = { 0xC3, 0xA9 }; static const symbol s_0_4[2] = { 0xC3, 0xAB }; static const symbol s_0_5[2] = { 0xC3, 0xAD }; static const symbol s_0_6[2] = { 0xC3, 0xAF }; static const symbol s_0_7[2] = { 0xC3, 0xB3 }; static const symbol s_0_8[2] = { 0xC3, 0xB6 }; static const symbol s_0_9[2] = { 0xC3, 0xBA }; static const symbol s_0_10[2] = { 0xC3, 0xBC }; static const struct among a_0[11] = { /* 0 */ { 0, 0, -1, 6, 0}, /* 1 */ { 2, s_0_1, 0, 1, 0}, /* 2 */ { 2, s_0_2, 0, 1, 0}, /* 3 */ { 2, s_0_3, 0, 2, 0}, /* 4 */ { 2, s_0_4, 0, 2, 0}, /* 5 */ { 2, s_0_5, 0, 3, 0}, /* 6 */ { 2, s_0_6, 0, 3, 0}, /* 7 */ { 2, s_0_7, 0, 4, 0}, /* 8 */ { 2, s_0_8, 0, 4, 0}, /* 9 */ { 2, s_0_9, 0, 5, 0}, /* 10 */ { 2, s_0_10, 0, 5, 0} }; static const symbol s_1_1[1] = { 'I' }; static const symbol s_1_2[1] = { 'Y' }; static const struct among a_1[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 1, s_1_1, 0, 2, 0}, /* 2 */ { 1, s_1_2, 0, 1, 0} }; static const symbol s_2_0[2] = { 'd', 'd' }; static const symbol s_2_1[2] = { 'k', 'k' }; static const symbol s_2_2[2] = { 't', 't' }; static const struct among a_2[3] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 2, s_2_2, -1, -1, 0} }; static const symbol s_3_0[3] = { 'e', 'n', 'e' }; static const symbol s_3_1[2] = { 's', 'e' }; static const symbol s_3_2[2] = { 'e', 'n' }; static const symbol s_3_3[5] = { 'h', 'e', 'd', 'e', 'n' }; static const symbol s_3_4[1] = { 's' }; static const struct among a_3[5] = { /* 0 */ { 3, s_3_0, -1, 2, 0}, /* 1 */ { 2, s_3_1, -1, 3, 0}, /* 2 */ { 2, s_3_2, -1, 2, 0}, /* 3 */ { 5, s_3_3, 2, 1, 0}, /* 4 */ { 1, s_3_4, -1, 3, 0} }; static const symbol s_4_0[3] = { 'e', 'n', 'd' }; static const 
symbol s_4_1[2] = { 'i', 'g' }; static const symbol s_4_2[3] = { 'i', 'n', 'g' }; static const symbol s_4_3[4] = { 'l', 'i', 'j', 'k' }; static const symbol s_4_4[4] = { 'b', 'a', 'a', 'r' }; static const symbol s_4_5[3] = { 'b', 'a', 'r' }; static const struct among a_4[6] = { /* 0 */ { 3, s_4_0, -1, 1, 0}, /* 1 */ { 2, s_4_1, -1, 2, 0}, /* 2 */ { 3, s_4_2, -1, 1, 0}, /* 3 */ { 4, s_4_3, -1, 3, 0}, /* 4 */ { 4, s_4_4, -1, 4, 0}, /* 5 */ { 3, s_4_5, -1, 5, 0} }; static const symbol s_5_0[2] = { 'a', 'a' }; static const symbol s_5_1[2] = { 'e', 'e' }; static const symbol s_5_2[2] = { 'o', 'o' }; static const symbol s_5_3[2] = { 'u', 'u' }; static const struct among a_5[4] = { /* 0 */ { 2, s_5_0, -1, -1, 0}, /* 1 */ { 2, s_5_1, -1, -1, 0}, /* 2 */ { 2, s_5_2, -1, -1, 0}, /* 3 */ { 2, s_5_3, -1, -1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const unsigned char g_v_I[] = { 1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const unsigned char g_v_j[] = { 17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const symbol s_0[] = { 'a' }; static const symbol s_1[] = { 'e' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'o' }; static const symbol s_4[] = { 'u' }; static const symbol s_5[] = { 'y' }; static const symbol s_6[] = { 'Y' }; static const symbol s_7[] = { 'i' }; static const symbol s_8[] = { 'I' }; static const symbol s_9[] = { 'y' }; static const symbol s_10[] = { 'Y' }; static const symbol s_11[] = { 'y' }; static const symbol s_12[] = { 'i' }; static const symbol s_13[] = { 'e' }; static const symbol s_14[] = { 'g', 'e', 'm' }; static const symbol s_15[] = { 'h', 'e', 'i', 'd' }; static const symbol s_16[] = { 'h', 'e', 'i', 'd' }; static const symbol s_17[] = { 'c' }; static const symbol s_18[] = { 'e', 'n' }; static const symbol s_19[] = { 'i', 'g' }; static const symbol s_20[] = { 'e' }; static const symbol s_21[] = { 'e' }; static int 
r_prelude(struct SN_env * z) { int among_var; { int c_test = z->c; /* test, line 42 */ while(1) { /* repeat, line 42 */ int c1 = z->c; z->bra = z->c; /* [, line 43 */ if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 5 || !((340306450 >> (z->p[z->c + 1] & 0x1f)) & 1)) among_var = 6; else among_var = find_among(z, a_0, 11); /* substring, line 43 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 43 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_0); /* <-, line 45 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 47 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_2); /* <-, line 49 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_3); /* <-, line 51 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_4); /* <-, line 53 */ if (ret < 0) return ret; } break; case 6: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 54 */ } break; } continue; lab0: z->c = c1; break; } z->c = c_test; } { int c_keep = z->c; /* try, line 57 */ z->bra = z->c; /* [, line 57 */ if (!(eq_s(z, 1, s_5))) { z->c = c_keep; goto lab1; } z->ket = z->c; /* ], line 57 */ { int ret = slice_from_s(z, 1, s_6); /* <-, line 57 */ if (ret < 0) return ret; } lab1: ; } while(1) { /* repeat, line 58 */ int c2 = z->c; while(1) { /* goto, line 58 */ int c3 = z->c; if (in_grouping_U(z, g_v, 97, 232, 0)) goto lab3; z->bra = z->c; /* [, line 59 */ { int c4 = z->c; /* or, line 59 */ if (!(eq_s(z, 1, s_7))) goto lab5; z->ket = z->c; /* ], line 59 */ if (in_grouping_U(z, g_v, 97, 232, 0)) goto lab5; { int ret = slice_from_s(z, 1, s_8); /* <-, line 59 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = c4; if (!(eq_s(z, 1, s_9))) goto lab3; z->ket = z->c; /* ], line 60 */ { int ret = slice_from_s(z, 1, s_10); /* <-, line 60 */ if (ret < 0) return ret; } } lab4: z->c = c3; break; lab3: z->c = c3; { int ret = 
skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab2; z->c = ret; /* goto, line 58 */ } } continue; lab2: z->c = c2; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { /* gopast */ /* grouping v, line 69 */ int ret = out_grouping_U(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 69 */ int ret = in_grouping_U(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 69 */ /* try, line 70 */ if (!(z->I[0] < 3)) goto lab0; z->I[0] = 3; lab0: { /* gopast */ /* grouping v, line 71 */ int ret = out_grouping_U(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 71 */ int ret = in_grouping_U(z, g_v, 97, 232, 1); if (ret < 0) return 0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 71 */ return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 75 */ int c1 = z->c; z->bra = z->c; /* [, line 77 */ if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 89)) among_var = 3; else among_var = find_among(z, a_1, 3); /* substring, line 77 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 77 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_11); /* <-, line 78 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */ if (ret < 0) return ret; } break; case 3: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 80 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_undouble(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 91 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1050640 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if 
(!(find_among_b(z, a_2, 3))) return 0; /* among, line 91 */ z->c = z->l - m_test; } z->ket = z->c; /* [, line 91 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 91 */ } z->bra = z->c; /* ], line 91 */ { int ret = slice_del(z); /* delete, line 91 */ if (ret < 0) return ret; } return 1; } static int r_e_ending(struct SN_env * z) { z->B[0] = 0; /* unset e_found, line 95 */ z->ket = z->c; /* [, line 96 */ if (!(eq_s_b(z, 1, s_13))) return 0; z->bra = z->c; /* ], line 96 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 96 */ if (ret < 0) return ret; } { int m_test = z->l - z->c; /* test, line 96 */ if (out_grouping_b_U(z, g_v, 97, 232, 0)) return 0; z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } z->B[0] = 1; /* set e_found, line 97 */ { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 98 */ if (ret < 0) return ret; } return 1; } static int r_en_ending(struct SN_env * z) { { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 102 */ if (ret < 0) return ret; } { int m1 = z->l - z->c; (void)m1; /* and, line 102 */ if (out_grouping_b_U(z, g_v, 97, 232, 0)) return 0; z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* not, line 102 */ if (!(eq_s_b(z, 3, s_14))) goto lab0; return 0; lab0: z->c = z->l - m2; } } { int ret = slice_del(z); /* delete, line 102 */ if (ret < 0) return ret; } { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 103 */ if (ret < 0) return ret; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; { int m1 = z->l - z->c; (void)m1; /* do, line 107 */ z->ket = z->c; /* [, line 108 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((540704 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; among_var = find_among_b(z, a_3, 5); /* substring, line 108 */ if (!(among_var)) goto lab0; z->bra = z->c; /* ], line 108 */ switch(among_var) { case 0: goto lab0; case 1: { 
int ret = r_R1(z); if (ret == 0) goto lab0; /* call R1, line 110 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_15); /* <-, line 110 */ if (ret < 0) return ret; } break; case 2: { int ret = r_en_ending(z); if (ret == 0) goto lab0; /* call en_ending, line 113 */ if (ret < 0) return ret; } break; case 3: { int ret = r_R1(z); if (ret == 0) goto lab0; /* call R1, line 116 */ if (ret < 0) return ret; } if (out_grouping_b_U(z, g_v_j, 97, 232, 0)) goto lab0; { int ret = slice_del(z); /* delete, line 116 */ if (ret < 0) return ret; } break; } lab0: z->c = z->l - m1; } { int m2 = z->l - z->c; (void)m2; /* do, line 120 */ { int ret = r_e_ending(z); if (ret == 0) goto lab1; /* call e_ending, line 120 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 122 */ z->ket = z->c; /* [, line 122 */ if (!(eq_s_b(z, 4, s_16))) goto lab2; z->bra = z->c; /* ], line 122 */ { int ret = r_R2(z); if (ret == 0) goto lab2; /* call R2, line 122 */ if (ret < 0) return ret; } { int m4 = z->l - z->c; (void)m4; /* not, line 122 */ if (!(eq_s_b(z, 1, s_17))) goto lab3; goto lab2; lab3: z->c = z->l - m4; } { int ret = slice_del(z); /* delete, line 122 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 123 */ if (!(eq_s_b(z, 2, s_18))) goto lab2; z->bra = z->c; /* ], line 123 */ { int ret = r_en_ending(z); if (ret == 0) goto lab2; /* call en_ending, line 123 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m5 = z->l - z->c; (void)m5; /* do, line 126 */ z->ket = z->c; /* [, line 127 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((264336 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab4; among_var = find_among_b(z, a_4, 6); /* substring, line 127 */ if (!(among_var)) goto lab4; z->bra = z->c; /* ], line 127 */ switch(among_var) { case 0: goto lab4; case 1: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 129 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 129 */ if (ret < 0) 
return ret; } { int m6 = z->l - z->c; (void)m6; /* or, line 130 */ z->ket = z->c; /* [, line 130 */ if (!(eq_s_b(z, 2, s_19))) goto lab6; z->bra = z->c; /* ], line 130 */ { int ret = r_R2(z); if (ret == 0) goto lab6; /* call R2, line 130 */ if (ret < 0) return ret; } { int m7 = z->l - z->c; (void)m7; /* not, line 130 */ if (!(eq_s_b(z, 1, s_20))) goto lab7; goto lab6; lab7: z->c = z->l - m7; } { int ret = slice_del(z); /* delete, line 130 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m6; { int ret = r_undouble(z); if (ret == 0) goto lab4; /* call undouble, line 130 */ if (ret < 0) return ret; } } lab5: break; case 2: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 133 */ if (ret < 0) return ret; } { int m8 = z->l - z->c; (void)m8; /* not, line 133 */ if (!(eq_s_b(z, 1, s_21))) goto lab8; goto lab4; lab8: z->c = z->l - m8; } { int ret = slice_del(z); /* delete, line 133 */ if (ret < 0) return ret; } break; case 3: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 136 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 136 */ if (ret < 0) return ret; } { int ret = r_e_ending(z); if (ret == 0) goto lab4; /* call e_ending, line 136 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 139 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 139 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) goto lab4; /* call R2, line 142 */ if (ret < 0) return ret; } if (!(z->B[0])) goto lab4; /* Boolean test e_found, line 142 */ { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } break; } lab4: z->c = z->l - m5; } { int m9 = z->l - z->c; (void)m9; /* do, line 146 */ if (out_grouping_b_U(z, g_v_I, 73, 232, 0)) goto lab9; { int m_test = z->l - z->c; /* test, line 148 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((2129954 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab9; if 
(!(find_among_b(z, a_5, 4))) goto lab9; /* among, line 149 */ if (out_grouping_b_U(z, g_v, 97, 232, 0)) goto lab9; z->c = z->l - m_test; } z->ket = z->c; /* [, line 152 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) goto lab9; z->c = ret; /* next, line 152 */ } z->bra = z->c; /* ], line 152 */ { int ret = slice_del(z); /* delete, line 152 */ if (ret < 0) return ret; } lab9: z->c = z->l - m9; } return 1; } extern int dutch_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 159 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 159 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 160 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 160 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 161 */ { int m3 = z->l - z->c; (void)m3; /* do, line 162 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab2; /* call standard_suffix, line 162 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } z->c = z->lb; { int c4 = z->c; /* do, line 163 */ { int ret = r_postlude(z); if (ret == 0) goto lab3; /* call postlude, line 163 */ if (ret < 0) return ret; } lab3: z->c = c4; } return 1; } extern struct SN_env * dutch_UTF_8_create_env(void) { return SN_create_env(0, 2, 1); } extern void dutch_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_dutch.h000066400000000000000000000004601456444476200304130ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* dutch_UTF_8_create_env(void); extern void dutch_UTF_8_close_env(struct SN_env* z); extern int dutch_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif 
LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_english.c000066400000000000000000001142601456444476200307340ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int english_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_exception2(struct SN_env * z); static int r_exception1(struct SN_env * z); static int r_Step_5(struct SN_env * z); static int r_Step_4(struct SN_env * z); static int r_Step_3(struct SN_env * z); static int r_Step_2(struct SN_env * z); static int r_Step_1c(struct SN_env * z); static int r_Step_1b(struct SN_env * z); static int r_Step_1a(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_shortv(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * english_UTF_8_create_env(void); extern void english_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[5] = { 'a', 'r', 's', 'e', 'n' }; static const symbol s_0_1[6] = { 'c', 'o', 'm', 'm', 'u', 'n' }; static const symbol s_0_2[5] = { 'g', 'e', 'n', 'e', 'r' }; static const struct among a_0[3] = { /* 0 */ { 5, s_0_0, -1, -1, 0}, /* 1 */ { 6, s_0_1, -1, -1, 0}, /* 2 */ { 5, s_0_2, -1, -1, 0} }; static const symbol s_1_0[1] = { '\'' }; static const symbol s_1_1[3] = { '\'', 's', '\'' }; static const symbol s_1_2[2] = { '\'', 's' }; static const struct among a_1[3] = { /* 0 */ { 1, s_1_0, -1, 1, 0}, /* 1 */ { 3, s_1_1, 0, 1, 0}, /* 2 */ { 2, s_1_2, -1, 1, 0} }; static const symbol s_2_0[3] = { 'i', 'e', 'd' }; static const symbol s_2_1[1] = { 's' }; static const symbol s_2_2[3] = { 'i', 'e', 's' }; static const symbol s_2_3[4] = { 's', 's', 'e', 's' }; static const symbol s_2_4[2] = 
{ 's', 's' }; static const symbol s_2_5[2] = { 'u', 's' }; static const struct among a_2[6] = { /* 0 */ { 3, s_2_0, -1, 2, 0}, /* 1 */ { 1, s_2_1, -1, 3, 0}, /* 2 */ { 3, s_2_2, 1, 2, 0}, /* 3 */ { 4, s_2_3, 1, 1, 0}, /* 4 */ { 2, s_2_4, 1, -1, 0}, /* 5 */ { 2, s_2_5, 1, -1, 0} }; static const symbol s_3_1[2] = { 'b', 'b' }; static const symbol s_3_2[2] = { 'd', 'd' }; static const symbol s_3_3[2] = { 'f', 'f' }; static const symbol s_3_4[2] = { 'g', 'g' }; static const symbol s_3_5[2] = { 'b', 'l' }; static const symbol s_3_6[2] = { 'm', 'm' }; static const symbol s_3_7[2] = { 'n', 'n' }; static const symbol s_3_8[2] = { 'p', 'p' }; static const symbol s_3_9[2] = { 'r', 'r' }; static const symbol s_3_10[2] = { 'a', 't' }; static const symbol s_3_11[2] = { 't', 't' }; static const symbol s_3_12[2] = { 'i', 'z' }; static const struct among a_3[13] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 2, s_3_1, 0, 2, 0}, /* 2 */ { 2, s_3_2, 0, 2, 0}, /* 3 */ { 2, s_3_3, 0, 2, 0}, /* 4 */ { 2, s_3_4, 0, 2, 0}, /* 5 */ { 2, s_3_5, 0, 1, 0}, /* 6 */ { 2, s_3_6, 0, 2, 0}, /* 7 */ { 2, s_3_7, 0, 2, 0}, /* 8 */ { 2, s_3_8, 0, 2, 0}, /* 9 */ { 2, s_3_9, 0, 2, 0}, /* 10 */ { 2, s_3_10, 0, 1, 0}, /* 11 */ { 2, s_3_11, 0, 2, 0}, /* 12 */ { 2, s_3_12, 0, 1, 0} }; static const symbol s_4_0[2] = { 'e', 'd' }; static const symbol s_4_1[3] = { 'e', 'e', 'd' }; static const symbol s_4_2[3] = { 'i', 'n', 'g' }; static const symbol s_4_3[4] = { 'e', 'd', 'l', 'y' }; static const symbol s_4_4[5] = { 'e', 'e', 'd', 'l', 'y' }; static const symbol s_4_5[5] = { 'i', 'n', 'g', 'l', 'y' }; static const struct among a_4[6] = { /* 0 */ { 2, s_4_0, -1, 2, 0}, /* 1 */ { 3, s_4_1, 0, 1, 0}, /* 2 */ { 3, s_4_2, -1, 2, 0}, /* 3 */ { 4, s_4_3, -1, 2, 0}, /* 4 */ { 5, s_4_4, 3, 1, 0}, /* 5 */ { 5, s_4_5, -1, 2, 0} }; static const symbol s_5_0[4] = { 'a', 'n', 'c', 'i' }; static const symbol s_5_1[4] = { 'e', 'n', 'c', 'i' }; static const symbol s_5_2[3] = { 'o', 'g', 'i' }; static const symbol s_5_3[2] = { 'l', 
'i' }; static const symbol s_5_4[3] = { 'b', 'l', 'i' }; static const symbol s_5_5[4] = { 'a', 'b', 'l', 'i' }; static const symbol s_5_6[4] = { 'a', 'l', 'l', 'i' }; static const symbol s_5_7[5] = { 'f', 'u', 'l', 'l', 'i' }; static const symbol s_5_8[6] = { 'l', 'e', 's', 's', 'l', 'i' }; static const symbol s_5_9[5] = { 'o', 'u', 's', 'l', 'i' }; static const symbol s_5_10[5] = { 'e', 'n', 't', 'l', 'i' }; static const symbol s_5_11[5] = { 'a', 'l', 'i', 't', 'i' }; static const symbol s_5_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' }; static const symbol s_5_13[5] = { 'i', 'v', 'i', 't', 'i' }; static const symbol s_5_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_5_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_5_16[5] = { 'a', 'l', 'i', 's', 'm' }; static const symbol s_5_17[5] = { 'a', 't', 'i', 'o', 'n' }; static const symbol s_5_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' }; static const symbol s_5_19[4] = { 'i', 'z', 'e', 'r' }; static const symbol s_5_20[4] = { 'a', 't', 'o', 'r' }; static const symbol s_5_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' }; static const symbol s_5_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' }; static const symbol s_5_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' }; static const struct among a_5[24] = { /* 0 */ { 4, s_5_0, -1, 3, 0}, /* 1 */ { 4, s_5_1, -1, 2, 0}, /* 2 */ { 3, s_5_2, -1, 13, 0}, /* 3 */ { 2, s_5_3, -1, 16, 0}, /* 4 */ { 3, s_5_4, 3, 12, 0}, /* 5 */ { 4, s_5_5, 4, 4, 0}, /* 6 */ { 4, s_5_6, 3, 8, 0}, /* 7 */ { 5, s_5_7, 3, 14, 0}, /* 8 */ { 6, s_5_8, 3, 15, 0}, /* 9 */ { 5, s_5_9, 3, 10, 0}, /* 10 */ { 5, s_5_10, 3, 5, 0}, /* 11 */ { 5, s_5_11, -1, 8, 0}, /* 12 */ { 6, s_5_12, -1, 12, 0}, /* 13 */ { 5, s_5_13, -1, 11, 0}, /* 14 */ { 6, s_5_14, -1, 1, 0}, /* 15 */ { 7, s_5_15, 14, 7, 0}, /* 16 */ { 5, s_5_16, -1, 8, 0}, /* 17 */ { 5, s_5_17, -1, 7, 0}, /* 18 */ { 7, s_5_18, 17, 6, 0}, /* 19 */ { 4, s_5_19, -1, 6, 0}, /* 20 */ { 4, s_5_20, -1, 7, 0}, /* 21 */ { 7, s_5_21, -1, 11, 
0}, /* 22 */ { 7, s_5_22, -1, 9, 0}, /* 23 */ { 7, s_5_23, -1, 10, 0} }; static const symbol s_6_0[5] = { 'i', 'c', 'a', 't', 'e' }; static const symbol s_6_1[5] = { 'a', 't', 'i', 'v', 'e' }; static const symbol s_6_2[5] = { 'a', 'l', 'i', 'z', 'e' }; static const symbol s_6_3[5] = { 'i', 'c', 'i', 't', 'i' }; static const symbol s_6_4[4] = { 'i', 'c', 'a', 'l' }; static const symbol s_6_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_6_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_6_7[3] = { 'f', 'u', 'l' }; static const symbol s_6_8[4] = { 'n', 'e', 's', 's' }; static const struct among a_6[9] = { /* 0 */ { 5, s_6_0, -1, 4, 0}, /* 1 */ { 5, s_6_1, -1, 6, 0}, /* 2 */ { 5, s_6_2, -1, 3, 0}, /* 3 */ { 5, s_6_3, -1, 4, 0}, /* 4 */ { 4, s_6_4, -1, 4, 0}, /* 5 */ { 6, s_6_5, -1, 1, 0}, /* 6 */ { 7, s_6_6, 5, 2, 0}, /* 7 */ { 3, s_6_7, -1, 5, 0}, /* 8 */ { 4, s_6_8, -1, 5, 0} }; static const symbol s_7_0[2] = { 'i', 'c' }; static const symbol s_7_1[4] = { 'a', 'n', 'c', 'e' }; static const symbol s_7_2[4] = { 'e', 'n', 'c', 'e' }; static const symbol s_7_3[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_7_4[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_7_5[3] = { 'a', 't', 'e' }; static const symbol s_7_6[3] = { 'i', 'v', 'e' }; static const symbol s_7_7[3] = { 'i', 'z', 'e' }; static const symbol s_7_8[3] = { 'i', 't', 'i' }; static const symbol s_7_9[2] = { 'a', 'l' }; static const symbol s_7_10[3] = { 'i', 's', 'm' }; static const symbol s_7_11[3] = { 'i', 'o', 'n' }; static const symbol s_7_12[2] = { 'e', 'r' }; static const symbol s_7_13[3] = { 'o', 'u', 's' }; static const symbol s_7_14[3] = { 'a', 'n', 't' }; static const symbol s_7_15[3] = { 'e', 'n', 't' }; static const symbol s_7_16[4] = { 'm', 'e', 'n', 't' }; static const symbol s_7_17[5] = { 'e', 'm', 'e', 'n', 't' }; static const struct among a_7[18] = { /* 0 */ { 2, s_7_0, -1, 1, 0}, /* 1 */ { 4, s_7_1, -1, 1, 0}, /* 2 */ { 4, s_7_2, -1, 1, 0}, /* 3 */ { 4, 
s_7_3, -1, 1, 0}, /* 4 */ { 4, s_7_4, -1, 1, 0}, /* 5 */ { 3, s_7_5, -1, 1, 0}, /* 6 */ { 3, s_7_6, -1, 1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 3, s_7_8, -1, 1, 0}, /* 9 */ { 2, s_7_9, -1, 1, 0}, /* 10 */ { 3, s_7_10, -1, 1, 0}, /* 11 */ { 3, s_7_11, -1, 2, 0}, /* 12 */ { 2, s_7_12, -1, 1, 0}, /* 13 */ { 3, s_7_13, -1, 1, 0}, /* 14 */ { 3, s_7_14, -1, 1, 0}, /* 15 */ { 3, s_7_15, -1, 1, 0}, /* 16 */ { 4, s_7_16, 15, 1, 0}, /* 17 */ { 5, s_7_17, 16, 1, 0} }; static const symbol s_8_0[1] = { 'e' }; static const symbol s_8_1[1] = { 'l' }; static const struct among a_8[2] = { /* 0 */ { 1, s_8_0, -1, 1, 0}, /* 1 */ { 1, s_8_1, -1, 2, 0} }; static const symbol s_9_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' }; static const symbol s_9_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' }; static const symbol s_9_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' }; static const symbol s_9_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' }; static const symbol s_9_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' }; static const symbol s_9_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' }; static const symbol s_9_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' }; static const symbol s_9_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' }; static const struct among a_9[8] = { /* 0 */ { 7, s_9_0, -1, -1, 0}, /* 1 */ { 7, s_9_1, -1, -1, 0}, /* 2 */ { 6, s_9_2, -1, -1, 0}, /* 3 */ { 7, s_9_3, -1, -1, 0}, /* 4 */ { 6, s_9_4, -1, -1, 0}, /* 5 */ { 7, s_9_5, -1, -1, 0}, /* 6 */ { 7, s_9_6, -1, -1, 0}, /* 7 */ { 6, s_9_7, -1, -1, 0} }; static const symbol s_10_0[5] = { 'a', 'n', 'd', 'e', 's' }; static const symbol s_10_1[5] = { 'a', 't', 'l', 'a', 's' }; static const symbol s_10_2[4] = { 'b', 'i', 'a', 's' }; static const symbol s_10_3[6] = { 'c', 'o', 's', 'm', 'o', 's' }; static const symbol s_10_4[5] = { 'd', 'y', 'i', 'n', 'g' }; static const symbol s_10_5[5] = { 'e', 'a', 'r', 'l', 'y' }; static const symbol s_10_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' }; static const symbol s_10_7[4] = { 'h', 'o', 'w', 'e' }; static const symbol 
s_10_8[4] = { 'i', 'd', 'l', 'y' }; static const symbol s_10_9[5] = { 'l', 'y', 'i', 'n', 'g' }; static const symbol s_10_10[4] = { 'n', 'e', 'w', 's' }; static const symbol s_10_11[4] = { 'o', 'n', 'l', 'y' }; static const symbol s_10_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' }; static const symbol s_10_13[5] = { 's', 'k', 'i', 'e', 's' }; static const symbol s_10_14[4] = { 's', 'k', 'i', 's' }; static const symbol s_10_15[3] = { 's', 'k', 'y' }; static const symbol s_10_16[5] = { 't', 'y', 'i', 'n', 'g' }; static const symbol s_10_17[4] = { 'u', 'g', 'l', 'y' }; static const struct among a_10[18] = { /* 0 */ { 5, s_10_0, -1, -1, 0}, /* 1 */ { 5, s_10_1, -1, -1, 0}, /* 2 */ { 4, s_10_2, -1, -1, 0}, /* 3 */ { 6, s_10_3, -1, -1, 0}, /* 4 */ { 5, s_10_4, -1, 3, 0}, /* 5 */ { 5, s_10_5, -1, 9, 0}, /* 6 */ { 6, s_10_6, -1, 7, 0}, /* 7 */ { 4, s_10_7, -1, -1, 0}, /* 8 */ { 4, s_10_8, -1, 6, 0}, /* 9 */ { 5, s_10_9, -1, 4, 0}, /* 10 */ { 4, s_10_10, -1, -1, 0}, /* 11 */ { 4, s_10_11, -1, 10, 0}, /* 12 */ { 6, s_10_12, -1, 11, 0}, /* 13 */ { 5, s_10_13, -1, 2, 0}, /* 14 */ { 4, s_10_14, -1, 1, 0}, /* 15 */ { 3, s_10_15, -1, -1, 0}, /* 16 */ { 5, s_10_16, -1, 5, 0}, /* 17 */ { 4, s_10_17, -1, 8, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1 }; static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; static const unsigned char g_valid_LI[] = { 55, 141, 2 }; static const symbol s_0[] = { '\'' }; static const symbol s_1[] = { 'y' }; static const symbol s_2[] = { 'Y' }; static const symbol s_3[] = { 'y' }; static const symbol s_4[] = { 'Y' }; static const symbol s_5[] = { 's', 's' }; static const symbol s_6[] = { 'i' }; static const symbol s_7[] = { 'i', 'e' }; static const symbol s_8[] = { 'e', 'e' }; static const symbol s_9[] = { 'e' }; static const symbol s_10[] = { 'e' }; static const symbol s_11[] = { 'y' }; static const symbol s_12[] = { 'Y' }; static const symbol s_13[] = { 'i' }; static const symbol s_14[] = { 't', 'i', 'o', 'n' }; static const symbol 
s_15[] = { 'e', 'n', 'c', 'e' }; static const symbol s_16[] = { 'a', 'n', 'c', 'e' }; static const symbol s_17[] = { 'a', 'b', 'l', 'e' }; static const symbol s_18[] = { 'e', 'n', 't' }; static const symbol s_19[] = { 'i', 'z', 'e' }; static const symbol s_20[] = { 'a', 't', 'e' }; static const symbol s_21[] = { 'a', 'l' }; static const symbol s_22[] = { 'f', 'u', 'l' }; static const symbol s_23[] = { 'o', 'u', 's' }; static const symbol s_24[] = { 'i', 'v', 'e' }; static const symbol s_25[] = { 'b', 'l', 'e' }; static const symbol s_26[] = { 'l' }; static const symbol s_27[] = { 'o', 'g' }; static const symbol s_28[] = { 'f', 'u', 'l' }; static const symbol s_29[] = { 'l', 'e', 's', 's' }; static const symbol s_30[] = { 't', 'i', 'o', 'n' }; static const symbol s_31[] = { 'a', 't', 'e' }; static const symbol s_32[] = { 'a', 'l' }; static const symbol s_33[] = { 'i', 'c' }; static const symbol s_34[] = { 's' }; static const symbol s_35[] = { 't' }; static const symbol s_36[] = { 'l' }; static const symbol s_37[] = { 's', 'k', 'i' }; static const symbol s_38[] = { 's', 'k', 'y' }; static const symbol s_39[] = { 'd', 'i', 'e' }; static const symbol s_40[] = { 'l', 'i', 'e' }; static const symbol s_41[] = { 't', 'i', 'e' }; static const symbol s_42[] = { 'i', 'd', 'l' }; static const symbol s_43[] = { 'g', 'e', 'n', 't', 'l' }; static const symbol s_44[] = { 'u', 'g', 'l', 'i' }; static const symbol s_45[] = { 'e', 'a', 'r', 'l', 'i' }; static const symbol s_46[] = { 'o', 'n', 'l', 'i' }; static const symbol s_47[] = { 's', 'i', 'n', 'g', 'l' }; static const symbol s_48[] = { 'Y' }; static const symbol s_49[] = { 'y' }; static int r_prelude(struct SN_env * z) { z->B[0] = 0; /* unset Y_found, line 26 */ { int c1 = z->c; /* do, line 27 */ z->bra = z->c; /* [, line 27 */ if (!(eq_s(z, 1, s_0))) goto lab0; z->ket = z->c; /* ], line 27 */ { int ret = slice_del(z); /* delete, line 27 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 28 */ z->bra 
= z->c; /* [, line 28 */ if (!(eq_s(z, 1, s_1))) goto lab1; z->ket = z->c; /* ], line 28 */ { int ret = slice_from_s(z, 1, s_2); /* <-, line 28 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 28 */ lab1: z->c = c2; } { int c3 = z->c; /* do, line 29 */ while(1) { /* repeat, line 29 */ int c4 = z->c; while(1) { /* goto, line 29 */ int c5 = z->c; if (in_grouping_U(z, g_v, 97, 121, 0)) goto lab4; z->bra = z->c; /* [, line 29 */ if (!(eq_s(z, 1, s_3))) goto lab4; z->ket = z->c; /* ], line 29 */ z->c = c5; break; lab4: z->c = c5; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab3; z->c = ret; /* goto, line 29 */ } } { int ret = slice_from_s(z, 1, s_4); /* <-, line 29 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 29 */ continue; lab3: z->c = c4; break; } z->c = c3; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { int c1 = z->c; /* do, line 35 */ { int c2 = z->c; /* or, line 41 */ if (z->c + 4 >= z->l || z->p[z->c + 4] >> 5 != 3 || !((2375680 >> (z->p[z->c + 4] & 0x1f)) & 1)) goto lab2; if (!(find_among(z, a_0, 3))) goto lab2; /* among, line 36 */ goto lab1; lab2: z->c = c2; { /* gopast */ /* grouping v, line 41 */ int ret = out_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* non v, line 41 */ int ret = in_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } } lab1: z->I[0] = z->c; /* setmark p1, line 42 */ { /* gopast */ /* grouping v, line 43 */ int ret = out_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* non v, line 43 */ int ret = in_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 43 */ lab0: z->c = c1; } return 1; } static int r_shortv(struct SN_env * z) { { int m1 = z->l - z->c; (void)m1; /* or, line 51 */ if (out_grouping_b_U(z, g_v_WXY, 89, 121, 0)) goto lab1; if (in_grouping_b_U(z, g_v, 97, 121, 0)) goto lab1; 
if (out_grouping_b_U(z, g_v, 97, 121, 0)) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (out_grouping_b_U(z, g_v, 97, 121, 0)) return 0; if (in_grouping_b_U(z, g_v, 97, 121, 0)) return 0; if (z->c > z->lb) return 0; /* atlimit, line 52 */ } lab0: return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_Step_1a(struct SN_env * z) { int among_var; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 59 */ z->ket = z->c; /* [, line 60 */ if (z->c <= z->lb || (z->p[z->c - 1] != 39 && z->p[z->c - 1] != 115)) { z->c = z->l - m_keep; goto lab0; } among_var = find_among_b(z, a_1, 3); /* substring, line 60 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 60 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab0; } case 1: { int ret = slice_del(z); /* delete, line 62 */ if (ret < 0) return ret; } break; } lab0: ; } z->ket = z->c; /* [, line 65 */ if (z->c <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 115)) return 0; among_var = find_among_b(z, a_2, 6); /* substring, line 65 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 65 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 2, s_5); /* <-, line 66 */ if (ret < 0) return ret; } break; case 2: { int m1 = z->l - z->c; (void)m1; /* or, line 68 */ { int ret = skip_utf8(z->p, z->c, z->lb, z->l, - 2); if (ret < 0) goto lab2; z->c = ret; /* hop, line 68 */ } { int ret = slice_from_s(z, 1, s_6); /* <-, line 68 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = z->l - m1; { int ret = slice_from_s(z, 2, s_7); /* <-, line 68 */ if (ret < 0) return ret; } } lab1: break; case 3: { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 69 */ } { /* gopast */ /* grouping v, line 69 */ int ret = out_grouping_b_U(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; 
} { int ret = slice_del(z); /* delete, line 69 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_1b(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 75 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((33554576 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_4, 6); /* substring, line 75 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 75 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 77 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 2, s_8); /* <-, line 77 */ if (ret < 0) return ret; } break; case 2: { int m_test = z->l - z->c; /* test, line 80 */ { /* gopast */ /* grouping v, line 80 */ int ret = out_grouping_b_U(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 80 */ if (ret < 0) return ret; } { int m_test = z->l - z->c; /* test, line 81 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68514004 >> (z->p[z->c - 1] & 0x1f)) & 1)) among_var = 3; else among_var = find_among_b(z, a_3, 13); /* substring, line 81 */ if (!(among_var)) return 0; z->c = z->l - m_test; } switch(among_var) { case 0: return 0; case 1: { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_9); /* <+, line 83 */ z->c = c_keep; if (ret < 0) return ret; } break; case 2: z->ket = z->c; /* [, line 86 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 86 */ } z->bra = z->c; /* ], line 86 */ { int ret = slice_del(z); /* delete, line 86 */ if (ret < 0) return ret; } break; case 3: if (z->c != z->I[0]) return 0; /* atmark, line 87 */ { int m_test = z->l - z->c; /* test, line 87 */ { int ret = r_shortv(z); if (ret == 0) return 0; /* call shortv, line 87 */ if (ret < 0) return ret; } z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_10); /* <+, line 87 */ z->c = c_keep; if (ret < 0) 
return ret; } break; } break; } return 1; } static int r_Step_1c(struct SN_env * z) { z->ket = z->c; /* [, line 94 */ { int m1 = z->l - z->c; (void)m1; /* or, line 94 */ if (!(eq_s_b(z, 1, s_11))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_12))) return 0; } lab0: z->bra = z->c; /* ], line 94 */ if (out_grouping_b_U(z, g_v, 97, 121, 0)) return 0; { int m2 = z->l - z->c; (void)m2; /* not, line 95 */ if (z->c > z->lb) goto lab2; /* atlimit, line 95 */ return 0; lab2: z->c = z->l - m2; } { int ret = slice_from_s(z, 1, s_13); /* <-, line 96 */ if (ret < 0) return ret; } return 1; } static int r_Step_2(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 100 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_5, 24); /* substring, line 100 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 100 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 100 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_14); /* <-, line 101 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 4, s_15); /* <-, line 102 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 4, s_16); /* <-, line 103 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 4, s_17); /* <-, line 104 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 3, s_18); /* <-, line 105 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 3, s_19); /* <-, line 107 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 3, s_20); /* <-, line 109 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 2, s_21); /* <-, line 111 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_from_s(z, 3, s_22); /* <-, line 112 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 3, 
s_23); /* <-, line 114 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_from_s(z, 3, s_24); /* <-, line 116 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_from_s(z, 3, s_25); /* <-, line 118 */ if (ret < 0) return ret; } break; case 13: if (!(eq_s_b(z, 1, s_26))) return 0; { int ret = slice_from_s(z, 2, s_27); /* <-, line 119 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_from_s(z, 3, s_28); /* <-, line 120 */ if (ret < 0) return ret; } break; case 15: { int ret = slice_from_s(z, 4, s_29); /* <-, line 121 */ if (ret < 0) return ret; } break; case 16: if (in_grouping_b_U(z, g_valid_LI, 99, 116, 0)) return 0; { int ret = slice_del(z); /* delete, line 122 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_3(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 127 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((528928 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_6, 9); /* substring, line 127 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 127 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 127 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_30); /* <-, line 128 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 3, s_31); /* <-, line 129 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 2, s_32); /* <-, line 130 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 2, s_33); /* <-, line 132 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } break; case 6: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 136 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 136 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_4(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 141 */ if 
(z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1864232 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_7, 18); /* substring, line 141 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 141 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 141 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 144 */ if (ret < 0) return ret; } break; case 2: { int m1 = z->l - z->c; (void)m1; /* or, line 145 */ if (!(eq_s_b(z, 1, s_34))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_35))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 145 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_5(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 150 */ if (z->c <= z->lb || (z->p[z->c - 1] != 101 && z->p[z->c - 1] != 108)) return 0; among_var = find_among_b(z, a_8, 2); /* substring, line 150 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 150 */ switch(among_var) { case 0: return 0; case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 151 */ { int ret = r_R2(z); if (ret == 0) goto lab1; /* call R2, line 151 */ if (ret < 0) return ret; } goto lab0; lab1: z->c = z->l - m1; { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 151 */ if (ret < 0) return ret; } { int m2 = z->l - z->c; (void)m2; /* not, line 151 */ { int ret = r_shortv(z); if (ret == 0) goto lab2; /* call shortv, line 151 */ if (ret < 0) return ret; } return 0; lab2: z->c = z->l - m2; } } lab0: { int ret = slice_del(z); /* delete, line 151 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 152 */ if (ret < 0) return ret; } if (!(eq_s_b(z, 1, s_36))) return 0; { int ret = slice_del(z); /* delete, line 152 */ if (ret < 0) return ret; } break; } return 1; } static int r_exception2(struct SN_env * z) { z->ket = z->c; /* [, line 158 */ if (z->c - 5 <= z->lb || (z->p[z->c - 1] != 
100 && z->p[z->c - 1] != 103)) return 0; if (!(find_among_b(z, a_9, 8))) return 0; /* substring, line 158 */ z->bra = z->c; /* ], line 158 */ if (z->c > z->lb) return 0; /* atlimit, line 158 */ return 1; } static int r_exception1(struct SN_env * z) { int among_var; z->bra = z->c; /* [, line 170 */ if (z->c + 2 >= z->l || z->p[z->c + 2] >> 5 != 3 || !((42750482 >> (z->p[z->c + 2] & 0x1f)) & 1)) return 0; among_var = find_among(z, a_10, 18); /* substring, line 170 */ if (!(among_var)) return 0; z->ket = z->c; /* ], line 170 */ if (z->c < z->l) return 0; /* atlimit, line 170 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 3, s_37); /* <-, line 174 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 3, s_38); /* <-, line 175 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 3, s_39); /* <-, line 176 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 3, s_40); /* <-, line 177 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 3, s_41); /* <-, line 178 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 3, s_42); /* <-, line 182 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 5, s_43); /* <-, line 183 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 4, s_44); /* <-, line 184 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_from_s(z, 5, s_45); /* <-, line 185 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 4, s_46); /* <-, line 186 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_from_s(z, 5, s_47); /* <-, line 187 */ if (ret < 0) return ret; } break; } return 1; } static int r_postlude(struct SN_env * z) { if (!(z->B[0])) return 0; /* Boolean test Y_found, line 203 */ while(1) { /* repeat, line 203 */ int c1 = z->c; while(1) { /* goto, line 203 */ int c2 = z->c; z->bra = z->c; /* [, line 203 */ if (!(eq_s(z, 1, s_48))) goto lab1; 
z->ket = z->c; /* ], line 203 */ z->c = c2; break; lab1: z->c = c2; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* goto, line 203 */ } } { int ret = slice_from_s(z, 1, s_49); /* <-, line 203 */ if (ret < 0) return ret; } continue; lab0: z->c = c1; break; } return 1; } extern int english_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* or, line 207 */ { int ret = r_exception1(z); if (ret == 0) goto lab1; /* call exception1, line 207 */ if (ret < 0) return ret; } goto lab0; lab1: z->c = c1; { int c2 = z->c; /* not, line 208 */ { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); if (ret < 0) goto lab3; z->c = ret; /* hop, line 208 */ } goto lab2; lab3: z->c = c2; } goto lab0; lab2: z->c = c1; { int c3 = z->c; /* do, line 209 */ { int ret = r_prelude(z); if (ret == 0) goto lab4; /* call prelude, line 209 */ if (ret < 0) return ret; } lab4: z->c = c3; } { int c4 = z->c; /* do, line 210 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab5; /* call mark_regions, line 210 */ if (ret < 0) return ret; } lab5: z->c = c4; } z->lb = z->c; z->c = z->l; /* backwards, line 211 */ { int m5 = z->l - z->c; (void)m5; /* do, line 213 */ { int ret = r_Step_1a(z); if (ret == 0) goto lab6; /* call Step_1a, line 213 */ if (ret < 0) return ret; } lab6: z->c = z->l - m5; } { int m6 = z->l - z->c; (void)m6; /* or, line 215 */ { int ret = r_exception2(z); if (ret == 0) goto lab8; /* call exception2, line 215 */ if (ret < 0) return ret; } goto lab7; lab8: z->c = z->l - m6; { int m7 = z->l - z->c; (void)m7; /* do, line 217 */ { int ret = r_Step_1b(z); if (ret == 0) goto lab9; /* call Step_1b, line 217 */ if (ret < 0) return ret; } lab9: z->c = z->l - m7; } { int m8 = z->l - z->c; (void)m8; /* do, line 218 */ { int ret = r_Step_1c(z); if (ret == 0) goto lab10; /* call Step_1c, line 218 */ if (ret < 0) return ret; } lab10: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 220 */ { int ret = r_Step_2(z); if (ret == 0) goto lab11; /* call 
Step_2, line 220 */ if (ret < 0) return ret; } lab11: z->c = z->l - m9; } { int m10 = z->l - z->c; (void)m10; /* do, line 221 */ { int ret = r_Step_3(z); if (ret == 0) goto lab12; /* call Step_3, line 221 */ if (ret < 0) return ret; } lab12: z->c = z->l - m10; } { int m11 = z->l - z->c; (void)m11; /* do, line 222 */ { int ret = r_Step_4(z); if (ret == 0) goto lab13; /* call Step_4, line 222 */ if (ret < 0) return ret; } lab13: z->c = z->l - m11; } { int m12 = z->l - z->c; (void)m12; /* do, line 224 */ { int ret = r_Step_5(z); if (ret == 0) goto lab14; /* call Step_5, line 224 */ if (ret < 0) return ret; } lab14: z->c = z->l - m12; } } lab7: z->c = z->lb; { int c13 = z->c; /* do, line 227 */ { int ret = r_postlude(z); if (ret == 0) goto lab15; /* call postlude, line 227 */ if (ret < 0) return ret; } lab15: z->c = c13; } } lab0: return 1; } extern struct SN_env * english_UTF_8_create_env(void) { return SN_create_env(0, 2, 1); } extern void english_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_english.h000066400000000000000000000004661456444476200307430ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* english_UTF_8_create_env(void); extern void english_UTF_8_close_env(struct SN_env* z); extern int english_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_finnish.c000066400000000000000000000632731456444476200307500ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int finnish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_tidy(struct SN_env * z); static int r_other_endings(struct SN_env * z); static int r_t_plural(struct 
SN_env * z); static int r_i_plural(struct SN_env * z); static int r_case_ending(struct SN_env * z); static int r_VI(struct SN_env * z); static int r_LONG(struct SN_env * z); static int r_possessive(struct SN_env * z); static int r_particle_etc(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * finnish_UTF_8_create_env(void); extern void finnish_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[2] = { 'p', 'a' }; static const symbol s_0_1[3] = { 's', 't', 'i' }; static const symbol s_0_2[4] = { 'k', 'a', 'a', 'n' }; static const symbol s_0_3[3] = { 'h', 'a', 'n' }; static const symbol s_0_4[3] = { 'k', 'i', 'n' }; static const symbol s_0_5[4] = { 'h', 0xC3, 0xA4, 'n' }; static const symbol s_0_6[6] = { 'k', 0xC3, 0xA4, 0xC3, 0xA4, 'n' }; static const symbol s_0_7[2] = { 'k', 'o' }; static const symbol s_0_8[3] = { 'p', 0xC3, 0xA4 }; static const symbol s_0_9[3] = { 'k', 0xC3, 0xB6 }; static const struct among a_0[10] = { /* 0 */ { 2, s_0_0, -1, 1, 0}, /* 1 */ { 3, s_0_1, -1, 2, 0}, /* 2 */ { 4, s_0_2, -1, 1, 0}, /* 3 */ { 3, s_0_3, -1, 1, 0}, /* 4 */ { 3, s_0_4, -1, 1, 0}, /* 5 */ { 4, s_0_5, -1, 1, 0}, /* 6 */ { 6, s_0_6, -1, 1, 0}, /* 7 */ { 2, s_0_7, -1, 1, 0}, /* 8 */ { 3, s_0_8, -1, 1, 0}, /* 9 */ { 3, s_0_9, -1, 1, 0} }; static const symbol s_1_0[3] = { 'l', 'l', 'a' }; static const symbol s_1_1[2] = { 'n', 'a' }; static const symbol s_1_2[3] = { 's', 's', 'a' }; static const symbol s_1_3[2] = { 't', 'a' }; static const symbol s_1_4[3] = { 'l', 't', 'a' }; static const symbol s_1_5[3] = { 's', 't', 'a' }; static const struct among a_1[6] = { /* 0 */ { 3, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0}, /* 2 */ { 3, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0}, /* 4 */ { 3, s_1_4, 3, -1, 0}, /* 5 */ { 3, s_1_5, 3, -1, 0} }; static const symbol s_2_0[4] = { 'l', 'l', 0xC3, 0xA4 }; static const symbol 
s_2_1[3] = { 'n', 0xC3, 0xA4 }; static const symbol s_2_2[4] = { 's', 's', 0xC3, 0xA4 }; static const symbol s_2_3[3] = { 't', 0xC3, 0xA4 }; static const symbol s_2_4[4] = { 'l', 't', 0xC3, 0xA4 }; static const symbol s_2_5[4] = { 's', 't', 0xC3, 0xA4 }; static const struct among a_2[6] = { /* 0 */ { 4, s_2_0, -1, -1, 0}, /* 1 */ { 3, s_2_1, -1, -1, 0}, /* 2 */ { 4, s_2_2, -1, -1, 0}, /* 3 */ { 3, s_2_3, -1, -1, 0}, /* 4 */ { 4, s_2_4, 3, -1, 0}, /* 5 */ { 4, s_2_5, 3, -1, 0} }; static const symbol s_3_0[3] = { 'l', 'l', 'e' }; static const symbol s_3_1[3] = { 'i', 'n', 'e' }; static const struct among a_3[2] = { /* 0 */ { 3, s_3_0, -1, -1, 0}, /* 1 */ { 3, s_3_1, -1, -1, 0} }; static const symbol s_4_0[3] = { 'n', 's', 'a' }; static const symbol s_4_1[3] = { 'm', 'm', 'e' }; static const symbol s_4_2[3] = { 'n', 'n', 'e' }; static const symbol s_4_3[2] = { 'n', 'i' }; static const symbol s_4_4[2] = { 's', 'i' }; static const symbol s_4_5[2] = { 'a', 'n' }; static const symbol s_4_6[2] = { 'e', 'n' }; static const symbol s_4_7[3] = { 0xC3, 0xA4, 'n' }; static const symbol s_4_8[4] = { 'n', 's', 0xC3, 0xA4 }; static const struct among a_4[9] = { /* 0 */ { 3, s_4_0, -1, 3, 0}, /* 1 */ { 3, s_4_1, -1, 3, 0}, /* 2 */ { 3, s_4_2, -1, 3, 0}, /* 3 */ { 2, s_4_3, -1, 2, 0}, /* 4 */ { 2, s_4_4, -1, 1, 0}, /* 5 */ { 2, s_4_5, -1, 4, 0}, /* 6 */ { 2, s_4_6, -1, 6, 0}, /* 7 */ { 3, s_4_7, -1, 5, 0}, /* 8 */ { 4, s_4_8, -1, 3, 0} }; static const symbol s_5_0[2] = { 'a', 'a' }; static const symbol s_5_1[2] = { 'e', 'e' }; static const symbol s_5_2[2] = { 'i', 'i' }; static const symbol s_5_3[2] = { 'o', 'o' }; static const symbol s_5_4[2] = { 'u', 'u' }; static const symbol s_5_5[4] = { 0xC3, 0xA4, 0xC3, 0xA4 }; static const symbol s_5_6[4] = { 0xC3, 0xB6, 0xC3, 0xB6 }; static const struct among a_5[7] = { /* 0 */ { 2, s_5_0, -1, -1, 0}, /* 1 */ { 2, s_5_1, -1, -1, 0}, /* 2 */ { 2, s_5_2, -1, -1, 0}, /* 3 */ { 2, s_5_3, -1, -1, 0}, /* 4 */ { 2, s_5_4, -1, -1, 0}, /* 5 */ { 4, 
s_5_5, -1, -1, 0}, /* 6 */ { 4, s_5_6, -1, -1, 0} }; static const symbol s_6_0[1] = { 'a' }; static const symbol s_6_1[3] = { 'l', 'l', 'a' }; static const symbol s_6_2[2] = { 'n', 'a' }; static const symbol s_6_3[3] = { 's', 's', 'a' }; static const symbol s_6_4[2] = { 't', 'a' }; static const symbol s_6_5[3] = { 'l', 't', 'a' }; static const symbol s_6_6[3] = { 's', 't', 'a' }; static const symbol s_6_7[3] = { 't', 't', 'a' }; static const symbol s_6_8[3] = { 'l', 'l', 'e' }; static const symbol s_6_9[3] = { 'i', 'n', 'e' }; static const symbol s_6_10[3] = { 'k', 's', 'i' }; static const symbol s_6_11[1] = { 'n' }; static const symbol s_6_12[3] = { 'h', 'a', 'n' }; static const symbol s_6_13[3] = { 'd', 'e', 'n' }; static const symbol s_6_14[4] = { 's', 'e', 'e', 'n' }; static const symbol s_6_15[3] = { 'h', 'e', 'n' }; static const symbol s_6_16[4] = { 't', 't', 'e', 'n' }; static const symbol s_6_17[3] = { 'h', 'i', 'n' }; static const symbol s_6_18[4] = { 's', 'i', 'i', 'n' }; static const symbol s_6_19[3] = { 'h', 'o', 'n' }; static const symbol s_6_20[4] = { 'h', 0xC3, 0xA4, 'n' }; static const symbol s_6_21[4] = { 'h', 0xC3, 0xB6, 'n' }; static const symbol s_6_22[2] = { 0xC3, 0xA4 }; static const symbol s_6_23[4] = { 'l', 'l', 0xC3, 0xA4 }; static const symbol s_6_24[3] = { 'n', 0xC3, 0xA4 }; static const symbol s_6_25[4] = { 's', 's', 0xC3, 0xA4 }; static const symbol s_6_26[3] = { 't', 0xC3, 0xA4 }; static const symbol s_6_27[4] = { 'l', 't', 0xC3, 0xA4 }; static const symbol s_6_28[4] = { 's', 't', 0xC3, 0xA4 }; static const symbol s_6_29[4] = { 't', 't', 0xC3, 0xA4 }; static const struct among a_6[30] = { /* 0 */ { 1, s_6_0, -1, 8, 0}, /* 1 */ { 3, s_6_1, 0, -1, 0}, /* 2 */ { 2, s_6_2, 0, -1, 0}, /* 3 */ { 3, s_6_3, 0, -1, 0}, /* 4 */ { 2, s_6_4, 0, -1, 0}, /* 5 */ { 3, s_6_5, 4, -1, 0}, /* 6 */ { 3, s_6_6, 4, -1, 0}, /* 7 */ { 3, s_6_7, 4, 9, 0}, /* 8 */ { 3, s_6_8, -1, -1, 0}, /* 9 */ { 3, s_6_9, -1, -1, 0}, /* 10 */ { 3, s_6_10, -1, -1, 0}, /* 11 */ 
{ 1, s_6_11, -1, 7, 0}, /* 12 */ { 3, s_6_12, 11, 1, 0}, /* 13 */ { 3, s_6_13, 11, -1, r_VI}, /* 14 */ { 4, s_6_14, 11, -1, r_LONG}, /* 15 */ { 3, s_6_15, 11, 2, 0}, /* 16 */ { 4, s_6_16, 11, -1, r_VI}, /* 17 */ { 3, s_6_17, 11, 3, 0}, /* 18 */ { 4, s_6_18, 11, -1, r_VI}, /* 19 */ { 3, s_6_19, 11, 4, 0}, /* 20 */ { 4, s_6_20, 11, 5, 0}, /* 21 */ { 4, s_6_21, 11, 6, 0}, /* 22 */ { 2, s_6_22, -1, 8, 0}, /* 23 */ { 4, s_6_23, 22, -1, 0}, /* 24 */ { 3, s_6_24, 22, -1, 0}, /* 25 */ { 4, s_6_25, 22, -1, 0}, /* 26 */ { 3, s_6_26, 22, -1, 0}, /* 27 */ { 4, s_6_27, 26, -1, 0}, /* 28 */ { 4, s_6_28, 26, -1, 0}, /* 29 */ { 4, s_6_29, 26, 9, 0} }; static const symbol s_7_0[3] = { 'e', 'j', 'a' }; static const symbol s_7_1[3] = { 'm', 'm', 'a' }; static const symbol s_7_2[4] = { 'i', 'm', 'm', 'a' }; static const symbol s_7_3[3] = { 'm', 'p', 'a' }; static const symbol s_7_4[4] = { 'i', 'm', 'p', 'a' }; static const symbol s_7_5[3] = { 'm', 'm', 'i' }; static const symbol s_7_6[4] = { 'i', 'm', 'm', 'i' }; static const symbol s_7_7[3] = { 'm', 'p', 'i' }; static const symbol s_7_8[4] = { 'i', 'm', 'p', 'i' }; static const symbol s_7_9[4] = { 'e', 'j', 0xC3, 0xA4 }; static const symbol s_7_10[4] = { 'm', 'm', 0xC3, 0xA4 }; static const symbol s_7_11[5] = { 'i', 'm', 'm', 0xC3, 0xA4 }; static const symbol s_7_12[4] = { 'm', 'p', 0xC3, 0xA4 }; static const symbol s_7_13[5] = { 'i', 'm', 'p', 0xC3, 0xA4 }; static const struct among a_7[14] = { /* 0 */ { 3, s_7_0, -1, -1, 0}, /* 1 */ { 3, s_7_1, -1, 1, 0}, /* 2 */ { 4, s_7_2, 1, -1, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 4, s_7_4, 3, -1, 0}, /* 5 */ { 3, s_7_5, -1, 1, 0}, /* 6 */ { 4, s_7_6, 5, -1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 4, s_7_8, 7, -1, 0}, /* 9 */ { 4, s_7_9, -1, -1, 0}, /* 10 */ { 4, s_7_10, -1, 1, 0}, /* 11 */ { 5, s_7_11, 10, -1, 0}, /* 12 */ { 4, s_7_12, -1, 1, 0}, /* 13 */ { 5, s_7_13, 12, -1, 0} }; static const symbol s_8_0[1] = { 'i' }; static const symbol s_8_1[1] = { 'j' }; static const struct 
among a_8[2] = { /* 0 */ { 1, s_8_0, -1, -1, 0}, /* 1 */ { 1, s_8_1, -1, -1, 0} }; static const symbol s_9_0[3] = { 'm', 'm', 'a' }; static const symbol s_9_1[4] = { 'i', 'm', 'm', 'a' }; static const struct among a_9[2] = { /* 0 */ { 3, s_9_0, -1, 1, 0}, /* 1 */ { 4, s_9_1, 0, -1, 0} }; static const unsigned char g_AEI[] = { 17, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }; static const unsigned char g_V1[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; static const unsigned char g_V2[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; static const unsigned char g_particle_end[] = { 17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; static const symbol s_0[] = { 'k' }; static const symbol s_1[] = { 'k', 's', 'e' }; static const symbol s_2[] = { 'k', 's', 'i' }; static const symbol s_3[] = { 'i' }; static const symbol s_4[] = { 'a' }; static const symbol s_5[] = { 'e' }; static const symbol s_6[] = { 'i' }; static const symbol s_7[] = { 'o' }; static const symbol s_8[] = { 0xC3, 0xA4 }; static const symbol s_9[] = { 0xC3, 0xB6 }; static const symbol s_10[] = { 'i', 'e' }; static const symbol s_11[] = { 'e' }; static const symbol s_12[] = { 'p', 'o' }; static const symbol s_13[] = { 't' }; static const symbol s_14[] = { 'p', 'o' }; static const symbol s_15[] = { 'j' }; static const symbol s_16[] = { 'o' }; static const symbol s_17[] = { 'u' }; static const symbol s_18[] = { 'o' }; static const symbol s_19[] = { 'j' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; if (out_grouping_U(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* grouping V1, line 46 */ { /* gopast */ /* non V1, line 46 */ int ret = in_grouping_U(z, g_V1, 97, 246, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 46 */ if (out_grouping_U(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* grouping V1, line 47 */ { /* gopast */ /* non V1, line 47 */ int ret = in_grouping_U(z, g_V1, 97, 
246, 1); if (ret < 0) return 0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 47 */ return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_particle_etc(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 55 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 55 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 55 */ among_var = find_among_b(z, a_0, 10); /* substring, line 55 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 55 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: if (in_grouping_b_U(z, g_particle_end, 97, 246, 0)) return 0; break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 64 */ if (ret < 0) return ret; } break; } { int ret = slice_del(z); /* delete, line 66 */ if (ret < 0) return ret; } return 1; } static int r_possessive(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 69 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 69 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 69 */ among_var = find_among_b(z, a_4, 9); /* substring, line 69 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 69 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m2 = z->l - z->c; (void)m2; /* not, line 72 */ if (!(eq_s_b(z, 1, s_0))) goto lab0; return 0; lab0: z->c = z->l - m2; } { int ret = slice_del(z); /* delete, line 72 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 74 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 74 */ if (!(eq_s_b(z, 3, s_1))) return 0; z->bra = z->c; /* ], line 74 */ { int ret = slice_from_s(z, 3, s_2); /* <-, line 74 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 78 */ if (ret < 0) return ret; } break; case 
4: if (z->c - 1 <= z->lb || z->p[z->c - 1] != 97) return 0; if (!(find_among_b(z, a_1, 6))) return 0; /* among, line 81 */ { int ret = slice_del(z); /* delete, line 81 */ if (ret < 0) return ret; } break; case 5: if (z->c - 2 <= z->lb || z->p[z->c - 1] != 164) return 0; if (!(find_among_b(z, a_2, 6))) return 0; /* among, line 83 */ { int ret = slice_del(z); /* delete, line 84 */ if (ret < 0) return ret; } break; case 6: if (z->c - 2 <= z->lb || z->p[z->c - 1] != 101) return 0; if (!(find_among_b(z, a_3, 2))) return 0; /* among, line 86 */ { int ret = slice_del(z); /* delete, line 86 */ if (ret < 0) return ret; } break; } return 1; } static int r_LONG(struct SN_env * z) { if (!(find_among_b(z, a_5, 7))) return 0; /* among, line 91 */ return 1; } static int r_VI(struct SN_env * z) { if (!(eq_s_b(z, 1, s_3))) return 0; if (in_grouping_b_U(z, g_V2, 97, 246, 0)) return 0; return 1; } static int r_case_ending(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 96 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 96 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 96 */ among_var = find_among_b(z, a_6, 30); /* substring, line 96 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 96 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: if (!(eq_s_b(z, 1, s_4))) return 0; break; case 2: if (!(eq_s_b(z, 1, s_5))) return 0; break; case 3: if (!(eq_s_b(z, 1, s_6))) return 0; break; case 4: if (!(eq_s_b(z, 1, s_7))) return 0; break; case 5: if (!(eq_s_b(z, 2, s_8))) return 0; break; case 6: if (!(eq_s_b(z, 2, s_9))) return 0; break; case 7: { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ { int m2 = z->l - z->c; (void)m2; /* and, line 113 */ { int m3 = z->l - z->c; (void)m3; /* or, line 112 */ { int ret = r_LONG(z); if (ret == 0) goto lab2; /* call LONG, line 111 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = z->l - m3; if 
(!(eq_s_b(z, 2, s_10))) { z->c = z->l - m_keep; goto lab0; } } lab1: z->c = z->l - m2; { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) { z->c = z->l - m_keep; goto lab0; } z->c = ret; /* next, line 113 */ } } z->bra = z->c; /* ], line 113 */ lab0: ; } break; case 8: if (in_grouping_b_U(z, g_V1, 97, 246, 0)) return 0; if (out_grouping_b_U(z, g_V1, 97, 246, 0)) return 0; break; case 9: if (!(eq_s_b(z, 1, s_11))) return 0; break; } { int ret = slice_del(z); /* delete, line 138 */ if (ret < 0) return ret; } z->B[0] = 1; /* set ending_removed, line 139 */ return 1; } static int r_other_endings(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 142 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[1]) return 0; z->c = z->I[1]; /* tomark, line 142 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 142 */ among_var = find_among_b(z, a_7, 14); /* substring, line 142 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 142 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m2 = z->l - z->c; (void)m2; /* not, line 146 */ if (!(eq_s_b(z, 2, s_12))) goto lab0; return 0; lab0: z->c = z->l - m2; } break; } { int ret = slice_del(z); /* delete, line 151 */ if (ret < 0) return ret; } return 1; } static int r_i_plural(struct SN_env * z) { { int mlimit; /* setlimit, line 154 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 154 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 154 */ if (z->c <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 106)) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_8, 2))) { z->lb = mlimit; return 0; } /* substring, line 154 */ z->bra = z->c; /* ], line 154 */ z->lb = mlimit; } { int ret = slice_del(z); /* delete, line 158 */ if (ret < 0) return ret; } return 1; } static int r_t_plural(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 161 */ int m1 = 
z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 161 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 162 */ if (!(eq_s_b(z, 1, s_13))) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 162 */ { int m_test = z->l - z->c; /* test, line 162 */ if (in_grouping_b_U(z, g_V1, 97, 246, 0)) { z->lb = mlimit; return 0; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 163 */ if (ret < 0) return ret; } z->lb = mlimit; } { int mlimit; /* setlimit, line 165 */ int m2 = z->l - z->c; (void)m2; if (z->c < z->I[1]) return 0; z->c = z->I[1]; /* tomark, line 165 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m2; z->ket = z->c; /* [, line 165 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] != 97) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_9, 2); /* substring, line 165 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 165 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m3 = z->l - z->c; (void)m3; /* not, line 167 */ if (!(eq_s_b(z, 2, s_14))) goto lab0; return 0; lab0: z->c = z->l - m3; } break; } { int ret = slice_del(z); /* delete, line 170 */ if (ret < 0) return ret; } return 1; } static int r_tidy(struct SN_env * z) { { int mlimit; /* setlimit, line 173 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 173 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* do, line 174 */ { int m3 = z->l - z->c; (void)m3; /* and, line 174 */ { int ret = r_LONG(z); if (ret == 0) goto lab0; /* call LONG, line 174 */ if (ret < 0) return ret; } z->c = z->l - m3; z->ket = z->c; /* [, line 174 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) goto lab0; z->c = ret; /* next, line 174 */ } z->bra = z->c; /* ], line 174 */ { int ret = slice_del(z); /* delete, line 174 */ if (ret < 0) return ret; } } lab0: z->c = z->l - m2; } { int m4 = z->l - z->c; (void)m4; 
/* do, line 175 */ z->ket = z->c; /* [, line 175 */ if (in_grouping_b_U(z, g_AEI, 97, 228, 0)) goto lab1; z->bra = z->c; /* ], line 175 */ if (out_grouping_b_U(z, g_V1, 97, 246, 0)) goto lab1; { int ret = slice_del(z); /* delete, line 175 */ if (ret < 0) return ret; } lab1: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 176 */ z->ket = z->c; /* [, line 176 */ if (!(eq_s_b(z, 1, s_15))) goto lab2; z->bra = z->c; /* ], line 176 */ { int m6 = z->l - z->c; (void)m6; /* or, line 176 */ if (!(eq_s_b(z, 1, s_16))) goto lab4; goto lab3; lab4: z->c = z->l - m6; if (!(eq_s_b(z, 1, s_17))) goto lab2; } lab3: { int ret = slice_del(z); /* delete, line 176 */ if (ret < 0) return ret; } lab2: z->c = z->l - m5; } { int m7 = z->l - z->c; (void)m7; /* do, line 177 */ z->ket = z->c; /* [, line 177 */ if (!(eq_s_b(z, 1, s_18))) goto lab5; z->bra = z->c; /* ], line 177 */ if (!(eq_s_b(z, 1, s_19))) goto lab5; { int ret = slice_del(z); /* delete, line 177 */ if (ret < 0) return ret; } lab5: z->c = z->l - m7; } z->lb = mlimit; } if (in_grouping_b_U(z, g_V1, 97, 246, 1) < 0) return 0; /* goto */ /* non V1, line 179 */ z->ket = z->c; /* [, line 179 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 179 */ } z->bra = z->c; /* ], line 179 */ z->S[0] = slice_to(z, z->S[0]); /* -> x, line 179 */ if (z->S[0] == 0) return -1; /* -> x, line 179 */ if (!(eq_v_b(z, z->S[0]))) return 0; /* name x, line 179 */ { int ret = slice_del(z); /* delete, line 179 */ if (ret < 0) return ret; } return 1; } extern int finnish_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 185 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 185 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->B[0] = 0; /* unset ending_removed, line 186 */ z->lb = z->c; z->c = z->l; /* backwards, line 187 */ { int m2 = z->l - z->c; (void)m2; /* do, line 188 */ { int ret = r_particle_etc(z); if (ret == 0) goto lab1; /* call 
particle_etc, line 188 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 189 */ { int ret = r_possessive(z); if (ret == 0) goto lab2; /* call possessive, line 189 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 190 */ { int ret = r_case_ending(z); if (ret == 0) goto lab3; /* call case_ending, line 190 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 191 */ { int ret = r_other_endings(z); if (ret == 0) goto lab4; /* call other_endings, line 191 */ if (ret < 0) return ret; } lab4: z->c = z->l - m5; } { int m6 = z->l - z->c; (void)m6; /* or, line 192 */ if (!(z->B[0])) goto lab6; /* Boolean test ending_removed, line 192 */ { int m7 = z->l - z->c; (void)m7; /* do, line 192 */ { int ret = r_i_plural(z); if (ret == 0) goto lab7; /* call i_plural, line 192 */ if (ret < 0) return ret; } lab7: z->c = z->l - m7; } goto lab5; lab6: z->c = z->l - m6; { int m8 = z->l - z->c; (void)m8; /* do, line 192 */ { int ret = r_t_plural(z); if (ret == 0) goto lab8; /* call t_plural, line 192 */ if (ret < 0) return ret; } lab8: z->c = z->l - m8; } } lab5: { int m9 = z->l - z->c; (void)m9; /* do, line 193 */ { int ret = r_tidy(z); if (ret == 0) goto lab9; /* call tidy, line 193 */ if (ret < 0) return ret; } lab9: z->c = z->l - m9; } z->c = z->lb; return 1; } extern struct SN_env * finnish_UTF_8_create_env(void) { return SN_create_env(1, 2, 1); } extern void finnish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 1); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_finnish.h000066400000000000000000000004661456444476200307500ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* finnish_UTF_8_create_env(void); extern void finnish_UTF_8_close_env(struct SN_env* z); extern int 
finnish_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_french.c000066400000000000000000001356041456444476200305550ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int french_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_un_accent(struct SN_env * z); static int r_un_double(struct SN_env * z); static int r_residual_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_i_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * french_UTF_8_create_env(void); extern void french_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[3] = { 'c', 'o', 'l' }; static const symbol s_0_1[3] = { 'p', 'a', 'r' }; static const symbol s_0_2[3] = { 't', 'a', 'p' }; static const struct among a_0[3] = { /* 0 */ { 3, s_0_0, -1, -1, 0}, /* 1 */ { 3, s_0_1, -1, -1, 0}, /* 2 */ { 3, s_0_2, -1, -1, 0} }; static const symbol s_1_1[1] = { 'I' }; static const symbol s_1_2[1] = { 'U' }; static const symbol s_1_3[1] = { 'Y' }; static const struct among a_1[4] = { /* 0 */ { 0, 0, -1, 4, 0}, /* 1 */ { 1, s_1_1, 0, 1, 0}, /* 2 */ { 1, s_1_2, 0, 2, 0}, /* 3 */ { 1, s_1_3, 0, 3, 0} }; static const symbol s_2_0[3] = { 'i', 'q', 'U' }; static const symbol s_2_1[3] = { 'a', 'b', 'l' }; static const symbol s_2_2[4] = { 'I', 0xC3, 0xA8, 'r' }; static const symbol s_2_3[4] = { 'i', 0xC3, 0xA8, 'r' }; static const symbol s_2_4[3] = { 'e', 'u', 's' }; static const symbol s_2_5[2] 
= { 'i', 'v' }; static const struct among a_2[6] = { /* 0 */ { 3, s_2_0, -1, 3, 0}, /* 1 */ { 3, s_2_1, -1, 3, 0}, /* 2 */ { 4, s_2_2, -1, 4, 0}, /* 3 */ { 4, s_2_3, -1, 4, 0}, /* 4 */ { 3, s_2_4, -1, 2, 0}, /* 5 */ { 2, s_2_5, -1, 1, 0} }; static const symbol s_3_0[2] = { 'i', 'c' }; static const symbol s_3_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_3_2[2] = { 'i', 'v' }; static const struct among a_3[3] = { /* 0 */ { 2, s_3_0, -1, 2, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0}, /* 2 */ { 2, s_3_2, -1, 3, 0} }; static const symbol s_4_0[4] = { 'i', 'q', 'U', 'e' }; static const symbol s_4_1[6] = { 'a', 't', 'r', 'i', 'c', 'e' }; static const symbol s_4_2[4] = { 'a', 'n', 'c', 'e' }; static const symbol s_4_3[4] = { 'e', 'n', 'c', 'e' }; static const symbol s_4_4[5] = { 'l', 'o', 'g', 'i', 'e' }; static const symbol s_4_5[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_4_6[4] = { 'i', 's', 'm', 'e' }; static const symbol s_4_7[4] = { 'e', 'u', 's', 'e' }; static const symbol s_4_8[4] = { 'i', 's', 't', 'e' }; static const symbol s_4_9[3] = { 'i', 'v', 'e' }; static const symbol s_4_10[2] = { 'i', 'f' }; static const symbol s_4_11[5] = { 'u', 's', 'i', 'o', 'n' }; static const symbol s_4_12[5] = { 'a', 't', 'i', 'o', 'n' }; static const symbol s_4_13[5] = { 'u', 't', 'i', 'o', 'n' }; static const symbol s_4_14[5] = { 'a', 't', 'e', 'u', 'r' }; static const symbol s_4_15[5] = { 'i', 'q', 'U', 'e', 's' }; static const symbol s_4_16[7] = { 'a', 't', 'r', 'i', 'c', 'e', 's' }; static const symbol s_4_17[5] = { 'a', 'n', 'c', 'e', 's' }; static const symbol s_4_18[5] = { 'e', 'n', 'c', 'e', 's' }; static const symbol s_4_19[6] = { 'l', 'o', 'g', 'i', 'e', 's' }; static const symbol s_4_20[5] = { 'a', 'b', 'l', 'e', 's' }; static const symbol s_4_21[5] = { 'i', 's', 'm', 'e', 's' }; static const symbol s_4_22[5] = { 'e', 'u', 's', 'e', 's' }; static const symbol s_4_23[5] = { 'i', 's', 't', 'e', 's' }; static const symbol s_4_24[4] = { 'i', 'v', 'e', 's' }; static 
const symbol s_4_25[3] = { 'i', 'f', 's' }; static const symbol s_4_26[6] = { 'u', 's', 'i', 'o', 'n', 's' }; static const symbol s_4_27[6] = { 'a', 't', 'i', 'o', 'n', 's' }; static const symbol s_4_28[6] = { 'u', 't', 'i', 'o', 'n', 's' }; static const symbol s_4_29[6] = { 'a', 't', 'e', 'u', 'r', 's' }; static const symbol s_4_30[5] = { 'm', 'e', 'n', 't', 's' }; static const symbol s_4_31[6] = { 'e', 'm', 'e', 'n', 't', 's' }; static const symbol s_4_32[9] = { 'i', 's', 's', 'e', 'm', 'e', 'n', 't', 's' }; static const symbol s_4_33[5] = { 'i', 't', 0xC3, 0xA9, 's' }; static const symbol s_4_34[4] = { 'm', 'e', 'n', 't' }; static const symbol s_4_35[5] = { 'e', 'm', 'e', 'n', 't' }; static const symbol s_4_36[8] = { 'i', 's', 's', 'e', 'm', 'e', 'n', 't' }; static const symbol s_4_37[6] = { 'a', 'm', 'm', 'e', 'n', 't' }; static const symbol s_4_38[6] = { 'e', 'm', 'm', 'e', 'n', 't' }; static const symbol s_4_39[3] = { 'a', 'u', 'x' }; static const symbol s_4_40[4] = { 'e', 'a', 'u', 'x' }; static const symbol s_4_41[3] = { 'e', 'u', 'x' }; static const symbol s_4_42[4] = { 'i', 't', 0xC3, 0xA9 }; static const struct among a_4[43] = { /* 0 */ { 4, s_4_0, -1, 1, 0}, /* 1 */ { 6, s_4_1, -1, 2, 0}, /* 2 */ { 4, s_4_2, -1, 1, 0}, /* 3 */ { 4, s_4_3, -1, 5, 0}, /* 4 */ { 5, s_4_4, -1, 3, 0}, /* 5 */ { 4, s_4_5, -1, 1, 0}, /* 6 */ { 4, s_4_6, -1, 1, 0}, /* 7 */ { 4, s_4_7, -1, 11, 0}, /* 8 */ { 4, s_4_8, -1, 1, 0}, /* 9 */ { 3, s_4_9, -1, 8, 0}, /* 10 */ { 2, s_4_10, -1, 8, 0}, /* 11 */ { 5, s_4_11, -1, 4, 0}, /* 12 */ { 5, s_4_12, -1, 2, 0}, /* 13 */ { 5, s_4_13, -1, 4, 0}, /* 14 */ { 5, s_4_14, -1, 2, 0}, /* 15 */ { 5, s_4_15, -1, 1, 0}, /* 16 */ { 7, s_4_16, -1, 2, 0}, /* 17 */ { 5, s_4_17, -1, 1, 0}, /* 18 */ { 5, s_4_18, -1, 5, 0}, /* 19 */ { 6, s_4_19, -1, 3, 0}, /* 20 */ { 5, s_4_20, -1, 1, 0}, /* 21 */ { 5, s_4_21, -1, 1, 0}, /* 22 */ { 5, s_4_22, -1, 11, 0}, /* 23 */ { 5, s_4_23, -1, 1, 0}, /* 24 */ { 4, s_4_24, -1, 8, 0}, /* 25 */ { 3, s_4_25, -1, 8, 0}, 
/* 26 */ { 6, s_4_26, -1, 4, 0}, /* 27 */ { 6, s_4_27, -1, 2, 0}, /* 28 */ { 6, s_4_28, -1, 4, 0}, /* 29 */ { 6, s_4_29, -1, 2, 0}, /* 30 */ { 5, s_4_30, -1, 15, 0}, /* 31 */ { 6, s_4_31, 30, 6, 0}, /* 32 */ { 9, s_4_32, 31, 12, 0}, /* 33 */ { 5, s_4_33, -1, 7, 0}, /* 34 */ { 4, s_4_34, -1, 15, 0}, /* 35 */ { 5, s_4_35, 34, 6, 0}, /* 36 */ { 8, s_4_36, 35, 12, 0}, /* 37 */ { 6, s_4_37, 34, 13, 0}, /* 38 */ { 6, s_4_38, 34, 14, 0}, /* 39 */ { 3, s_4_39, -1, 10, 0}, /* 40 */ { 4, s_4_40, 39, 9, 0}, /* 41 */ { 3, s_4_41, -1, 1, 0}, /* 42 */ { 4, s_4_42, -1, 7, 0} }; static const symbol s_5_0[3] = { 'i', 'r', 'a' }; static const symbol s_5_1[2] = { 'i', 'e' }; static const symbol s_5_2[4] = { 'i', 's', 's', 'e' }; static const symbol s_5_3[7] = { 'i', 's', 's', 'a', 'n', 't', 'e' }; static const symbol s_5_4[1] = { 'i' }; static const symbol s_5_5[4] = { 'i', 'r', 'a', 'i' }; static const symbol s_5_6[2] = { 'i', 'r' }; static const symbol s_5_7[4] = { 'i', 'r', 'a', 's' }; static const symbol s_5_8[3] = { 'i', 'e', 's' }; static const symbol s_5_9[5] = { 0xC3, 0xAE, 'm', 'e', 's' }; static const symbol s_5_10[5] = { 'i', 's', 's', 'e', 's' }; static const symbol s_5_11[8] = { 'i', 's', 's', 'a', 'n', 't', 'e', 's' }; static const symbol s_5_12[5] = { 0xC3, 0xAE, 't', 'e', 's' }; static const symbol s_5_13[2] = { 'i', 's' }; static const symbol s_5_14[5] = { 'i', 'r', 'a', 'i', 's' }; static const symbol s_5_15[6] = { 'i', 's', 's', 'a', 'i', 's' }; static const symbol s_5_16[6] = { 'i', 'r', 'i', 'o', 'n', 's' }; static const symbol s_5_17[7] = { 'i', 's', 's', 'i', 'o', 'n', 's' }; static const symbol s_5_18[5] = { 'i', 'r', 'o', 'n', 's' }; static const symbol s_5_19[6] = { 'i', 's', 's', 'o', 'n', 's' }; static const symbol s_5_20[7] = { 'i', 's', 's', 'a', 'n', 't', 's' }; static const symbol s_5_21[2] = { 'i', 't' }; static const symbol s_5_22[5] = { 'i', 'r', 'a', 'i', 't' }; static const symbol s_5_23[6] = { 'i', 's', 's', 'a', 'i', 't' }; static const symbol 
s_5_24[6] = { 'i', 's', 's', 'a', 'n', 't' }; static const symbol s_5_25[7] = { 'i', 'r', 'a', 'I', 'e', 'n', 't' }; static const symbol s_5_26[8] = { 'i', 's', 's', 'a', 'I', 'e', 'n', 't' }; static const symbol s_5_27[5] = { 'i', 'r', 'e', 'n', 't' }; static const symbol s_5_28[6] = { 'i', 's', 's', 'e', 'n', 't' }; static const symbol s_5_29[5] = { 'i', 'r', 'o', 'n', 't' }; static const symbol s_5_30[3] = { 0xC3, 0xAE, 't' }; static const symbol s_5_31[5] = { 'i', 'r', 'i', 'e', 'z' }; static const symbol s_5_32[6] = { 'i', 's', 's', 'i', 'e', 'z' }; static const symbol s_5_33[4] = { 'i', 'r', 'e', 'z' }; static const symbol s_5_34[5] = { 'i', 's', 's', 'e', 'z' }; static const struct among a_5[35] = { /* 0 */ { 3, s_5_0, -1, 1, 0}, /* 1 */ { 2, s_5_1, -1, 1, 0}, /* 2 */ { 4, s_5_2, -1, 1, 0}, /* 3 */ { 7, s_5_3, -1, 1, 0}, /* 4 */ { 1, s_5_4, -1, 1, 0}, /* 5 */ { 4, s_5_5, 4, 1, 0}, /* 6 */ { 2, s_5_6, -1, 1, 0}, /* 7 */ { 4, s_5_7, -1, 1, 0}, /* 8 */ { 3, s_5_8, -1, 1, 0}, /* 9 */ { 5, s_5_9, -1, 1, 0}, /* 10 */ { 5, s_5_10, -1, 1, 0}, /* 11 */ { 8, s_5_11, -1, 1, 0}, /* 12 */ { 5, s_5_12, -1, 1, 0}, /* 13 */ { 2, s_5_13, -1, 1, 0}, /* 14 */ { 5, s_5_14, 13, 1, 0}, /* 15 */ { 6, s_5_15, 13, 1, 0}, /* 16 */ { 6, s_5_16, -1, 1, 0}, /* 17 */ { 7, s_5_17, -1, 1, 0}, /* 18 */ { 5, s_5_18, -1, 1, 0}, /* 19 */ { 6, s_5_19, -1, 1, 0}, /* 20 */ { 7, s_5_20, -1, 1, 0}, /* 21 */ { 2, s_5_21, -1, 1, 0}, /* 22 */ { 5, s_5_22, 21, 1, 0}, /* 23 */ { 6, s_5_23, 21, 1, 0}, /* 24 */ { 6, s_5_24, -1, 1, 0}, /* 25 */ { 7, s_5_25, -1, 1, 0}, /* 26 */ { 8, s_5_26, -1, 1, 0}, /* 27 */ { 5, s_5_27, -1, 1, 0}, /* 28 */ { 6, s_5_28, -1, 1, 0}, /* 29 */ { 5, s_5_29, -1, 1, 0}, /* 30 */ { 3, s_5_30, -1, 1, 0}, /* 31 */ { 5, s_5_31, -1, 1, 0}, /* 32 */ { 6, s_5_32, -1, 1, 0}, /* 33 */ { 4, s_5_33, -1, 1, 0}, /* 34 */ { 5, s_5_34, -1, 1, 0} }; static const symbol s_6_0[1] = { 'a' }; static const symbol s_6_1[3] = { 'e', 'r', 'a' }; static const symbol s_6_2[4] = { 'a', 's', 's', 'e' }; 
static const symbol s_6_3[4] = { 'a', 'n', 't', 'e' }; static const symbol s_6_4[3] = { 0xC3, 0xA9, 'e' }; static const symbol s_6_5[2] = { 'a', 'i' }; static const symbol s_6_6[4] = { 'e', 'r', 'a', 'i' }; static const symbol s_6_7[2] = { 'e', 'r' }; static const symbol s_6_8[2] = { 'a', 's' }; static const symbol s_6_9[4] = { 'e', 'r', 'a', 's' }; static const symbol s_6_10[5] = { 0xC3, 0xA2, 'm', 'e', 's' }; static const symbol s_6_11[5] = { 'a', 's', 's', 'e', 's' }; static const symbol s_6_12[5] = { 'a', 'n', 't', 'e', 's' }; static const symbol s_6_13[5] = { 0xC3, 0xA2, 't', 'e', 's' }; static const symbol s_6_14[4] = { 0xC3, 0xA9, 'e', 's' }; static const symbol s_6_15[3] = { 'a', 'i', 's' }; static const symbol s_6_16[5] = { 'e', 'r', 'a', 'i', 's' }; static const symbol s_6_17[4] = { 'i', 'o', 'n', 's' }; static const symbol s_6_18[6] = { 'e', 'r', 'i', 'o', 'n', 's' }; static const symbol s_6_19[7] = { 'a', 's', 's', 'i', 'o', 'n', 's' }; static const symbol s_6_20[5] = { 'e', 'r', 'o', 'n', 's' }; static const symbol s_6_21[4] = { 'a', 'n', 't', 's' }; static const symbol s_6_22[3] = { 0xC3, 0xA9, 's' }; static const symbol s_6_23[3] = { 'a', 'i', 't' }; static const symbol s_6_24[5] = { 'e', 'r', 'a', 'i', 't' }; static const symbol s_6_25[3] = { 'a', 'n', 't' }; static const symbol s_6_26[5] = { 'a', 'I', 'e', 'n', 't' }; static const symbol s_6_27[7] = { 'e', 'r', 'a', 'I', 'e', 'n', 't' }; static const symbol s_6_28[6] = { 0xC3, 0xA8, 'r', 'e', 'n', 't' }; static const symbol s_6_29[6] = { 'a', 's', 's', 'e', 'n', 't' }; static const symbol s_6_30[5] = { 'e', 'r', 'o', 'n', 't' }; static const symbol s_6_31[3] = { 0xC3, 0xA2, 't' }; static const symbol s_6_32[2] = { 'e', 'z' }; static const symbol s_6_33[3] = { 'i', 'e', 'z' }; static const symbol s_6_34[5] = { 'e', 'r', 'i', 'e', 'z' }; static const symbol s_6_35[6] = { 'a', 's', 's', 'i', 'e', 'z' }; static const symbol s_6_36[4] = { 'e', 'r', 'e', 'z' }; static const symbol s_6_37[2] = { 0xC3, 
0xA9 }; static const struct among a_6[38] = { /* 0 */ { 1, s_6_0, -1, 3, 0}, /* 1 */ { 3, s_6_1, 0, 2, 0}, /* 2 */ { 4, s_6_2, -1, 3, 0}, /* 3 */ { 4, s_6_3, -1, 3, 0}, /* 4 */ { 3, s_6_4, -1, 2, 0}, /* 5 */ { 2, s_6_5, -1, 3, 0}, /* 6 */ { 4, s_6_6, 5, 2, 0}, /* 7 */ { 2, s_6_7, -1, 2, 0}, /* 8 */ { 2, s_6_8, -1, 3, 0}, /* 9 */ { 4, s_6_9, 8, 2, 0}, /* 10 */ { 5, s_6_10, -1, 3, 0}, /* 11 */ { 5, s_6_11, -1, 3, 0}, /* 12 */ { 5, s_6_12, -1, 3, 0}, /* 13 */ { 5, s_6_13, -1, 3, 0}, /* 14 */ { 4, s_6_14, -1, 2, 0}, /* 15 */ { 3, s_6_15, -1, 3, 0}, /* 16 */ { 5, s_6_16, 15, 2, 0}, /* 17 */ { 4, s_6_17, -1, 1, 0}, /* 18 */ { 6, s_6_18, 17, 2, 0}, /* 19 */ { 7, s_6_19, 17, 3, 0}, /* 20 */ { 5, s_6_20, -1, 2, 0}, /* 21 */ { 4, s_6_21, -1, 3, 0}, /* 22 */ { 3, s_6_22, -1, 2, 0}, /* 23 */ { 3, s_6_23, -1, 3, 0}, /* 24 */ { 5, s_6_24, 23, 2, 0}, /* 25 */ { 3, s_6_25, -1, 3, 0}, /* 26 */ { 5, s_6_26, -1, 3, 0}, /* 27 */ { 7, s_6_27, 26, 2, 0}, /* 28 */ { 6, s_6_28, -1, 2, 0}, /* 29 */ { 6, s_6_29, -1, 3, 0}, /* 30 */ { 5, s_6_30, -1, 2, 0}, /* 31 */ { 3, s_6_31, -1, 3, 0}, /* 32 */ { 2, s_6_32, -1, 2, 0}, /* 33 */ { 3, s_6_33, 32, 2, 0}, /* 34 */ { 5, s_6_34, 33, 2, 0}, /* 35 */ { 6, s_6_35, 33, 3, 0}, /* 36 */ { 4, s_6_36, 32, 2, 0}, /* 37 */ { 2, s_6_37, -1, 2, 0} }; static const symbol s_7_0[1] = { 'e' }; static const symbol s_7_1[5] = { 'I', 0xC3, 0xA8, 'r', 'e' }; static const symbol s_7_2[5] = { 'i', 0xC3, 0xA8, 'r', 'e' }; static const symbol s_7_3[3] = { 'i', 'o', 'n' }; static const symbol s_7_4[3] = { 'I', 'e', 'r' }; static const symbol s_7_5[3] = { 'i', 'e', 'r' }; static const symbol s_7_6[2] = { 0xC3, 0xAB }; static const struct among a_7[7] = { /* 0 */ { 1, s_7_0, -1, 3, 0}, /* 1 */ { 5, s_7_1, 0, 2, 0}, /* 2 */ { 5, s_7_2, 0, 2, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 3, s_7_4, -1, 2, 0}, /* 5 */ { 3, s_7_5, -1, 2, 0}, /* 6 */ { 2, s_7_6, -1, 4, 0} }; static const symbol s_8_0[3] = { 'e', 'l', 'l' }; static const symbol s_8_1[4] = { 'e', 'i', 'l', 'l' }; 
static const symbol s_8_2[3] = { 'e', 'n', 'n' }; static const symbol s_8_3[3] = { 'o', 'n', 'n' }; static const symbol s_8_4[3] = { 'e', 't', 't' }; static const struct among a_8[5] = { /* 0 */ { 3, s_8_0, -1, -1, 0}, /* 1 */ { 4, s_8_1, -1, -1, 0}, /* 2 */ { 3, s_8_2, -1, -1, 0}, /* 3 */ { 3, s_8_3, -1, -1, 0}, /* 4 */ { 3, s_8_4, -1, -1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 130, 103, 8, 5 }; static const unsigned char g_keep_with_s[] = { 1, 65, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; static const symbol s_0[] = { 'u' }; static const symbol s_1[] = { 'U' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'I' }; static const symbol s_4[] = { 'y' }; static const symbol s_5[] = { 'Y' }; static const symbol s_6[] = { 'y' }; static const symbol s_7[] = { 'Y' }; static const symbol s_8[] = { 'q' }; static const symbol s_9[] = { 'u' }; static const symbol s_10[] = { 'U' }; static const symbol s_11[] = { 'i' }; static const symbol s_12[] = { 'u' }; static const symbol s_13[] = { 'y' }; static const symbol s_14[] = { 'i', 'c' }; static const symbol s_15[] = { 'i', 'q', 'U' }; static const symbol s_16[] = { 'l', 'o', 'g' }; static const symbol s_17[] = { 'u' }; static const symbol s_18[] = { 'e', 'n', 't' }; static const symbol s_19[] = { 'a', 't' }; static const symbol s_20[] = { 'e', 'u', 'x' }; static const symbol s_21[] = { 'i' }; static const symbol s_22[] = { 'a', 'b', 'l' }; static const symbol s_23[] = { 'i', 'q', 'U' }; static const symbol s_24[] = { 'a', 't' }; static const symbol s_25[] = { 'i', 'c' }; static const symbol s_26[] = { 'i', 'q', 'U' }; static const symbol s_27[] = { 'e', 'a', 'u' }; static const symbol s_28[] = { 'a', 'l' }; static const symbol s_29[] = { 'e', 'u', 'x' }; static const symbol s_30[] = { 'a', 'n', 't' }; static const symbol s_31[] = { 'e', 'n', 't' }; static const symbol s_32[] = { 'e' }; static const symbol s_33[] = { 's' }; static const symbol 
s_34[] = { 's' }; static const symbol s_35[] = { 't' }; static const symbol s_36[] = { 'i' }; static const symbol s_37[] = { 'g', 'u' }; static const symbol s_38[] = { 0xC3, 0xA9 }; static const symbol s_39[] = { 0xC3, 0xA8 }; static const symbol s_40[] = { 'e' }; static const symbol s_41[] = { 'Y' }; static const symbol s_42[] = { 'i' }; static const symbol s_43[] = { 0xC3, 0xA7 }; static const symbol s_44[] = { 'c' }; static int r_prelude(struct SN_env * z) { while(1) { /* repeat, line 38 */ int c1 = z->c; while(1) { /* goto, line 38 */ int c2 = z->c; { int c3 = z->c; /* or, line 44 */ if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab3; z->bra = z->c; /* [, line 40 */ { int c4 = z->c; /* or, line 40 */ if (!(eq_s(z, 1, s_0))) goto lab5; z->ket = z->c; /* ], line 40 */ if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab5; { int ret = slice_from_s(z, 1, s_1); /* <-, line 40 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = c4; if (!(eq_s(z, 1, s_2))) goto lab6; z->ket = z->c; /* ], line 41 */ if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab6; { int ret = slice_from_s(z, 1, s_3); /* <-, line 41 */ if (ret < 0) return ret; } goto lab4; lab6: z->c = c4; if (!(eq_s(z, 1, s_4))) goto lab3; z->ket = z->c; /* ], line 42 */ { int ret = slice_from_s(z, 1, s_5); /* <-, line 42 */ if (ret < 0) return ret; } } lab4: goto lab2; lab3: z->c = c3; z->bra = z->c; /* [, line 45 */ if (!(eq_s(z, 1, s_6))) goto lab7; z->ket = z->c; /* ], line 45 */ if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab7; { int ret = slice_from_s(z, 1, s_7); /* <-, line 45 */ if (ret < 0) return ret; } goto lab2; lab7: z->c = c3; if (!(eq_s(z, 1, s_8))) goto lab1; z->bra = z->c; /* [, line 47 */ if (!(eq_s(z, 1, s_9))) goto lab1; z->ket = z->c; /* ], line 47 */ { int ret = slice_from_s(z, 1, s_10); /* <-, line 47 */ if (ret < 0) return ret; } } lab2: z->c = c2; break; lab1: z->c = c2; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* goto, line 38 */ } } continue; lab0: z->c = 
c1; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 56 */ { int c2 = z->c; /* or, line 58 */ if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab2; if (in_grouping_U(z, g_v, 97, 251, 0)) goto lab2; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab2; z->c = ret; /* next, line 57 */ } goto lab1; lab2: z->c = c2; if (z->c + 2 >= z->l || z->p[z->c + 2] >> 5 != 3 || !((331776 >> (z->p[z->c + 2] & 0x1f)) & 1)) goto lab3; if (!(find_among(z, a_0, 3))) goto lab3; /* among, line 59 */ goto lab1; lab3: z->c = c2; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 66 */ } { /* gopast */ /* grouping v, line 66 */ int ret = out_grouping_U(z, g_v, 97, 251, 1); if (ret < 0) goto lab0; z->c += ret; } } lab1: z->I[0] = z->c; /* setmark pV, line 67 */ lab0: z->c = c1; } { int c3 = z->c; /* do, line 69 */ { /* gopast */ /* grouping v, line 70 */ int ret = out_grouping_U(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 70 */ int ret = in_grouping_U(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 70 */ { /* gopast */ /* grouping v, line 71 */ int ret = out_grouping_U(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 71 */ int ret = in_grouping_U(z, g_v, 97, 251, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 71 */ lab4: z->c = c3; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 75 */ int c1 = z->c; z->bra = z->c; /* [, line 77 */ if (z->c >= z->l || z->p[z->c + 0] >> 5 != 2 || !((35652096 >> (z->p[z->c + 0] & 0x1f)) & 1)) among_var = 4; else among_var = find_among(z, a_1, 4); /* substring, line 77 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 77 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = 
slice_from_s(z, 1, s_11); /* <-, line 78 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_12); /* <-, line 79 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_13); /* <-, line 80 */ if (ret < 0) return ret; } break; case 4: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 81 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 92 */ among_var = find_among_b(z, a_4, 43); /* substring, line 92 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 92 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 96 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 99 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 99 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 100 */ z->ket = z->c; /* [, line 100 */ if (!(eq_s_b(z, 2, s_14))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 100 */ { int m1 = z->l - z->c; (void)m1; /* or, line 100 */ { int ret = r_R2(z); if (ret == 0) goto lab2; /* call R2, line 100 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 100 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = z->l - m1; { int ret = slice_from_s(z, 3, s_15); /* <-, line 100 */ if (ret < 0) return ret; } } lab1: lab0: ; } break; case 3: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 104 */ if (ret < 0) return ret; } { int ret = 
slice_from_s(z, 3, s_16); /* <-, line 104 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 107 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_17); /* <-, line 107 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 110 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_18); /* <-, line 110 */ if (ret < 0) return ret; } break; case 6: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 114 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 114 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 115 */ z->ket = z->c; /* [, line 116 */ among_var = find_among_b(z, a_2, 6); /* substring, line 116 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 116 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab3; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 117 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 117 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 117 */ if (!(eq_s_b(z, 2, s_19))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 117 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 117 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 117 */ if (ret < 0) return ret; } break; case 2: { int m2 = z->l - z->c; (void)m2; /* or, line 118 */ { int ret = r_R2(z); if (ret == 0) goto lab5; /* call R2, line 118 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 118 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = z->l - m2; { int ret = r_R1(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R1, line 118 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_20); /* <-, line 118 */ if (ret < 0) return ret; } } lab4: break; 
case 3: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 120 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 120 */ if (ret < 0) return ret; } break; case 4: { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call RV, line 122 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_21); /* <-, line 122 */ if (ret < 0) return ret; } break; } lab3: ; } break; case 7: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 129 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 129 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 130 */ z->ket = z->c; /* [, line 131 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab6; } among_var = find_among_b(z, a_3, 3); /* substring, line 131 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab6; } z->bra = z->c; /* ], line 131 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab6; } case 1: { int m3 = z->l - z->c; (void)m3; /* or, line 132 */ { int ret = r_R2(z); if (ret == 0) goto lab8; /* call R2, line 132 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 132 */ if (ret < 0) return ret; } goto lab7; lab8: z->c = z->l - m3; { int ret = slice_from_s(z, 3, s_22); /* <-, line 132 */ if (ret < 0) return ret; } } lab7: break; case 2: { int m4 = z->l - z->c; (void)m4; /* or, line 133 */ { int ret = r_R2(z); if (ret == 0) goto lab10; /* call R2, line 133 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 133 */ if (ret < 0) return ret; } goto lab9; lab10: z->c = z->l - m4; { int ret = slice_from_s(z, 3, s_23); /* <-, line 133 */ if (ret < 0) return ret; } } lab9: break; case 3: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call R2, line 134 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 
134 */ if (ret < 0) return ret; } break; } lab6: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 141 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 141 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 142 */ z->ket = z->c; /* [, line 142 */ if (!(eq_s_b(z, 2, s_24))) { z->c = z->l - m_keep; goto lab11; } z->bra = z->c; /* ], line 142 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab11; } /* call R2, line 142 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 142 */ if (!(eq_s_b(z, 2, s_25))) { z->c = z->l - m_keep; goto lab11; } z->bra = z->c; /* ], line 142 */ { int m5 = z->l - z->c; (void)m5; /* or, line 142 */ { int ret = r_R2(z); if (ret == 0) goto lab13; /* call R2, line 142 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } goto lab12; lab13: z->c = z->l - m5; { int ret = slice_from_s(z, 3, s_26); /* <-, line 142 */ if (ret < 0) return ret; } } lab12: lab11: ; } break; case 9: { int ret = slice_from_s(z, 3, s_27); /* <-, line 144 */ if (ret < 0) return ret; } break; case 10: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 145 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 2, s_28); /* <-, line 145 */ if (ret < 0) return ret; } break; case 11: { int m6 = z->l - z->c; (void)m6; /* or, line 147 */ { int ret = r_R2(z); if (ret == 0) goto lab15; /* call R2, line 147 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 147 */ if (ret < 0) return ret; } goto lab14; lab15: z->c = z->l - m6; { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 147 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_29); /* <-, line 147 */ if (ret < 0) return ret; } } lab14: break; case 12: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 150 */ if (ret < 0) 
return ret; } if (out_grouping_b_U(z, g_v, 97, 251, 0)) return 0; { int ret = slice_del(z); /* delete, line 150 */ if (ret < 0) return ret; } break; case 13: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 155 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_30); /* <-, line 155 */ if (ret < 0) return ret; } return 0; /* fail, line 155 */ break; case 14: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 156 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_31); /* <-, line 156 */ if (ret < 0) return ret; } return 0; /* fail, line 156 */ break; case 15: { int m_test = z->l - z->c; /* test, line 158 */ if (in_grouping_b_U(z, g_v, 97, 251, 0)) return 0; { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 158 */ if (ret < 0) return ret; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 158 */ if (ret < 0) return ret; } return 0; /* fail, line 158 */ break; } return 1; } static int r_i_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 163 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 163 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 164 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68944418 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_5, 35); /* substring, line 164 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 164 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: if (out_grouping_b_U(z, g_v, 97, 251, 0)) { z->lb = mlimit; return 0; } { int ret = slice_del(z); /* delete, line 170 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 174 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 174 */ mlimit = z->lb; z->lb = z->c; 
z->c = z->l - m1; z->ket = z->c; /* [, line 175 */ among_var = find_among_b(z, a_6, 38); /* substring, line 175 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 175 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = r_R2(z); if (ret == 0) { z->lb = mlimit; return 0; } /* call R2, line 177 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 177 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 185 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 190 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 191 */ z->ket = z->c; /* [, line 191 */ if (!(eq_s_b(z, 1, s_32))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 191 */ { int ret = slice_del(z); /* delete, line 191 */ if (ret < 0) return ret; } lab0: ; } break; } z->lb = mlimit; } return 1; } static int r_residual_suffix(struct SN_env * z) { int among_var; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 199 */ z->ket = z->c; /* [, line 199 */ if (!(eq_s_b(z, 1, s_33))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 199 */ { int m_test = z->l - z->c; /* test, line 199 */ if (out_grouping_b_U(z, g_keep_with_s, 97, 232, 0)) { z->c = z->l - m_keep; goto lab0; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 199 */ if (ret < 0) return ret; } lab0: ; } { int mlimit; /* setlimit, line 200 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 200 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 201 */ among_var = find_among_b(z, a_7, 7); /* substring, line 201 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 201 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = r_R2(z); if (ret == 0) { z->lb = mlimit; return 0; } /* call R2, line 202 */ if (ret < 
0) return ret; } { int m2 = z->l - z->c; (void)m2; /* or, line 202 */ if (!(eq_s_b(z, 1, s_34))) goto lab2; goto lab1; lab2: z->c = z->l - m2; if (!(eq_s_b(z, 1, s_35))) { z->lb = mlimit; return 0; } } lab1: { int ret = slice_del(z); /* delete, line 202 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_36); /* <-, line 204 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 205 */ if (ret < 0) return ret; } break; case 4: if (!(eq_s_b(z, 2, s_37))) { z->lb = mlimit; return 0; } { int ret = slice_del(z); /* delete, line 206 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_un_double(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 212 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1069056 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_8, 5))) return 0; /* among, line 212 */ z->c = z->l - m_test; } z->ket = z->c; /* [, line 212 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 212 */ } z->bra = z->c; /* ], line 212 */ { int ret = slice_del(z); /* delete, line 212 */ if (ret < 0) return ret; } return 1; } static int r_un_accent(struct SN_env * z) { { int i = 1; while(1) { /* atleast, line 216 */ if (out_grouping_b_U(z, g_v, 97, 251, 0)) goto lab0; i--; continue; lab0: break; } if (i > 0) return 0; } z->ket = z->c; /* [, line 217 */ { int m1 = z->l - z->c; (void)m1; /* or, line 217 */ if (!(eq_s_b(z, 2, s_38))) goto lab2; goto lab1; lab2: z->c = z->l - m1; if (!(eq_s_b(z, 2, s_39))) return 0; } lab1: z->bra = z->c; /* ], line 217 */ { int ret = slice_from_s(z, 1, s_40); /* <-, line 217 */ if (ret < 0) return ret; } return 1; } extern int french_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 223 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 223 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 224 */ { int ret 
= r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 224 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 225 */ { int m3 = z->l - z->c; (void)m3; /* do, line 227 */ { int m4 = z->l - z->c; (void)m4; /* or, line 237 */ { int m5 = z->l - z->c; (void)m5; /* and, line 233 */ { int m6 = z->l - z->c; (void)m6; /* or, line 229 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab6; /* call standard_suffix, line 229 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m6; { int ret = r_i_verb_suffix(z); if (ret == 0) goto lab7; /* call i_verb_suffix, line 230 */ if (ret < 0) return ret; } goto lab5; lab7: z->c = z->l - m6; { int ret = r_verb_suffix(z); if (ret == 0) goto lab4; /* call verb_suffix, line 231 */ if (ret < 0) return ret; } } lab5: z->c = z->l - m5; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 234 */ z->ket = z->c; /* [, line 234 */ { int m7 = z->l - z->c; (void)m7; /* or, line 234 */ if (!(eq_s_b(z, 1, s_41))) goto lab10; z->bra = z->c; /* ], line 234 */ { int ret = slice_from_s(z, 1, s_42); /* <-, line 234 */ if (ret < 0) return ret; } goto lab9; lab10: z->c = z->l - m7; if (!(eq_s_b(z, 2, s_43))) { z->c = z->l - m_keep; goto lab8; } z->bra = z->c; /* ], line 235 */ { int ret = slice_from_s(z, 1, s_44); /* <-, line 235 */ if (ret < 0) return ret; } } lab9: lab8: ; } } goto lab3; lab4: z->c = z->l - m4; { int ret = r_residual_suffix(z); if (ret == 0) goto lab2; /* call residual_suffix, line 238 */ if (ret < 0) return ret; } } lab3: lab2: z->c = z->l - m3; } { int m8 = z->l - z->c; (void)m8; /* do, line 243 */ { int ret = r_un_double(z); if (ret == 0) goto lab11; /* call un_double, line 243 */ if (ret < 0) return ret; } lab11: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 244 */ { int ret = r_un_accent(z); if (ret == 0) goto lab12; /* call un_accent, line 244 */ if (ret < 0) return ret; } lab12: z->c = z->l - m9; } z->c = z->lb; { int c10 = z->c; 
/* do, line 246 */ { int ret = r_postlude(z); if (ret == 0) goto lab13; /* call postlude, line 246 */ if (ret < 0) return ret; } lab13: z->c = c10; } return 1; } extern struct SN_env * french_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } extern void french_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_french.h000066400000000000000000000004631456444476200305540ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* french_UTF_8_create_env(void); extern void french_UTF_8_close_env(struct SN_env* z); extern int french_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_german.c000066400000000000000000000431111456444476200305500ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int german_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_standard_suffix(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * german_UTF_8_create_env(void); extern void german_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[1] = { 'U' }; static const symbol s_0_2[1] = { 'Y' }; static const symbol s_0_3[2] = { 0xC3, 0xA4 }; static const symbol s_0_4[2] = { 0xC3, 0xB6 }; static const symbol s_0_5[2] = { 0xC3, 0xBC }; static const struct among a_0[6] = { /* 0 */ { 0, 0, -1, 6, 0}, /* 1 */ { 1, s_0_1, 0, 2, 0}, /* 2 */ { 1, s_0_2, 0, 1, 0}, /* 3 */ { 2, s_0_3, 0, 3, 0}, /* 4 */ { 2, 
s_0_4, 0, 4, 0}, /* 5 */ { 2, s_0_5, 0, 5, 0} }; static const symbol s_1_0[1] = { 'e' }; static const symbol s_1_1[2] = { 'e', 'm' }; static const symbol s_1_2[2] = { 'e', 'n' }; static const symbol s_1_3[3] = { 'e', 'r', 'n' }; static const symbol s_1_4[2] = { 'e', 'r' }; static const symbol s_1_5[1] = { 's' }; static const symbol s_1_6[2] = { 'e', 's' }; static const struct among a_1[7] = { /* 0 */ { 1, s_1_0, -1, 2, 0}, /* 1 */ { 2, s_1_1, -1, 1, 0}, /* 2 */ { 2, s_1_2, -1, 2, 0}, /* 3 */ { 3, s_1_3, -1, 1, 0}, /* 4 */ { 2, s_1_4, -1, 1, 0}, /* 5 */ { 1, s_1_5, -1, 3, 0}, /* 6 */ { 2, s_1_6, 5, 2, 0} }; static const symbol s_2_0[2] = { 'e', 'n' }; static const symbol s_2_1[2] = { 'e', 'r' }; static const symbol s_2_2[2] = { 's', 't' }; static const symbol s_2_3[3] = { 'e', 's', 't' }; static const struct among a_2[4] = { /* 0 */ { 2, s_2_0, -1, 1, 0}, /* 1 */ { 2, s_2_1, -1, 1, 0}, /* 2 */ { 2, s_2_2, -1, 2, 0}, /* 3 */ { 3, s_2_3, 2, 1, 0} }; static const symbol s_3_0[2] = { 'i', 'g' }; static const symbol s_3_1[4] = { 'l', 'i', 'c', 'h' }; static const struct among a_3[2] = { /* 0 */ { 2, s_3_0, -1, 1, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0} }; static const symbol s_4_0[3] = { 'e', 'n', 'd' }; static const symbol s_4_1[2] = { 'i', 'g' }; static const symbol s_4_2[3] = { 'u', 'n', 'g' }; static const symbol s_4_3[4] = { 'l', 'i', 'c', 'h' }; static const symbol s_4_4[4] = { 'i', 's', 'c', 'h' }; static const symbol s_4_5[2] = { 'i', 'k' }; static const symbol s_4_6[4] = { 'h', 'e', 'i', 't' }; static const symbol s_4_7[4] = { 'k', 'e', 'i', 't' }; static const struct among a_4[8] = { /* 0 */ { 3, s_4_0, -1, 1, 0}, /* 1 */ { 2, s_4_1, -1, 2, 0}, /* 2 */ { 3, s_4_2, -1, 1, 0}, /* 3 */ { 4, s_4_3, -1, 3, 0}, /* 4 */ { 4, s_4_4, -1, 2, 0}, /* 5 */ { 2, s_4_5, -1, 2, 0}, /* 6 */ { 4, s_4_6, -1, 3, 0}, /* 7 */ { 4, s_4_7, -1, 4, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 }; static const unsigned char 
g_s_ending[] = { 117, 30, 5 }; static const unsigned char g_st_ending[] = { 117, 30, 4 }; static const symbol s_0[] = { 0xC3, 0x9F }; static const symbol s_1[] = { 's', 's' }; static const symbol s_2[] = { 'u' }; static const symbol s_3[] = { 'U' }; static const symbol s_4[] = { 'y' }; static const symbol s_5[] = { 'Y' }; static const symbol s_6[] = { 'y' }; static const symbol s_7[] = { 'u' }; static const symbol s_8[] = { 'a' }; static const symbol s_9[] = { 'o' }; static const symbol s_10[] = { 'u' }; static const symbol s_11[] = { 's' }; static const symbol s_12[] = { 'n', 'i', 's' }; static const symbol s_13[] = { 'i', 'g' }; static const symbol s_14[] = { 'e' }; static const symbol s_15[] = { 'e' }; static const symbol s_16[] = { 'e', 'r' }; static const symbol s_17[] = { 'e', 'n' }; static int r_prelude(struct SN_env * z) { { int c_test = z->c; /* test, line 35 */ while(1) { /* repeat, line 35 */ int c1 = z->c; { int c2 = z->c; /* or, line 38 */ z->bra = z->c; /* [, line 37 */ if (!(eq_s(z, 2, s_0))) goto lab2; z->ket = z->c; /* ], line 37 */ { int ret = slice_from_s(z, 2, s_1); /* <-, line 37 */ if (ret < 0) return ret; } goto lab1; lab2: z->c = c2; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 38 */ } } lab1: continue; lab0: z->c = c1; break; } z->c = c_test; } while(1) { /* repeat, line 41 */ int c3 = z->c; while(1) { /* goto, line 41 */ int c4 = z->c; if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4; z->bra = z->c; /* [, line 42 */ { int c5 = z->c; /* or, line 42 */ if (!(eq_s(z, 1, s_2))) goto lab6; z->ket = z->c; /* ], line 42 */ if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab6; { int ret = slice_from_s(z, 1, s_3); /* <-, line 42 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = c5; if (!(eq_s(z, 1, s_4))) goto lab4; z->ket = z->c; /* ], line 43 */ if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab4; { int ret = slice_from_s(z, 1, s_5); /* <-, line 43 */ if (ret < 0) return ret; } } lab5: z->c = c4; 
break; lab4: z->c = c4; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab3; z->c = ret; /* goto, line 41 */ } } continue; lab3: z->c = c3; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { int c_test = z->c; /* test, line 52 */ { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); if (ret < 0) return 0; z->c = ret; /* hop, line 52 */ } z->I[2] = z->c; /* setmark x, line 52 */ z->c = c_test; } { /* gopast */ /* grouping v, line 54 */ int ret = out_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 54 */ int ret = in_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 54 */ /* try, line 55 */ if (!(z->I[0] < z->I[2])) goto lab0; z->I[0] = z->I[2]; lab0: { /* gopast */ /* grouping v, line 56 */ int ret = out_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } { /* gopast */ /* non v, line 56 */ int ret = in_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 56 */ return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 60 */ int c1 = z->c; z->bra = z->c; /* [, line 62 */ among_var = find_among(z, a_0, 6); /* substring, line 62 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 62 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_6); /* <-, line 63 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_7); /* <-, line 64 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_8); /* <-, line 65 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_9); /* <-, line 66 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_10); /* <-, line 67 */ if (ret < 0) return ret; } break; case 6: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = 
ret; /* next, line 68 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; { int m1 = z->l - z->c; (void)m1; /* do, line 79 */ z->ket = z->c; /* [, line 80 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((811040 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab0; among_var = find_among_b(z, a_1, 7); /* substring, line 80 */ if (!(among_var)) goto lab0; z->bra = z->c; /* ], line 80 */ { int ret = r_R1(z); if (ret == 0) goto lab0; /* call R1, line 80 */ if (ret < 0) return ret; } switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_del(z); /* delete, line 82 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 85 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 86 */ z->ket = z->c; /* [, line 86 */ if (!(eq_s_b(z, 1, s_11))) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 86 */ if (!(eq_s_b(z, 3, s_12))) { z->c = z->l - m_keep; goto lab1; } { int ret = slice_del(z); /* delete, line 86 */ if (ret < 0) return ret; } lab1: ; } break; case 3: if (in_grouping_b_U(z, g_s_ending, 98, 116, 0)) goto lab0; { int ret = slice_del(z); /* delete, line 89 */ if (ret < 0) return ret; } break; } lab0: z->c = z->l - m1; } { int m2 = z->l - z->c; (void)m2; /* do, line 93 */ z->ket = z->c; /* [, line 94 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1327104 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab2; among_var = find_among_b(z, a_2, 4); /* substring, line 94 */ if (!(among_var)) goto lab2; z->bra = z->c; /* ], line 94 */ { int ret = r_R1(z); if (ret == 0) goto lab2; /* call R1, line 94 */ if (ret < 0) return ret; } switch(among_var) { case 0: goto lab2; case 1: { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } 
break; case 2: if (in_grouping_b_U(z, g_st_ending, 98, 116, 0)) goto lab2; { int ret = skip_utf8(z->p, z->c, z->lb, z->l, - 3); if (ret < 0) goto lab2; z->c = ret; /* hop, line 99 */ } { int ret = slice_del(z); /* delete, line 99 */ if (ret < 0) return ret; } break; } lab2: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 103 */ z->ket = z->c; /* [, line 104 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1051024 >> (z->p[z->c - 1] & 0x1f)) & 1)) goto lab3; among_var = find_among_b(z, a_4, 8); /* substring, line 104 */ if (!(among_var)) goto lab3; z->bra = z->c; /* ], line 104 */ { int ret = r_R2(z); if (ret == 0) goto lab3; /* call R2, line 104 */ if (ret < 0) return ret; } switch(among_var) { case 0: goto lab3; case 1: { int ret = slice_del(z); /* delete, line 106 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 107 */ z->ket = z->c; /* [, line 107 */ if (!(eq_s_b(z, 2, s_13))) { z->c = z->l - m_keep; goto lab4; } z->bra = z->c; /* ], line 107 */ { int m4 = z->l - z->c; (void)m4; /* not, line 107 */ if (!(eq_s_b(z, 1, s_14))) goto lab5; { z->c = z->l - m_keep; goto lab4; } lab5: z->c = z->l - m4; } { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call R2, line 107 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 107 */ if (ret < 0) return ret; } lab4: ; } break; case 2: { int m5 = z->l - z->c; (void)m5; /* not, line 110 */ if (!(eq_s_b(z, 1, s_15))) goto lab6; goto lab3; lab6: z->c = z->l - m5; } { int ret = slice_del(z); /* delete, line 110 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 113 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 114 */ z->ket = z->c; /* [, line 115 */ { int m6 = z->l - z->c; (void)m6; /* or, line 115 */ if (!(eq_s_b(z, 2, s_16))) goto lab9; goto lab8; lab9: z->c = z->l - m6; if (!(eq_s_b(z, 2, s_17))) { z->c = z->l - m_keep; goto lab7; } } 
lab8: z->bra = z->c; /* ], line 115 */ { int ret = r_R1(z); if (ret == 0) { z->c = z->l - m_keep; goto lab7; } /* call R1, line 115 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 115 */ if (ret < 0) return ret; } lab7: ; } break; case 4: { int ret = slice_del(z); /* delete, line 119 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 120 */ z->ket = z->c; /* [, line 121 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 103 && z->p[z->c - 1] != 104)) { z->c = z->l - m_keep; goto lab10; } among_var = find_among_b(z, a_3, 2); /* substring, line 121 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab10; } z->bra = z->c; /* ], line 121 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab10; } /* call R2, line 121 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab10; } case 1: { int ret = slice_del(z); /* delete, line 123 */ if (ret < 0) return ret; } break; } lab10: ; } break; } lab3: z->c = z->l - m3; } return 1; } extern int german_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 134 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 134 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 135 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 135 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 136 */ { int m3 = z->l - z->c; (void)m3; /* do, line 137 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab2; /* call standard_suffix, line 137 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } z->c = z->lb; { int c4 = z->c; /* do, line 138 */ { int ret = r_postlude(z); if (ret == 0) goto lab3; /* call postlude, line 138 */ if (ret < 0) return ret; } lab3: z->c = c4; } return 1; } extern struct SN_env * german_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } extern void 
german_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_german.h000066400000000000000000000004631456444476200305600ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* german_UTF_8_create_env(void); extern void german_UTF_8_close_env(struct SN_env* z); extern int german_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_hungarian.c000066400000000000000000001210131456444476200312510ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int hungarian_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_double(struct SN_env * z); static int r_undouble(struct SN_env * z); static int r_factive(struct SN_env * z); static int r_instrum(struct SN_env * z); static int r_plur_owner(struct SN_env * z); static int r_sing_owner(struct SN_env * z); static int r_owned(struct SN_env * z); static int r_plural(struct SN_env * z); static int r_case_other(struct SN_env * z); static int r_case_special(struct SN_env * z); static int r_case(struct SN_env * z); static int r_v_ending(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * hungarian_UTF_8_create_env(void); extern void hungarian_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[2] = { 'c', 's' }; static const symbol s_0_1[3] = { 'd', 'z', 's' }; static const symbol s_0_2[2] = { 'g', 'y' }; static const symbol s_0_3[2] = { 'l', 'y' }; static const symbol s_0_4[2] = { 'n', 'y' }; static const symbol s_0_5[2] = { 's', 'z' }; static const symbol s_0_6[2] = 
{ 't', 'y' }; static const symbol s_0_7[2] = { 'z', 's' }; static const struct among a_0[8] = { /* 0 */ { 2, s_0_0, -1, -1, 0}, /* 1 */ { 3, s_0_1, -1, -1, 0}, /* 2 */ { 2, s_0_2, -1, -1, 0}, /* 3 */ { 2, s_0_3, -1, -1, 0}, /* 4 */ { 2, s_0_4, -1, -1, 0}, /* 5 */ { 2, s_0_5, -1, -1, 0}, /* 6 */ { 2, s_0_6, -1, -1, 0}, /* 7 */ { 2, s_0_7, -1, -1, 0} }; static const symbol s_1_0[2] = { 0xC3, 0xA1 }; static const symbol s_1_1[2] = { 0xC3, 0xA9 }; static const struct among a_1[2] = { /* 0 */ { 2, s_1_0, -1, 1, 0}, /* 1 */ { 2, s_1_1, -1, 2, 0} }; static const symbol s_2_0[2] = { 'b', 'b' }; static const symbol s_2_1[2] = { 'c', 'c' }; static const symbol s_2_2[2] = { 'd', 'd' }; static const symbol s_2_3[2] = { 'f', 'f' }; static const symbol s_2_4[2] = { 'g', 'g' }; static const symbol s_2_5[2] = { 'j', 'j' }; static const symbol s_2_6[2] = { 'k', 'k' }; static const symbol s_2_7[2] = { 'l', 'l' }; static const symbol s_2_8[2] = { 'm', 'm' }; static const symbol s_2_9[2] = { 'n', 'n' }; static const symbol s_2_10[2] = { 'p', 'p' }; static const symbol s_2_11[2] = { 'r', 'r' }; static const symbol s_2_12[3] = { 'c', 'c', 's' }; static const symbol s_2_13[2] = { 's', 's' }; static const symbol s_2_14[3] = { 'z', 'z', 's' }; static const symbol s_2_15[2] = { 't', 't' }; static const symbol s_2_16[2] = { 'v', 'v' }; static const symbol s_2_17[3] = { 'g', 'g', 'y' }; static const symbol s_2_18[3] = { 'l', 'l', 'y' }; static const symbol s_2_19[3] = { 'n', 'n', 'y' }; static const symbol s_2_20[3] = { 't', 't', 'y' }; static const symbol s_2_21[3] = { 's', 's', 'z' }; static const symbol s_2_22[2] = { 'z', 'z' }; static const struct among a_2[23] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 2, s_2_2, -1, -1, 0}, /* 3 */ { 2, s_2_3, -1, -1, 0}, /* 4 */ { 2, s_2_4, -1, -1, 0}, /* 5 */ { 2, s_2_5, -1, -1, 0}, /* 6 */ { 2, s_2_6, -1, -1, 0}, /* 7 */ { 2, s_2_7, -1, -1, 0}, /* 8 */ { 2, s_2_8, -1, -1, 0}, /* 9 */ { 2, s_2_9, -1, -1, 0}, /* 10 */ 
{ 2, s_2_10, -1, -1, 0}, /* 11 */ { 2, s_2_11, -1, -1, 0}, /* 12 */ { 3, s_2_12, -1, -1, 0}, /* 13 */ { 2, s_2_13, -1, -1, 0}, /* 14 */ { 3, s_2_14, -1, -1, 0}, /* 15 */ { 2, s_2_15, -1, -1, 0}, /* 16 */ { 2, s_2_16, -1, -1, 0}, /* 17 */ { 3, s_2_17, -1, -1, 0}, /* 18 */ { 3, s_2_18, -1, -1, 0}, /* 19 */ { 3, s_2_19, -1, -1, 0}, /* 20 */ { 3, s_2_20, -1, -1, 0}, /* 21 */ { 3, s_2_21, -1, -1, 0}, /* 22 */ { 2, s_2_22, -1, -1, 0} }; static const symbol s_3_0[2] = { 'a', 'l' }; static const symbol s_3_1[2] = { 'e', 'l' }; static const struct among a_3[2] = { /* 0 */ { 2, s_3_0, -1, 1, 0}, /* 1 */ { 2, s_3_1, -1, 2, 0} }; static const symbol s_4_0[2] = { 'b', 'a' }; static const symbol s_4_1[2] = { 'r', 'a' }; static const symbol s_4_2[2] = { 'b', 'e' }; static const symbol s_4_3[2] = { 'r', 'e' }; static const symbol s_4_4[2] = { 'i', 'g' }; static const symbol s_4_5[3] = { 'n', 'a', 'k' }; static const symbol s_4_6[3] = { 'n', 'e', 'k' }; static const symbol s_4_7[3] = { 'v', 'a', 'l' }; static const symbol s_4_8[3] = { 'v', 'e', 'l' }; static const symbol s_4_9[2] = { 'u', 'l' }; static const symbol s_4_10[4] = { 'n', 0xC3, 0xA1, 'l' }; static const symbol s_4_11[4] = { 'n', 0xC3, 0xA9, 'l' }; static const symbol s_4_12[4] = { 'b', 0xC3, 0xB3, 'l' }; static const symbol s_4_13[4] = { 'r', 0xC3, 0xB3, 'l' }; static const symbol s_4_14[4] = { 't', 0xC3, 0xB3, 'l' }; static const symbol s_4_15[4] = { 'b', 0xC3, 0xB5, 'l' }; static const symbol s_4_16[4] = { 'r', 0xC3, 0xB5, 'l' }; static const symbol s_4_17[4] = { 't', 0xC3, 0xB5, 'l' }; static const symbol s_4_18[3] = { 0xC3, 0xBC, 'l' }; static const symbol s_4_19[1] = { 'n' }; static const symbol s_4_20[2] = { 'a', 'n' }; static const symbol s_4_21[3] = { 'b', 'a', 'n' }; static const symbol s_4_22[2] = { 'e', 'n' }; static const symbol s_4_23[3] = { 'b', 'e', 'n' }; static const symbol s_4_24[7] = { 'k', 0xC3, 0xA9, 'p', 'p', 'e', 'n' }; static const symbol s_4_25[2] = { 'o', 'n' }; static const symbol s_4_26[3] = 
{ 0xC3, 0xB6, 'n' }; static const symbol s_4_27[5] = { 'k', 0xC3, 0xA9, 'p', 'p' }; static const symbol s_4_28[3] = { 'k', 'o', 'r' }; static const symbol s_4_29[1] = { 't' }; static const symbol s_4_30[2] = { 'a', 't' }; static const symbol s_4_31[2] = { 'e', 't' }; static const symbol s_4_32[5] = { 'k', 0xC3, 0xA9, 'n', 't' }; static const symbol s_4_33[7] = { 'a', 'n', 'k', 0xC3, 0xA9, 'n', 't' }; static const symbol s_4_34[7] = { 'e', 'n', 'k', 0xC3, 0xA9, 'n', 't' }; static const symbol s_4_35[7] = { 'o', 'n', 'k', 0xC3, 0xA9, 'n', 't' }; static const symbol s_4_36[2] = { 'o', 't' }; static const symbol s_4_37[4] = { 0xC3, 0xA9, 'r', 't' }; static const symbol s_4_38[3] = { 0xC3, 0xB6, 't' }; static const symbol s_4_39[3] = { 'h', 'e', 'z' }; static const symbol s_4_40[3] = { 'h', 'o', 'z' }; static const symbol s_4_41[4] = { 'h', 0xC3, 0xB6, 'z' }; static const symbol s_4_42[3] = { 'v', 0xC3, 0xA1 }; static const symbol s_4_43[3] = { 'v', 0xC3, 0xA9 }; static const struct among a_4[44] = { /* 0 */ { 2, s_4_0, -1, -1, 0}, /* 1 */ { 2, s_4_1, -1, -1, 0}, /* 2 */ { 2, s_4_2, -1, -1, 0}, /* 3 */ { 2, s_4_3, -1, -1, 0}, /* 4 */ { 2, s_4_4, -1, -1, 0}, /* 5 */ { 3, s_4_5, -1, -1, 0}, /* 6 */ { 3, s_4_6, -1, -1, 0}, /* 7 */ { 3, s_4_7, -1, -1, 0}, /* 8 */ { 3, s_4_8, -1, -1, 0}, /* 9 */ { 2, s_4_9, -1, -1, 0}, /* 10 */ { 4, s_4_10, -1, -1, 0}, /* 11 */ { 4, s_4_11, -1, -1, 0}, /* 12 */ { 4, s_4_12, -1, -1, 0}, /* 13 */ { 4, s_4_13, -1, -1, 0}, /* 14 */ { 4, s_4_14, -1, -1, 0}, /* 15 */ { 4, s_4_15, -1, -1, 0}, /* 16 */ { 4, s_4_16, -1, -1, 0}, /* 17 */ { 4, s_4_17, -1, -1, 0}, /* 18 */ { 3, s_4_18, -1, -1, 0}, /* 19 */ { 1, s_4_19, -1, -1, 0}, /* 20 */ { 2, s_4_20, 19, -1, 0}, /* 21 */ { 3, s_4_21, 20, -1, 0}, /* 22 */ { 2, s_4_22, 19, -1, 0}, /* 23 */ { 3, s_4_23, 22, -1, 0}, /* 24 */ { 7, s_4_24, 22, -1, 0}, /* 25 */ { 2, s_4_25, 19, -1, 0}, /* 26 */ { 3, s_4_26, 19, -1, 0}, /* 27 */ { 5, s_4_27, -1, -1, 0}, /* 28 */ { 3, s_4_28, -1, -1, 0}, /* 29 */ { 1, s_4_29, 
-1, -1, 0}, /* 30 */ { 2, s_4_30, 29, -1, 0}, /* 31 */ { 2, s_4_31, 29, -1, 0}, /* 32 */ { 5, s_4_32, 29, -1, 0}, /* 33 */ { 7, s_4_33, 32, -1, 0}, /* 34 */ { 7, s_4_34, 32, -1, 0}, /* 35 */ { 7, s_4_35, 32, -1, 0}, /* 36 */ { 2, s_4_36, 29, -1, 0}, /* 37 */ { 4, s_4_37, 29, -1, 0}, /* 38 */ { 3, s_4_38, 29, -1, 0}, /* 39 */ { 3, s_4_39, -1, -1, 0}, /* 40 */ { 3, s_4_40, -1, -1, 0}, /* 41 */ { 4, s_4_41, -1, -1, 0}, /* 42 */ { 3, s_4_42, -1, -1, 0}, /* 43 */ { 3, s_4_43, -1, -1, 0} }; static const symbol s_5_0[3] = { 0xC3, 0xA1, 'n' }; static const symbol s_5_1[3] = { 0xC3, 0xA9, 'n' }; static const symbol s_5_2[8] = { 0xC3, 0xA1, 'n', 'k', 0xC3, 0xA9, 'n', 't' }; static const struct among a_5[3] = { /* 0 */ { 3, s_5_0, -1, 2, 0}, /* 1 */ { 3, s_5_1, -1, 1, 0}, /* 2 */ { 8, s_5_2, -1, 3, 0} }; static const symbol s_6_0[4] = { 's', 't', 'u', 'l' }; static const symbol s_6_1[5] = { 'a', 's', 't', 'u', 'l' }; static const symbol s_6_2[6] = { 0xC3, 0xA1, 's', 't', 'u', 'l' }; static const symbol s_6_3[5] = { 's', 't', 0xC3, 0xBC, 'l' }; static const symbol s_6_4[6] = { 'e', 's', 't', 0xC3, 0xBC, 'l' }; static const symbol s_6_5[7] = { 0xC3, 0xA9, 's', 't', 0xC3, 0xBC, 'l' }; static const struct among a_6[6] = { /* 0 */ { 4, s_6_0, -1, 2, 0}, /* 1 */ { 5, s_6_1, 0, 1, 0}, /* 2 */ { 6, s_6_2, 0, 3, 0}, /* 3 */ { 5, s_6_3, -1, 2, 0}, /* 4 */ { 6, s_6_4, 3, 1, 0}, /* 5 */ { 7, s_6_5, 3, 4, 0} }; static const symbol s_7_0[2] = { 0xC3, 0xA1 }; static const symbol s_7_1[2] = { 0xC3, 0xA9 }; static const struct among a_7[2] = { /* 0 */ { 2, s_7_0, -1, 1, 0}, /* 1 */ { 2, s_7_1, -1, 2, 0} }; static const symbol s_8_0[1] = { 'k' }; static const symbol s_8_1[2] = { 'a', 'k' }; static const symbol s_8_2[2] = { 'e', 'k' }; static const symbol s_8_3[2] = { 'o', 'k' }; static const symbol s_8_4[3] = { 0xC3, 0xA1, 'k' }; static const symbol s_8_5[3] = { 0xC3, 0xA9, 'k' }; static const symbol s_8_6[3] = { 0xC3, 0xB6, 'k' }; static const struct among a_8[7] = { /* 0 */ { 1, s_8_0, -1, 
7, 0}, /* 1 */ { 2, s_8_1, 0, 4, 0}, /* 2 */ { 2, s_8_2, 0, 6, 0}, /* 3 */ { 2, s_8_3, 0, 5, 0}, /* 4 */ { 3, s_8_4, 0, 1, 0}, /* 5 */ { 3, s_8_5, 0, 2, 0}, /* 6 */ { 3, s_8_6, 0, 3, 0} }; static const symbol s_9_0[3] = { 0xC3, 0xA9, 'i' }; static const symbol s_9_1[5] = { 0xC3, 0xA1, 0xC3, 0xA9, 'i' }; static const symbol s_9_2[5] = { 0xC3, 0xA9, 0xC3, 0xA9, 'i' }; static const symbol s_9_3[2] = { 0xC3, 0xA9 }; static const symbol s_9_4[3] = { 'k', 0xC3, 0xA9 }; static const symbol s_9_5[4] = { 'a', 'k', 0xC3, 0xA9 }; static const symbol s_9_6[4] = { 'e', 'k', 0xC3, 0xA9 }; static const symbol s_9_7[4] = { 'o', 'k', 0xC3, 0xA9 }; static const symbol s_9_8[5] = { 0xC3, 0xA1, 'k', 0xC3, 0xA9 }; static const symbol s_9_9[5] = { 0xC3, 0xA9, 'k', 0xC3, 0xA9 }; static const symbol s_9_10[5] = { 0xC3, 0xB6, 'k', 0xC3, 0xA9 }; static const symbol s_9_11[4] = { 0xC3, 0xA9, 0xC3, 0xA9 }; static const struct among a_9[12] = { /* 0 */ { 3, s_9_0, -1, 7, 0}, /* 1 */ { 5, s_9_1, 0, 6, 0}, /* 2 */ { 5, s_9_2, 0, 5, 0}, /* 3 */ { 2, s_9_3, -1, 9, 0}, /* 4 */ { 3, s_9_4, 3, 4, 0}, /* 5 */ { 4, s_9_5, 4, 1, 0}, /* 6 */ { 4, s_9_6, 4, 1, 0}, /* 7 */ { 4, s_9_7, 4, 1, 0}, /* 8 */ { 5, s_9_8, 4, 3, 0}, /* 9 */ { 5, s_9_9, 4, 2, 0}, /* 10 */ { 5, s_9_10, 4, 1, 0}, /* 11 */ { 4, s_9_11, 3, 8, 0} }; static const symbol s_10_0[1] = { 'a' }; static const symbol s_10_1[2] = { 'j', 'a' }; static const symbol s_10_2[1] = { 'd' }; static const symbol s_10_3[2] = { 'a', 'd' }; static const symbol s_10_4[2] = { 'e', 'd' }; static const symbol s_10_5[2] = { 'o', 'd' }; static const symbol s_10_6[3] = { 0xC3, 0xA1, 'd' }; static const symbol s_10_7[3] = { 0xC3, 0xA9, 'd' }; static const symbol s_10_8[3] = { 0xC3, 0xB6, 'd' }; static const symbol s_10_9[1] = { 'e' }; static const symbol s_10_10[2] = { 'j', 'e' }; static const symbol s_10_11[2] = { 'n', 'k' }; static const symbol s_10_12[3] = { 'u', 'n', 'k' }; static const symbol s_10_13[4] = { 0xC3, 0xA1, 'n', 'k' }; static const symbol s_10_14[4] 
= { 0xC3, 0xA9, 'n', 'k' }; static const symbol s_10_15[4] = { 0xC3, 0xBC, 'n', 'k' }; static const symbol s_10_16[2] = { 'u', 'k' }; static const symbol s_10_17[3] = { 'j', 'u', 'k' }; static const symbol s_10_18[5] = { 0xC3, 0xA1, 'j', 'u', 'k' }; static const symbol s_10_19[3] = { 0xC3, 0xBC, 'k' }; static const symbol s_10_20[4] = { 'j', 0xC3, 0xBC, 'k' }; static const symbol s_10_21[6] = { 0xC3, 0xA9, 'j', 0xC3, 0xBC, 'k' }; static const symbol s_10_22[1] = { 'm' }; static const symbol s_10_23[2] = { 'a', 'm' }; static const symbol s_10_24[2] = { 'e', 'm' }; static const symbol s_10_25[2] = { 'o', 'm' }; static const symbol s_10_26[3] = { 0xC3, 0xA1, 'm' }; static const symbol s_10_27[3] = { 0xC3, 0xA9, 'm' }; static const symbol s_10_28[1] = { 'o' }; static const symbol s_10_29[2] = { 0xC3, 0xA1 }; static const symbol s_10_30[2] = { 0xC3, 0xA9 }; static const struct among a_10[31] = { /* 0 */ { 1, s_10_0, -1, 18, 0}, /* 1 */ { 2, s_10_1, 0, 17, 0}, /* 2 */ { 1, s_10_2, -1, 16, 0}, /* 3 */ { 2, s_10_3, 2, 13, 0}, /* 4 */ { 2, s_10_4, 2, 13, 0}, /* 5 */ { 2, s_10_5, 2, 13, 0}, /* 6 */ { 3, s_10_6, 2, 14, 0}, /* 7 */ { 3, s_10_7, 2, 15, 0}, /* 8 */ { 3, s_10_8, 2, 13, 0}, /* 9 */ { 1, s_10_9, -1, 18, 0}, /* 10 */ { 2, s_10_10, 9, 17, 0}, /* 11 */ { 2, s_10_11, -1, 4, 0}, /* 12 */ { 3, s_10_12, 11, 1, 0}, /* 13 */ { 4, s_10_13, 11, 2, 0}, /* 14 */ { 4, s_10_14, 11, 3, 0}, /* 15 */ { 4, s_10_15, 11, 1, 0}, /* 16 */ { 2, s_10_16, -1, 8, 0}, /* 17 */ { 3, s_10_17, 16, 7, 0}, /* 18 */ { 5, s_10_18, 17, 5, 0}, /* 19 */ { 3, s_10_19, -1, 8, 0}, /* 20 */ { 4, s_10_20, 19, 7, 0}, /* 21 */ { 6, s_10_21, 20, 6, 0}, /* 22 */ { 1, s_10_22, -1, 12, 0}, /* 23 */ { 2, s_10_23, 22, 9, 0}, /* 24 */ { 2, s_10_24, 22, 9, 0}, /* 25 */ { 2, s_10_25, 22, 9, 0}, /* 26 */ { 3, s_10_26, 22, 10, 0}, /* 27 */ { 3, s_10_27, 22, 11, 0}, /* 28 */ { 1, s_10_28, -1, 18, 0}, /* 29 */ { 2, s_10_29, -1, 19, 0}, /* 30 */ { 2, s_10_30, -1, 20, 0} }; static const symbol s_11_0[2] = { 'i', 'd' }; 
static const symbol s_11_1[3] = { 'a', 'i', 'd' }; static const symbol s_11_2[4] = { 'j', 'a', 'i', 'd' }; static const symbol s_11_3[3] = { 'e', 'i', 'd' }; static const symbol s_11_4[4] = { 'j', 'e', 'i', 'd' }; static const symbol s_11_5[4] = { 0xC3, 0xA1, 'i', 'd' }; static const symbol s_11_6[4] = { 0xC3, 0xA9, 'i', 'd' }; static const symbol s_11_7[1] = { 'i' }; static const symbol s_11_8[2] = { 'a', 'i' }; static const symbol s_11_9[3] = { 'j', 'a', 'i' }; static const symbol s_11_10[2] = { 'e', 'i' }; static const symbol s_11_11[3] = { 'j', 'e', 'i' }; static const symbol s_11_12[3] = { 0xC3, 0xA1, 'i' }; static const symbol s_11_13[3] = { 0xC3, 0xA9, 'i' }; static const symbol s_11_14[4] = { 'i', 't', 'e', 'k' }; static const symbol s_11_15[5] = { 'e', 'i', 't', 'e', 'k' }; static const symbol s_11_16[6] = { 'j', 'e', 'i', 't', 'e', 'k' }; static const symbol s_11_17[6] = { 0xC3, 0xA9, 'i', 't', 'e', 'k' }; static const symbol s_11_18[2] = { 'i', 'k' }; static const symbol s_11_19[3] = { 'a', 'i', 'k' }; static const symbol s_11_20[4] = { 'j', 'a', 'i', 'k' }; static const symbol s_11_21[3] = { 'e', 'i', 'k' }; static const symbol s_11_22[4] = { 'j', 'e', 'i', 'k' }; static const symbol s_11_23[4] = { 0xC3, 0xA1, 'i', 'k' }; static const symbol s_11_24[4] = { 0xC3, 0xA9, 'i', 'k' }; static const symbol s_11_25[3] = { 'i', 'n', 'k' }; static const symbol s_11_26[4] = { 'a', 'i', 'n', 'k' }; static const symbol s_11_27[5] = { 'j', 'a', 'i', 'n', 'k' }; static const symbol s_11_28[4] = { 'e', 'i', 'n', 'k' }; static const symbol s_11_29[5] = { 'j', 'e', 'i', 'n', 'k' }; static const symbol s_11_30[5] = { 0xC3, 0xA1, 'i', 'n', 'k' }; static const symbol s_11_31[5] = { 0xC3, 0xA9, 'i', 'n', 'k' }; static const symbol s_11_32[5] = { 'a', 'i', 't', 'o', 'k' }; static const symbol s_11_33[6] = { 'j', 'a', 'i', 't', 'o', 'k' }; static const symbol s_11_34[6] = { 0xC3, 0xA1, 'i', 't', 'o', 'k' }; static const symbol s_11_35[2] = { 'i', 'm' }; static const symbol 
s_11_36[3] = { 'a', 'i', 'm' }; static const symbol s_11_37[4] = { 'j', 'a', 'i', 'm' }; static const symbol s_11_38[3] = { 'e', 'i', 'm' }; static const symbol s_11_39[4] = { 'j', 'e', 'i', 'm' }; static const symbol s_11_40[4] = { 0xC3, 0xA1, 'i', 'm' }; static const symbol s_11_41[4] = { 0xC3, 0xA9, 'i', 'm' }; static const struct among a_11[42] = { /* 0 */ { 2, s_11_0, -1, 10, 0}, /* 1 */ { 3, s_11_1, 0, 9, 0}, /* 2 */ { 4, s_11_2, 1, 6, 0}, /* 3 */ { 3, s_11_3, 0, 9, 0}, /* 4 */ { 4, s_11_4, 3, 6, 0}, /* 5 */ { 4, s_11_5, 0, 7, 0}, /* 6 */ { 4, s_11_6, 0, 8, 0}, /* 7 */ { 1, s_11_7, -1, 15, 0}, /* 8 */ { 2, s_11_8, 7, 14, 0}, /* 9 */ { 3, s_11_9, 8, 11, 0}, /* 10 */ { 2, s_11_10, 7, 14, 0}, /* 11 */ { 3, s_11_11, 10, 11, 0}, /* 12 */ { 3, s_11_12, 7, 12, 0}, /* 13 */ { 3, s_11_13, 7, 13, 0}, /* 14 */ { 4, s_11_14, -1, 24, 0}, /* 15 */ { 5, s_11_15, 14, 21, 0}, /* 16 */ { 6, s_11_16, 15, 20, 0}, /* 17 */ { 6, s_11_17, 14, 23, 0}, /* 18 */ { 2, s_11_18, -1, 29, 0}, /* 19 */ { 3, s_11_19, 18, 26, 0}, /* 20 */ { 4, s_11_20, 19, 25, 0}, /* 21 */ { 3, s_11_21, 18, 26, 0}, /* 22 */ { 4, s_11_22, 21, 25, 0}, /* 23 */ { 4, s_11_23, 18, 27, 0}, /* 24 */ { 4, s_11_24, 18, 28, 0}, /* 25 */ { 3, s_11_25, -1, 20, 0}, /* 26 */ { 4, s_11_26, 25, 17, 0}, /* 27 */ { 5, s_11_27, 26, 16, 0}, /* 28 */ { 4, s_11_28, 25, 17, 0}, /* 29 */ { 5, s_11_29, 28, 16, 0}, /* 30 */ { 5, s_11_30, 25, 18, 0}, /* 31 */ { 5, s_11_31, 25, 19, 0}, /* 32 */ { 5, s_11_32, -1, 21, 0}, /* 33 */ { 6, s_11_33, 32, 20, 0}, /* 34 */ { 6, s_11_34, -1, 22, 0}, /* 35 */ { 2, s_11_35, -1, 5, 0}, /* 36 */ { 3, s_11_36, 35, 4, 0}, /* 37 */ { 4, s_11_37, 36, 1, 0}, /* 38 */ { 3, s_11_38, 35, 4, 0}, /* 39 */ { 4, s_11_39, 38, 1, 0}, /* 40 */ { 4, s_11_40, 35, 2, 0}, /* 41 */ { 4, s_11_41, 35, 3, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 52, 14 }; static const symbol s_0[] = { 'a' }; static const symbol s_1[] = { 'e' }; static const symbol s_2[] = { 'e' }; 
static const symbol s_3[] = { 'a' }; static const symbol s_4[] = { 'a' }; static const symbol s_5[] = { 'a' }; static const symbol s_6[] = { 'e' }; static const symbol s_7[] = { 'a' }; static const symbol s_8[] = { 'e' }; static const symbol s_9[] = { 'e' }; static const symbol s_10[] = { 'a' }; static const symbol s_11[] = { 'e' }; static const symbol s_12[] = { 'a' }; static const symbol s_13[] = { 'e' }; static const symbol s_14[] = { 'a' }; static const symbol s_15[] = { 'e' }; static const symbol s_16[] = { 'a' }; static const symbol s_17[] = { 'e' }; static const symbol s_18[] = { 'a' }; static const symbol s_19[] = { 'e' }; static const symbol s_20[] = { 'a' }; static const symbol s_21[] = { 'e' }; static const symbol s_22[] = { 'a' }; static const symbol s_23[] = { 'e' }; static const symbol s_24[] = { 'a' }; static const symbol s_25[] = { 'e' }; static const symbol s_26[] = { 'a' }; static const symbol s_27[] = { 'e' }; static const symbol s_28[] = { 'a' }; static const symbol s_29[] = { 'e' }; static const symbol s_30[] = { 'a' }; static const symbol s_31[] = { 'e' }; static const symbol s_32[] = { 'a' }; static const symbol s_33[] = { 'e' }; static const symbol s_34[] = { 'a' }; static const symbol s_35[] = { 'e' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c1 = z->c; /* or, line 51 */ if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab1; if (in_grouping_U(z, g_v, 97, 252, 1) < 0) goto lab1; /* goto */ /* non v, line 48 */ { int c2 = z->c; /* or, line 49 */ if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 3 || !((101187584 >> (z->p[z->c + 1] & 0x1f)) & 1)) goto lab3; if (!(find_among(z, a_0, 8))) goto lab3; /* among, line 49 */ goto lab2; lab3: z->c = c2; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab1; z->c = ret; /* next, line 49 */ } } lab2: z->I[0] = z->c; /* setmark p1, line 50 */ goto lab0; lab1: z->c = c1; if (out_grouping_U(z, g_v, 97, 252, 0)) return 0; { /* gopast */ /* grouping v, line 53 */ int ret = 
out_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 53 */ } lab0: return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_v_ending(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 61 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 161 && z->p[z->c - 1] != 169)) return 0; among_var = find_among_b(z, a_1, 2); /* substring, line 61 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 61 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 61 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 1, s_0); /* <-, line 62 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 63 */ if (ret < 0) return ret; } break; } return 1; } static int r_double(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 68 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((106790108 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_2, 23))) return 0; /* among, line 68 */ z->c = z->l - m_test; } return 1; } static int r_undouble(struct SN_env * z) { { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 73 */ } z->ket = z->c; /* [, line 73 */ { int ret = skip_utf8(z->p, z->c, z->lb, z->l, - 1); if (ret < 0) return 0; z->c = ret; /* hop, line 73 */ } z->bra = z->c; /* ], line 73 */ { int ret = slice_del(z); /* delete, line 73 */ if (ret < 0) return ret; } return 1; } static int r_instrum(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 77 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] != 108) return 0; among_var = find_among_b(z, a_3, 2); /* substring, line 77 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 77 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 77 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = r_double(z); 
if (ret == 0) return 0; /* call double, line 78 */ if (ret < 0) return ret; } break; case 2: { int ret = r_double(z); if (ret == 0) return 0; /* call double, line 79 */ if (ret < 0) return ret; } break; } { int ret = slice_del(z); /* delete, line 81 */ if (ret < 0) return ret; } { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 82 */ if (ret < 0) return ret; } return 1; } static int r_case(struct SN_env * z) { z->ket = z->c; /* [, line 87 */ if (!(find_among_b(z, a_4, 44))) return 0; /* substring, line 87 */ z->bra = z->c; /* ], line 87 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 87 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 111 */ if (ret < 0) return ret; } { int ret = r_v_ending(z); if (ret == 0) return 0; /* call v_ending, line 112 */ if (ret < 0) return ret; } return 1; } static int r_case_special(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 116 */ if (z->c - 2 <= z->lb || (z->p[z->c - 1] != 110 && z->p[z->c - 1] != 116)) return 0; among_var = find_among_b(z, a_5, 3); /* substring, line 116 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 116 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 116 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 1, s_2); /* <-, line 117 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_3); /* <-, line 118 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_4); /* <-, line 119 */ if (ret < 0) return ret; } break; } return 1; } static int r_case_other(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 124 */ if (z->c - 3 <= z->lb || z->p[z->c - 1] != 108) return 0; among_var = find_among_b(z, a_6, 6); /* substring, line 124 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 124 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 124 */ if (ret < 0) return ret; } switch(among_var) { 
case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 125 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 126 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_5); /* <-, line 127 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_6); /* <-, line 128 */ if (ret < 0) return ret; } break; } return 1; } static int r_factive(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 133 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 161 && z->p[z->c - 1] != 169)) return 0; among_var = find_among_b(z, a_7, 2); /* substring, line 133 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 133 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 133 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = r_double(z); if (ret == 0) return 0; /* call double, line 134 */ if (ret < 0) return ret; } break; case 2: { int ret = r_double(z); if (ret == 0) return 0; /* call double, line 135 */ if (ret < 0) return ret; } break; } { int ret = slice_del(z); /* delete, line 137 */ if (ret < 0) return ret; } { int ret = r_undouble(z); if (ret == 0) return 0; /* call undouble, line 138 */ if (ret < 0) return ret; } return 1; } static int r_plural(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 142 */ if (z->c <= z->lb || z->p[z->c - 1] != 107) return 0; among_var = find_among_b(z, a_8, 7); /* substring, line 142 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 142 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 142 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 1, s_7); /* <-, line 143 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_8); /* <-, line 144 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 145 */ if (ret < 0) return ret; } break; case 4: { int ret = 
slice_del(z); /* delete, line 146 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_del(z); /* delete, line 147 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_del(z); /* delete, line 148 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_del(z); /* delete, line 149 */ if (ret < 0) return ret; } break; } return 1; } static int r_owned(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 154 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 169)) return 0; among_var = find_among_b(z, a_9, 12); /* substring, line 154 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 154 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 154 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 155 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_9); /* <-, line 156 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_10); /* <-, line 157 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_del(z); /* delete, line 158 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_11); /* <-, line 159 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 1, s_12); /* <-, line 160 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_del(z); /* delete, line 161 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 1, s_13); /* <-, line 162 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_del(z); /* delete, line 163 */ if (ret < 0) return ret; } break; } return 1; } static int r_sing_owner(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 168 */ among_var = find_among_b(z, a_10, 31); /* substring, line 168 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 168 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 168 */ if (ret < 0) return ret; } switch(among_var) { 
case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 169 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_14); /* <-, line 170 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_15); /* <-, line 171 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_del(z); /* delete, line 172 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_16); /* <-, line 173 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 1, s_17); /* <-, line 174 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_del(z); /* delete, line 175 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_del(z); /* delete, line 176 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_del(z); /* delete, line 177 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 1, s_18); /* <-, line 178 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_from_s(z, 1, s_19); /* <-, line 179 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_del(z); /* delete, line 180 */ if (ret < 0) return ret; } break; case 13: { int ret = slice_del(z); /* delete, line 181 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_from_s(z, 1, s_20); /* <-, line 182 */ if (ret < 0) return ret; } break; case 15: { int ret = slice_from_s(z, 1, s_21); /* <-, line 183 */ if (ret < 0) return ret; } break; case 16: { int ret = slice_del(z); /* delete, line 184 */ if (ret < 0) return ret; } break; case 17: { int ret = slice_del(z); /* delete, line 185 */ if (ret < 0) return ret; } break; case 18: { int ret = slice_del(z); /* delete, line 186 */ if (ret < 0) return ret; } break; case 19: { int ret = slice_from_s(z, 1, s_22); /* <-, line 187 */ if (ret < 0) return ret; } break; case 20: { int ret = slice_from_s(z, 1, s_23); /* <-, line 188 */ if (ret < 0) return ret; } break; } return 1; } static int r_plur_owner(struct SN_env * z) { int 
among_var; z->ket = z->c; /* [, line 193 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((10768 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_11, 42); /* substring, line 193 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 193 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 193 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 194 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_24); /* <-, line 195 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_25); /* <-, line 196 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_del(z); /* delete, line 197 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_del(z); /* delete, line 198 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_del(z); /* delete, line 199 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 1, s_26); /* <-, line 200 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 1, s_27); /* <-, line 201 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_del(z); /* delete, line 202 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_del(z); /* delete, line 203 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_del(z); /* delete, line 204 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_from_s(z, 1, s_28); /* <-, line 205 */ if (ret < 0) return ret; } break; case 13: { int ret = slice_from_s(z, 1, s_29); /* <-, line 206 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_del(z); /* delete, line 207 */ if (ret < 0) return ret; } break; case 15: { int ret = slice_del(z); /* delete, line 208 */ if (ret < 0) return ret; } break; case 16: { int ret = slice_del(z); /* delete, line 209 */ if (ret < 0) return ret; } break; case 17: { int ret = slice_del(z); /* delete, line 210 */ if (ret < 0) return ret; 
} break; case 18: { int ret = slice_from_s(z, 1, s_30); /* <-, line 211 */ if (ret < 0) return ret; } break; case 19: { int ret = slice_from_s(z, 1, s_31); /* <-, line 212 */ if (ret < 0) return ret; } break; case 20: { int ret = slice_del(z); /* delete, line 214 */ if (ret < 0) return ret; } break; case 21: { int ret = slice_del(z); /* delete, line 215 */ if (ret < 0) return ret; } break; case 22: { int ret = slice_from_s(z, 1, s_32); /* <-, line 216 */ if (ret < 0) return ret; } break; case 23: { int ret = slice_from_s(z, 1, s_33); /* <-, line 217 */ if (ret < 0) return ret; } break; case 24: { int ret = slice_del(z); /* delete, line 218 */ if (ret < 0) return ret; } break; case 25: { int ret = slice_del(z); /* delete, line 219 */ if (ret < 0) return ret; } break; case 26: { int ret = slice_del(z); /* delete, line 220 */ if (ret < 0) return ret; } break; case 27: { int ret = slice_from_s(z, 1, s_34); /* <-, line 221 */ if (ret < 0) return ret; } break; case 28: { int ret = slice_from_s(z, 1, s_35); /* <-, line 222 */ if (ret < 0) return ret; } break; case 29: { int ret = slice_del(z); /* delete, line 223 */ if (ret < 0) return ret; } break; } return 1; } extern int hungarian_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 229 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 229 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 230 */ { int m2 = z->l - z->c; (void)m2; /* do, line 231 */ { int ret = r_instrum(z); if (ret == 0) goto lab1; /* call instrum, line 231 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 232 */ { int ret = r_case(z); if (ret == 0) goto lab2; /* call case, line 232 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 233 */ { int ret = r_case_special(z); if (ret == 0) goto lab3; /* call case_special, line 233 */ if (ret < 0) return ret; } lab3: z->c 
= z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 234 */ { int ret = r_case_other(z); if (ret == 0) goto lab4; /* call case_other, line 234 */ if (ret < 0) return ret; } lab4: z->c = z->l - m5; } { int m6 = z->l - z->c; (void)m6; /* do, line 235 */ { int ret = r_factive(z); if (ret == 0) goto lab5; /* call factive, line 235 */ if (ret < 0) return ret; } lab5: z->c = z->l - m6; } { int m7 = z->l - z->c; (void)m7; /* do, line 236 */ { int ret = r_owned(z); if (ret == 0) goto lab6; /* call owned, line 236 */ if (ret < 0) return ret; } lab6: z->c = z->l - m7; } { int m8 = z->l - z->c; (void)m8; /* do, line 237 */ { int ret = r_sing_owner(z); if (ret == 0) goto lab7; /* call sing_owner, line 237 */ if (ret < 0) return ret; } lab7: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 238 */ { int ret = r_plur_owner(z); if (ret == 0) goto lab8; /* call plur_owner, line 238 */ if (ret < 0) return ret; } lab8: z->c = z->l - m9; } { int m10 = z->l - z->c; (void)m10; /* do, line 239 */ { int ret = r_plural(z); if (ret == 0) goto lab9; /* call plural, line 239 */ if (ret < 0) return ret; } lab9: z->c = z->l - m10; } z->c = z->lb; return 1; } extern struct SN_env * hungarian_UTF_8_create_env(void) { return SN_create_env(0, 1, 0); } extern void hungarian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_hungarian.h000066400000000000000000000004741456444476200312650ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* hungarian_UTF_8_create_env(void); extern void hungarian_UTF_8_close_env(struct SN_env* z); extern int hungarian_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_italian.c000066400000000000000000001170371456444476200307310ustar00rootroot00000000000000 /* This 
file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int italian_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_vowel_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_attached_pronoun(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * italian_UTF_8_create_env(void); extern void italian_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[2] = { 'q', 'u' }; static const symbol s_0_2[2] = { 0xC3, 0xA1 }; static const symbol s_0_3[2] = { 0xC3, 0xA9 }; static const symbol s_0_4[2] = { 0xC3, 0xAD }; static const symbol s_0_5[2] = { 0xC3, 0xB3 }; static const symbol s_0_6[2] = { 0xC3, 0xBA }; static const struct among a_0[7] = { /* 0 */ { 0, 0, -1, 7, 0}, /* 1 */ { 2, s_0_1, 0, 6, 0}, /* 2 */ { 2, s_0_2, 0, 1, 0}, /* 3 */ { 2, s_0_3, 0, 2, 0}, /* 4 */ { 2, s_0_4, 0, 3, 0}, /* 5 */ { 2, s_0_5, 0, 4, 0}, /* 6 */ { 2, s_0_6, 0, 5, 0} }; static const symbol s_1_1[1] = { 'I' }; static const symbol s_1_2[1] = { 'U' }; static const struct among a_1[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 1, s_1_1, 0, 1, 0}, /* 2 */ { 1, s_1_2, 0, 2, 0} }; static const symbol s_2_0[2] = { 'l', 'a' }; static const symbol s_2_1[4] = { 'c', 'e', 'l', 'a' }; static const symbol s_2_2[6] = { 'g', 'l', 'i', 'e', 'l', 'a' }; static const symbol s_2_3[4] = { 'm', 'e', 'l', 'a' }; static const symbol s_2_4[4] = { 't', 'e', 'l', 'a' }; static const symbol s_2_5[4] = { 'v', 'e', 'l', 'a' }; static const symbol s_2_6[2] = { 'l', 'e' }; static const symbol s_2_7[4] = { 'c', 'e', 'l', 'e' }; static const 
symbol s_2_8[6] = { 'g', 'l', 'i', 'e', 'l', 'e' }; static const symbol s_2_9[4] = { 'm', 'e', 'l', 'e' }; static const symbol s_2_10[4] = { 't', 'e', 'l', 'e' }; static const symbol s_2_11[4] = { 'v', 'e', 'l', 'e' }; static const symbol s_2_12[2] = { 'n', 'e' }; static const symbol s_2_13[4] = { 'c', 'e', 'n', 'e' }; static const symbol s_2_14[6] = { 'g', 'l', 'i', 'e', 'n', 'e' }; static const symbol s_2_15[4] = { 'm', 'e', 'n', 'e' }; static const symbol s_2_16[4] = { 's', 'e', 'n', 'e' }; static const symbol s_2_17[4] = { 't', 'e', 'n', 'e' }; static const symbol s_2_18[4] = { 'v', 'e', 'n', 'e' }; static const symbol s_2_19[2] = { 'c', 'i' }; static const symbol s_2_20[2] = { 'l', 'i' }; static const symbol s_2_21[4] = { 'c', 'e', 'l', 'i' }; static const symbol s_2_22[6] = { 'g', 'l', 'i', 'e', 'l', 'i' }; static const symbol s_2_23[4] = { 'm', 'e', 'l', 'i' }; static const symbol s_2_24[4] = { 't', 'e', 'l', 'i' }; static const symbol s_2_25[4] = { 'v', 'e', 'l', 'i' }; static const symbol s_2_26[3] = { 'g', 'l', 'i' }; static const symbol s_2_27[2] = { 'm', 'i' }; static const symbol s_2_28[2] = { 's', 'i' }; static const symbol s_2_29[2] = { 't', 'i' }; static const symbol s_2_30[2] = { 'v', 'i' }; static const symbol s_2_31[2] = { 'l', 'o' }; static const symbol s_2_32[4] = { 'c', 'e', 'l', 'o' }; static const symbol s_2_33[6] = { 'g', 'l', 'i', 'e', 'l', 'o' }; static const symbol s_2_34[4] = { 'm', 'e', 'l', 'o' }; static const symbol s_2_35[4] = { 't', 'e', 'l', 'o' }; static const symbol s_2_36[4] = { 'v', 'e', 'l', 'o' }; static const struct among a_2[37] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 4, s_2_1, 0, -1, 0}, /* 2 */ { 6, s_2_2, 0, -1, 0}, /* 3 */ { 4, s_2_3, 0, -1, 0}, /* 4 */ { 4, s_2_4, 0, -1, 0}, /* 5 */ { 4, s_2_5, 0, -1, 0}, /* 6 */ { 2, s_2_6, -1, -1, 0}, /* 7 */ { 4, s_2_7, 6, -1, 0}, /* 8 */ { 6, s_2_8, 6, -1, 0}, /* 9 */ { 4, s_2_9, 6, -1, 0}, /* 10 */ { 4, s_2_10, 6, -1, 0}, /* 11 */ { 4, s_2_11, 6, -1, 0}, /* 12 */ { 2, 
s_2_12, -1, -1, 0}, /* 13 */ { 4, s_2_13, 12, -1, 0}, /* 14 */ { 6, s_2_14, 12, -1, 0}, /* 15 */ { 4, s_2_15, 12, -1, 0}, /* 16 */ { 4, s_2_16, 12, -1, 0}, /* 17 */ { 4, s_2_17, 12, -1, 0}, /* 18 */ { 4, s_2_18, 12, -1, 0}, /* 19 */ { 2, s_2_19, -1, -1, 0}, /* 20 */ { 2, s_2_20, -1, -1, 0}, /* 21 */ { 4, s_2_21, 20, -1, 0}, /* 22 */ { 6, s_2_22, 20, -1, 0}, /* 23 */ { 4, s_2_23, 20, -1, 0}, /* 24 */ { 4, s_2_24, 20, -1, 0}, /* 25 */ { 4, s_2_25, 20, -1, 0}, /* 26 */ { 3, s_2_26, 20, -1, 0}, /* 27 */ { 2, s_2_27, -1, -1, 0}, /* 28 */ { 2, s_2_28, -1, -1, 0}, /* 29 */ { 2, s_2_29, -1, -1, 0}, /* 30 */ { 2, s_2_30, -1, -1, 0}, /* 31 */ { 2, s_2_31, -1, -1, 0}, /* 32 */ { 4, s_2_32, 31, -1, 0}, /* 33 */ { 6, s_2_33, 31, -1, 0}, /* 34 */ { 4, s_2_34, 31, -1, 0}, /* 35 */ { 4, s_2_35, 31, -1, 0}, /* 36 */ { 4, s_2_36, 31, -1, 0} }; static const symbol s_3_0[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_3_1[4] = { 'e', 'n', 'd', 'o' }; static const symbol s_3_2[2] = { 'a', 'r' }; static const symbol s_3_3[2] = { 'e', 'r' }; static const symbol s_3_4[2] = { 'i', 'r' }; static const struct among a_3[5] = { /* 0 */ { 4, s_3_0, -1, 1, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0}, /* 2 */ { 2, s_3_2, -1, 2, 0}, /* 3 */ { 2, s_3_3, -1, 2, 0}, /* 4 */ { 2, s_3_4, -1, 2, 0} }; static const symbol s_4_0[2] = { 'i', 'c' }; static const symbol s_4_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_4_2[2] = { 'o', 's' }; static const symbol s_4_3[2] = { 'i', 'v' }; static const struct among a_4[4] = { /* 0 */ { 2, s_4_0, -1, -1, 0}, /* 1 */ { 4, s_4_1, -1, -1, 0}, /* 2 */ { 2, s_4_2, -1, -1, 0}, /* 3 */ { 2, s_4_3, -1, 1, 0} }; static const symbol s_5_0[2] = { 'i', 'c' }; static const symbol s_5_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_5_2[2] = { 'i', 'v' }; static const struct among a_5[3] = { /* 0 */ { 2, s_5_0, -1, 1, 0}, /* 1 */ { 4, s_5_1, -1, 1, 0}, /* 2 */ { 2, s_5_2, -1, 1, 0} }; static const symbol s_6_0[3] = { 'i', 'c', 'a' }; static const symbol s_6_1[5] = { 'l', 
'o', 'g', 'i', 'a' }; static const symbol s_6_2[3] = { 'o', 's', 'a' }; static const symbol s_6_3[4] = { 'i', 's', 't', 'a' }; static const symbol s_6_4[3] = { 'i', 'v', 'a' }; static const symbol s_6_5[4] = { 'a', 'n', 'z', 'a' }; static const symbol s_6_6[4] = { 'e', 'n', 'z', 'a' }; static const symbol s_6_7[3] = { 'i', 'c', 'e' }; static const symbol s_6_8[6] = { 'a', 't', 'r', 'i', 'c', 'e' }; static const symbol s_6_9[4] = { 'i', 'c', 'h', 'e' }; static const symbol s_6_10[5] = { 'l', 'o', 'g', 'i', 'e' }; static const symbol s_6_11[5] = { 'a', 'b', 'i', 'l', 'e' }; static const symbol s_6_12[5] = { 'i', 'b', 'i', 'l', 'e' }; static const symbol s_6_13[6] = { 'u', 's', 'i', 'o', 'n', 'e' }; static const symbol s_6_14[6] = { 'a', 'z', 'i', 'o', 'n', 'e' }; static const symbol s_6_15[6] = { 'u', 'z', 'i', 'o', 'n', 'e' }; static const symbol s_6_16[5] = { 'a', 't', 'o', 'r', 'e' }; static const symbol s_6_17[3] = { 'o', 's', 'e' }; static const symbol s_6_18[4] = { 'a', 'n', 't', 'e' }; static const symbol s_6_19[5] = { 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_20[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_21[4] = { 'i', 's', 't', 'e' }; static const symbol s_6_22[3] = { 'i', 'v', 'e' }; static const symbol s_6_23[4] = { 'a', 'n', 'z', 'e' }; static const symbol s_6_24[4] = { 'e', 'n', 'z', 'e' }; static const symbol s_6_25[3] = { 'i', 'c', 'i' }; static const symbol s_6_26[6] = { 'a', 't', 'r', 'i', 'c', 'i' }; static const symbol s_6_27[4] = { 'i', 'c', 'h', 'i' }; static const symbol s_6_28[5] = { 'a', 'b', 'i', 'l', 'i' }; static const symbol s_6_29[5] = { 'i', 'b', 'i', 'l', 'i' }; static const symbol s_6_30[4] = { 'i', 's', 'm', 'i' }; static const symbol s_6_31[6] = { 'u', 's', 'i', 'o', 'n', 'i' }; static const symbol s_6_32[6] = { 'a', 'z', 'i', 'o', 'n', 'i' }; static const symbol s_6_33[6] = { 'u', 'z', 'i', 'o', 'n', 'i' }; static const symbol s_6_34[5] = { 'a', 't', 'o', 'r', 'i' }; static const symbol s_6_35[3] = { 
'o', 's', 'i' }; static const symbol s_6_36[4] = { 'a', 'n', 't', 'i' }; static const symbol s_6_37[6] = { 'a', 'm', 'e', 'n', 't', 'i' }; static const symbol s_6_38[6] = { 'i', 'm', 'e', 'n', 't', 'i' }; static const symbol s_6_39[4] = { 'i', 's', 't', 'i' }; static const symbol s_6_40[3] = { 'i', 'v', 'i' }; static const symbol s_6_41[3] = { 'i', 'c', 'o' }; static const symbol s_6_42[4] = { 'i', 's', 'm', 'o' }; static const symbol s_6_43[3] = { 'o', 's', 'o' }; static const symbol s_6_44[6] = { 'a', 'm', 'e', 'n', 't', 'o' }; static const symbol s_6_45[6] = { 'i', 'm', 'e', 'n', 't', 'o' }; static const symbol s_6_46[3] = { 'i', 'v', 'o' }; static const symbol s_6_47[4] = { 'i', 't', 0xC3, 0xA0 }; static const symbol s_6_48[5] = { 'i', 's', 't', 0xC3, 0xA0 }; static const symbol s_6_49[5] = { 'i', 's', 't', 0xC3, 0xA8 }; static const symbol s_6_50[5] = { 'i', 's', 't', 0xC3, 0xAC }; static const struct among a_6[51] = { /* 0 */ { 3, s_6_0, -1, 1, 0}, /* 1 */ { 5, s_6_1, -1, 3, 0}, /* 2 */ { 3, s_6_2, -1, 1, 0}, /* 3 */ { 4, s_6_3, -1, 1, 0}, /* 4 */ { 3, s_6_4, -1, 9, 0}, /* 5 */ { 4, s_6_5, -1, 1, 0}, /* 6 */ { 4, s_6_6, -1, 5, 0}, /* 7 */ { 3, s_6_7, -1, 1, 0}, /* 8 */ { 6, s_6_8, 7, 1, 0}, /* 9 */ { 4, s_6_9, -1, 1, 0}, /* 10 */ { 5, s_6_10, -1, 3, 0}, /* 11 */ { 5, s_6_11, -1, 1, 0}, /* 12 */ { 5, s_6_12, -1, 1, 0}, /* 13 */ { 6, s_6_13, -1, 4, 0}, /* 14 */ { 6, s_6_14, -1, 2, 0}, /* 15 */ { 6, s_6_15, -1, 4, 0}, /* 16 */ { 5, s_6_16, -1, 2, 0}, /* 17 */ { 3, s_6_17, -1, 1, 0}, /* 18 */ { 4, s_6_18, -1, 1, 0}, /* 19 */ { 5, s_6_19, -1, 1, 0}, /* 20 */ { 6, s_6_20, 19, 7, 0}, /* 21 */ { 4, s_6_21, -1, 1, 0}, /* 22 */ { 3, s_6_22, -1, 9, 0}, /* 23 */ { 4, s_6_23, -1, 1, 0}, /* 24 */ { 4, s_6_24, -1, 5, 0}, /* 25 */ { 3, s_6_25, -1, 1, 0}, /* 26 */ { 6, s_6_26, 25, 1, 0}, /* 27 */ { 4, s_6_27, -1, 1, 0}, /* 28 */ { 5, s_6_28, -1, 1, 0}, /* 29 */ { 5, s_6_29, -1, 1, 0}, /* 30 */ { 4, s_6_30, -1, 1, 0}, /* 31 */ { 6, s_6_31, -1, 4, 0}, /* 32 */ { 6, s_6_32, -1, 
2, 0}, /* 33 */ { 6, s_6_33, -1, 4, 0}, /* 34 */ { 5, s_6_34, -1, 2, 0}, /* 35 */ { 3, s_6_35, -1, 1, 0}, /* 36 */ { 4, s_6_36, -1, 1, 0}, /* 37 */ { 6, s_6_37, -1, 6, 0}, /* 38 */ { 6, s_6_38, -1, 6, 0}, /* 39 */ { 4, s_6_39, -1, 1, 0}, /* 40 */ { 3, s_6_40, -1, 9, 0}, /* 41 */ { 3, s_6_41, -1, 1, 0}, /* 42 */ { 4, s_6_42, -1, 1, 0}, /* 43 */ { 3, s_6_43, -1, 1, 0}, /* 44 */ { 6, s_6_44, -1, 6, 0}, /* 45 */ { 6, s_6_45, -1, 6, 0}, /* 46 */ { 3, s_6_46, -1, 9, 0}, /* 47 */ { 4, s_6_47, -1, 8, 0}, /* 48 */ { 5, s_6_48, -1, 1, 0}, /* 49 */ { 5, s_6_49, -1, 1, 0}, /* 50 */ { 5, s_6_50, -1, 1, 0} }; static const symbol s_7_0[4] = { 'i', 's', 'c', 'a' }; static const symbol s_7_1[4] = { 'e', 'n', 'd', 'a' }; static const symbol s_7_2[3] = { 'a', 't', 'a' }; static const symbol s_7_3[3] = { 'i', 't', 'a' }; static const symbol s_7_4[3] = { 'u', 't', 'a' }; static const symbol s_7_5[3] = { 'a', 'v', 'a' }; static const symbol s_7_6[3] = { 'e', 'v', 'a' }; static const symbol s_7_7[3] = { 'i', 'v', 'a' }; static const symbol s_7_8[6] = { 'e', 'r', 'e', 'b', 'b', 'e' }; static const symbol s_7_9[6] = { 'i', 'r', 'e', 'b', 'b', 'e' }; static const symbol s_7_10[4] = { 'i', 's', 'c', 'e' }; static const symbol s_7_11[4] = { 'e', 'n', 'd', 'e' }; static const symbol s_7_12[3] = { 'a', 'r', 'e' }; static const symbol s_7_13[3] = { 'e', 'r', 'e' }; static const symbol s_7_14[3] = { 'i', 'r', 'e' }; static const symbol s_7_15[4] = { 'a', 's', 's', 'e' }; static const symbol s_7_16[3] = { 'a', 't', 'e' }; static const symbol s_7_17[5] = { 'a', 'v', 'a', 't', 'e' }; static const symbol s_7_18[5] = { 'e', 'v', 'a', 't', 'e' }; static const symbol s_7_19[5] = { 'i', 'v', 'a', 't', 'e' }; static const symbol s_7_20[3] = { 'e', 't', 'e' }; static const symbol s_7_21[5] = { 'e', 'r', 'e', 't', 'e' }; static const symbol s_7_22[5] = { 'i', 'r', 'e', 't', 'e' }; static const symbol s_7_23[3] = { 'i', 't', 'e' }; static const symbol s_7_24[6] = { 'e', 'r', 'e', 's', 't', 'e' }; static 
const symbol s_7_25[6] = { 'i', 'r', 'e', 's', 't', 'e' }; static const symbol s_7_26[3] = { 'u', 't', 'e' }; static const symbol s_7_27[4] = { 'e', 'r', 'a', 'i' }; static const symbol s_7_28[4] = { 'i', 'r', 'a', 'i' }; static const symbol s_7_29[4] = { 'i', 's', 'c', 'i' }; static const symbol s_7_30[4] = { 'e', 'n', 'd', 'i' }; static const symbol s_7_31[4] = { 'e', 'r', 'e', 'i' }; static const symbol s_7_32[4] = { 'i', 'r', 'e', 'i' }; static const symbol s_7_33[4] = { 'a', 's', 's', 'i' }; static const symbol s_7_34[3] = { 'a', 't', 'i' }; static const symbol s_7_35[3] = { 'i', 't', 'i' }; static const symbol s_7_36[6] = { 'e', 'r', 'e', 's', 't', 'i' }; static const symbol s_7_37[6] = { 'i', 'r', 'e', 's', 't', 'i' }; static const symbol s_7_38[3] = { 'u', 't', 'i' }; static const symbol s_7_39[3] = { 'a', 'v', 'i' }; static const symbol s_7_40[3] = { 'e', 'v', 'i' }; static const symbol s_7_41[3] = { 'i', 'v', 'i' }; static const symbol s_7_42[4] = { 'i', 's', 'c', 'o' }; static const symbol s_7_43[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_7_44[4] = { 'e', 'n', 'd', 'o' }; static const symbol s_7_45[4] = { 'Y', 'a', 'm', 'o' }; static const symbol s_7_46[4] = { 'i', 'a', 'm', 'o' }; static const symbol s_7_47[5] = { 'a', 'v', 'a', 'm', 'o' }; static const symbol s_7_48[5] = { 'e', 'v', 'a', 'm', 'o' }; static const symbol s_7_49[5] = { 'i', 'v', 'a', 'm', 'o' }; static const symbol s_7_50[5] = { 'e', 'r', 'e', 'm', 'o' }; static const symbol s_7_51[5] = { 'i', 'r', 'e', 'm', 'o' }; static const symbol s_7_52[6] = { 'a', 's', 's', 'i', 'm', 'o' }; static const symbol s_7_53[4] = { 'a', 'm', 'm', 'o' }; static const symbol s_7_54[4] = { 'e', 'm', 'm', 'o' }; static const symbol s_7_55[6] = { 'e', 'r', 'e', 'm', 'm', 'o' }; static const symbol s_7_56[6] = { 'i', 'r', 'e', 'm', 'm', 'o' }; static const symbol s_7_57[4] = { 'i', 'm', 'm', 'o' }; static const symbol s_7_58[3] = { 'a', 'n', 'o' }; static const symbol s_7_59[6] = { 'i', 's', 'c', 'a', 
'n', 'o' }; static const symbol s_7_60[5] = { 'a', 'v', 'a', 'n', 'o' }; static const symbol s_7_61[5] = { 'e', 'v', 'a', 'n', 'o' }; static const symbol s_7_62[5] = { 'i', 'v', 'a', 'n', 'o' }; static const symbol s_7_63[6] = { 'e', 'r', 'a', 'n', 'n', 'o' }; static const symbol s_7_64[6] = { 'i', 'r', 'a', 'n', 'n', 'o' }; static const symbol s_7_65[3] = { 'o', 'n', 'o' }; static const symbol s_7_66[6] = { 'i', 's', 'c', 'o', 'n', 'o' }; static const symbol s_7_67[5] = { 'a', 'r', 'o', 'n', 'o' }; static const symbol s_7_68[5] = { 'e', 'r', 'o', 'n', 'o' }; static const symbol s_7_69[5] = { 'i', 'r', 'o', 'n', 'o' }; static const symbol s_7_70[8] = { 'e', 'r', 'e', 'b', 'b', 'e', 'r', 'o' }; static const symbol s_7_71[8] = { 'i', 'r', 'e', 'b', 'b', 'e', 'r', 'o' }; static const symbol s_7_72[6] = { 'a', 's', 's', 'e', 'r', 'o' }; static const symbol s_7_73[6] = { 'e', 's', 's', 'e', 'r', 'o' }; static const symbol s_7_74[6] = { 'i', 's', 's', 'e', 'r', 'o' }; static const symbol s_7_75[3] = { 'a', 't', 'o' }; static const symbol s_7_76[3] = { 'i', 't', 'o' }; static const symbol s_7_77[3] = { 'u', 't', 'o' }; static const symbol s_7_78[3] = { 'a', 'v', 'o' }; static const symbol s_7_79[3] = { 'e', 'v', 'o' }; static const symbol s_7_80[3] = { 'i', 'v', 'o' }; static const symbol s_7_81[2] = { 'a', 'r' }; static const symbol s_7_82[2] = { 'i', 'r' }; static const symbol s_7_83[4] = { 'e', 'r', 0xC3, 0xA0 }; static const symbol s_7_84[4] = { 'i', 'r', 0xC3, 0xA0 }; static const symbol s_7_85[4] = { 'e', 'r', 0xC3, 0xB2 }; static const symbol s_7_86[4] = { 'i', 'r', 0xC3, 0xB2 }; static const struct among a_7[87] = { /* 0 */ { 4, s_7_0, -1, 1, 0}, /* 1 */ { 4, s_7_1, -1, 1, 0}, /* 2 */ { 3, s_7_2, -1, 1, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 3, s_7_4, -1, 1, 0}, /* 5 */ { 3, s_7_5, -1, 1, 0}, /* 6 */ { 3, s_7_6, -1, 1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 6, s_7_8, -1, 1, 0}, /* 9 */ { 6, s_7_9, -1, 1, 0}, /* 10 */ { 4, s_7_10, -1, 1, 0}, /* 11 */ { 
4, s_7_11, -1, 1, 0}, /* 12 */ { 3, s_7_12, -1, 1, 0}, /* 13 */ { 3, s_7_13, -1, 1, 0}, /* 14 */ { 3, s_7_14, -1, 1, 0}, /* 15 */ { 4, s_7_15, -1, 1, 0}, /* 16 */ { 3, s_7_16, -1, 1, 0}, /* 17 */ { 5, s_7_17, 16, 1, 0}, /* 18 */ { 5, s_7_18, 16, 1, 0}, /* 19 */ { 5, s_7_19, 16, 1, 0}, /* 20 */ { 3, s_7_20, -1, 1, 0}, /* 21 */ { 5, s_7_21, 20, 1, 0}, /* 22 */ { 5, s_7_22, 20, 1, 0}, /* 23 */ { 3, s_7_23, -1, 1, 0}, /* 24 */ { 6, s_7_24, -1, 1, 0}, /* 25 */ { 6, s_7_25, -1, 1, 0}, /* 26 */ { 3, s_7_26, -1, 1, 0}, /* 27 */ { 4, s_7_27, -1, 1, 0}, /* 28 */ { 4, s_7_28, -1, 1, 0}, /* 29 */ { 4, s_7_29, -1, 1, 0}, /* 30 */ { 4, s_7_30, -1, 1, 0}, /* 31 */ { 4, s_7_31, -1, 1, 0}, /* 32 */ { 4, s_7_32, -1, 1, 0}, /* 33 */ { 4, s_7_33, -1, 1, 0}, /* 34 */ { 3, s_7_34, -1, 1, 0}, /* 35 */ { 3, s_7_35, -1, 1, 0}, /* 36 */ { 6, s_7_36, -1, 1, 0}, /* 37 */ { 6, s_7_37, -1, 1, 0}, /* 38 */ { 3, s_7_38, -1, 1, 0}, /* 39 */ { 3, s_7_39, -1, 1, 0}, /* 40 */ { 3, s_7_40, -1, 1, 0}, /* 41 */ { 3, s_7_41, -1, 1, 0}, /* 42 */ { 4, s_7_42, -1, 1, 0}, /* 43 */ { 4, s_7_43, -1, 1, 0}, /* 44 */ { 4, s_7_44, -1, 1, 0}, /* 45 */ { 4, s_7_45, -1, 1, 0}, /* 46 */ { 4, s_7_46, -1, 1, 0}, /* 47 */ { 5, s_7_47, -1, 1, 0}, /* 48 */ { 5, s_7_48, -1, 1, 0}, /* 49 */ { 5, s_7_49, -1, 1, 0}, /* 50 */ { 5, s_7_50, -1, 1, 0}, /* 51 */ { 5, s_7_51, -1, 1, 0}, /* 52 */ { 6, s_7_52, -1, 1, 0}, /* 53 */ { 4, s_7_53, -1, 1, 0}, /* 54 */ { 4, s_7_54, -1, 1, 0}, /* 55 */ { 6, s_7_55, 54, 1, 0}, /* 56 */ { 6, s_7_56, 54, 1, 0}, /* 57 */ { 4, s_7_57, -1, 1, 0}, /* 58 */ { 3, s_7_58, -1, 1, 0}, /* 59 */ { 6, s_7_59, 58, 1, 0}, /* 60 */ { 5, s_7_60, 58, 1, 0}, /* 61 */ { 5, s_7_61, 58, 1, 0}, /* 62 */ { 5, s_7_62, 58, 1, 0}, /* 63 */ { 6, s_7_63, -1, 1, 0}, /* 64 */ { 6, s_7_64, -1, 1, 0}, /* 65 */ { 3, s_7_65, -1, 1, 0}, /* 66 */ { 6, s_7_66, 65, 1, 0}, /* 67 */ { 5, s_7_67, 65, 1, 0}, /* 68 */ { 5, s_7_68, 65, 1, 0}, /* 69 */ { 5, s_7_69, 65, 1, 0}, /* 70 */ { 8, s_7_70, -1, 1, 0}, /* 71 */ { 8, s_7_71, -1, 1, 
0}, /* 72 */ { 6, s_7_72, -1, 1, 0}, /* 73 */ { 6, s_7_73, -1, 1, 0}, /* 74 */ { 6, s_7_74, -1, 1, 0}, /* 75 */ { 3, s_7_75, -1, 1, 0}, /* 76 */ { 3, s_7_76, -1, 1, 0}, /* 77 */ { 3, s_7_77, -1, 1, 0}, /* 78 */ { 3, s_7_78, -1, 1, 0}, /* 79 */ { 3, s_7_79, -1, 1, 0}, /* 80 */ { 3, s_7_80, -1, 1, 0}, /* 81 */ { 2, s_7_81, -1, 1, 0}, /* 82 */ { 2, s_7_82, -1, 1, 0}, /* 83 */ { 4, s_7_83, -1, 1, 0}, /* 84 */ { 4, s_7_84, -1, 1, 0}, /* 85 */ { 4, s_7_85, -1, 1, 0}, /* 86 */ { 4, s_7_86, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2, 1 }; static const unsigned char g_AEIO[] = { 17, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2 }; static const unsigned char g_CG[] = { 17 }; static const symbol s_0[] = { 0xC3, 0xA0 }; static const symbol s_1[] = { 0xC3, 0xA8 }; static const symbol s_2[] = { 0xC3, 0xAC }; static const symbol s_3[] = { 0xC3, 0xB2 }; static const symbol s_4[] = { 0xC3, 0xB9 }; static const symbol s_5[] = { 'q', 'U' }; static const symbol s_6[] = { 'u' }; static const symbol s_7[] = { 'U' }; static const symbol s_8[] = { 'i' }; static const symbol s_9[] = { 'I' }; static const symbol s_10[] = { 'i' }; static const symbol s_11[] = { 'u' }; static const symbol s_12[] = { 'e' }; static const symbol s_13[] = { 'i', 'c' }; static const symbol s_14[] = { 'l', 'o', 'g' }; static const symbol s_15[] = { 'u' }; static const symbol s_16[] = { 'e', 'n', 't', 'e' }; static const symbol s_17[] = { 'a', 't' }; static const symbol s_18[] = { 'a', 't' }; static const symbol s_19[] = { 'i', 'c' }; static const symbol s_20[] = { 'i' }; static const symbol s_21[] = { 'h' }; static int r_prelude(struct SN_env * z) { int among_var; { int c_test = z->c; /* test, line 35 */ while(1) { /* repeat, line 35 */ int c1 = z->c; z->bra = z->c; /* [, line 36 */ among_var = find_among(z, a_0, 7); /* substring, line 36 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 36 */ switch(among_var) { case 0: goto 
lab0; case 1: { int ret = slice_from_s(z, 2, s_0); /* <-, line 37 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 2, s_1); /* <-, line 38 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 2, s_2); /* <-, line 39 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 2, s_3); /* <-, line 40 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 2, s_4); /* <-, line 41 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 2, s_5); /* <-, line 42 */ if (ret < 0) return ret; } break; case 7: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 43 */ } break; } continue; lab0: z->c = c1; break; } z->c = c_test; } while(1) { /* repeat, line 46 */ int c2 = z->c; while(1) { /* goto, line 46 */ int c3 = z->c; if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab2; z->bra = z->c; /* [, line 47 */ { int c4 = z->c; /* or, line 47 */ if (!(eq_s(z, 1, s_6))) goto lab4; z->ket = z->c; /* ], line 47 */ if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab4; { int ret = slice_from_s(z, 1, s_7); /* <-, line 47 */ if (ret < 0) return ret; } goto lab3; lab4: z->c = c4; if (!(eq_s(z, 1, s_8))) goto lab2; z->ket = z->c; /* ], line 48 */ if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab2; { int ret = slice_from_s(z, 1, s_9); /* <-, line 48 */ if (ret < 0) return ret; } } lab3: z->c = c3; break; lab2: z->c = c3; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab1; z->c = ret; /* goto, line 46 */ } } continue; lab1: z->c = c2; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 58 */ { int c2 = z->c; /* or, line 60 */ if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab2; { int c3 = z->c; /* or, line 59 */ if (out_grouping_U(z, g_v, 97, 249, 0)) goto lab4; { /* gopast */ /* grouping v, line 59 */ int ret = out_grouping_U(z, g_v, 97, 249, 1); if (ret < 0) 
goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab2; { /* gopast */ /* non v, line 59 */ int ret = in_grouping_U(z, g_v, 97, 249, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping_U(z, g_v, 97, 249, 0)) goto lab0; { int c4 = z->c; /* or, line 61 */ if (out_grouping_U(z, g_v, 97, 249, 0)) goto lab6; { /* gopast */ /* grouping v, line 61 */ int ret = out_grouping_U(z, g_v, 97, 249, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping_U(z, g_v, 97, 249, 0)) goto lab0; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 61 */ } } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 62 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 64 */ { /* gopast */ /* grouping v, line 65 */ int ret = out_grouping_U(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 65 */ int ret = in_grouping_U(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 65 */ { /* gopast */ /* grouping v, line 66 */ int ret = out_grouping_U(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 66 */ int ret = in_grouping_U(z, g_v, 97, 249, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 66 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 70 */ int c1 = z->c; z->bra = z->c; /* [, line 72 */ if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 85)) among_var = 3; else among_var = find_among(z, a_1, 3); /* substring, line 72 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 72 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_10); /* <-, line 73 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_11); /* <-, line 74 */ if (ret < 0) return ret; 
} break; case 3: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 75 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_attached_pronoun(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 87 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((33314 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_2, 37))) return 0; /* substring, line 87 */ z->bra = z->c; /* ], line 87 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 111 && z->p[z->c - 1] != 114)) return 0; among_var = find_among_b(z, a_3, 5); /* among, line 97 */ if (!(among_var)) return 0; { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 97 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 98 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_12); /* <-, line 99 */ if (ret < 0) return ret; } break; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 104 */ among_var = find_among_b(z, a_6, 51); /* substring, line 104 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 104 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 111 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 111 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 113 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 113 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 114 */ z->ket = z->c; /* [, line 114 */ if (!(eq_s_b(z, 2, s_13))) { z->c 
= z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 114 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 114 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 114 */ if (ret < 0) return ret; } lab0: ; } break; case 3: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 117 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_14); /* <-, line 117 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 119 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_15); /* <-, line 119 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 121 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_16); /* <-, line 121 */ if (ret < 0) return ret; } break; case 6: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 123 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 123 */ if (ret < 0) return ret; } break; case 7: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 125 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 125 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 126 */ z->ket = z->c; /* [, line 127 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4722696 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab1; } among_var = find_among_b(z, a_4, 4); /* substring, line 127 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 127 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 127 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 127 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab1; } case 1: z->ket = z->c; /* [, line 128 */ if (!(eq_s_b(z, 2, s_17))) { z->c = z->l - m_keep; goto lab1; } z->bra = 
z->c; /* ], line 128 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 128 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 128 */ if (ret < 0) return ret; } break; } lab1: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 134 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 135 */ z->ket = z->c; /* [, line 136 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab2; } among_var = find_among_b(z, a_5, 3); /* substring, line 136 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } z->bra = z->c; /* ], line 136 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab2; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 137 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 137 */ if (ret < 0) return ret; } break; } lab2: ; } break; case 9: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 142 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 142 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 143 */ z->ket = z->c; /* [, line 143 */ if (!(eq_s_b(z, 2, s_18))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 143 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 143 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 143 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 143 */ if (!(eq_s_b(z, 2, s_19))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 143 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 143 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 143 */ if (ret 
< 0) return ret; } lab3: ; } break; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 148 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 148 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 149 */ among_var = find_among_b(z, a_7, 87); /* substring, line 149 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 149 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = slice_del(z); /* delete, line 163 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_vowel_suffix(struct SN_env * z) { { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 171 */ z->ket = z->c; /* [, line 172 */ if (in_grouping_b_U(z, g_AEIO, 97, 242, 0)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 172 */ { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 172 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 172 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 173 */ if (!(eq_s_b(z, 1, s_20))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 173 */ { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 173 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 173 */ if (ret < 0) return ret; } lab0: ; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 175 */ z->ket = z->c; /* [, line 176 */ if (!(eq_s_b(z, 1, s_21))) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 176 */ if (in_grouping_b_U(z, g_CG, 99, 103, 0)) { z->c = z->l - m_keep; goto lab1; } { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call RV, line 176 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 176 */ if (ret < 0) return ret; } lab1: ; } return 1; } extern int italian_UTF_8_stem(struct SN_env * 
z) { { int c1 = z->c; /* do, line 182 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 182 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 183 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 183 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 184 */ { int m3 = z->l - z->c; (void)m3; /* do, line 185 */ { int ret = r_attached_pronoun(z); if (ret == 0) goto lab2; /* call attached_pronoun, line 185 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 186 */ { int m5 = z->l - z->c; (void)m5; /* or, line 186 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab5; /* call standard_suffix, line 186 */ if (ret < 0) return ret; } goto lab4; lab5: z->c = z->l - m5; { int ret = r_verb_suffix(z); if (ret == 0) goto lab3; /* call verb_suffix, line 186 */ if (ret < 0) return ret; } } lab4: lab3: z->c = z->l - m4; } { int m6 = z->l - z->c; (void)m6; /* do, line 187 */ { int ret = r_vowel_suffix(z); if (ret == 0) goto lab6; /* call vowel_suffix, line 187 */ if (ret < 0) return ret; } lab6: z->c = z->l - m6; } z->c = z->lb; { int c7 = z->c; /* do, line 189 */ { int ret = r_postlude(z); if (ret == 0) goto lab7; /* call postlude, line 189 */ if (ret < 0) return ret; } lab7: z->c = c7; } return 1; } extern struct SN_env * italian_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } extern void italian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_italian.h000066400000000000000000000004661456444476200307330ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* italian_UTF_8_create_env(void); extern void italian_UTF_8_close_env(struct SN_env* z); extern int italian_UTF_8_stem(struct 
SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_norwegian.c000066400000000000000000000234751456444476200313030ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int norwegian_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_other_suffix(struct SN_env * z); static int r_consonant_pair(struct SN_env * z); static int r_main_suffix(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * norwegian_UTF_8_create_env(void); extern void norwegian_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[1] = { 'a' }; static const symbol s_0_1[1] = { 'e' }; static const symbol s_0_2[3] = { 'e', 'd', 'e' }; static const symbol s_0_3[4] = { 'a', 'n', 'd', 'e' }; static const symbol s_0_4[4] = { 'e', 'n', 'd', 'e' }; static const symbol s_0_5[3] = { 'a', 'n', 'e' }; static const symbol s_0_6[3] = { 'e', 'n', 'e' }; static const symbol s_0_7[6] = { 'h', 'e', 't', 'e', 'n', 'e' }; static const symbol s_0_8[4] = { 'e', 'r', 't', 'e' }; static const symbol s_0_9[2] = { 'e', 'n' }; static const symbol s_0_10[5] = { 'h', 'e', 't', 'e', 'n' }; static const symbol s_0_11[2] = { 'a', 'r' }; static const symbol s_0_12[2] = { 'e', 'r' }; static const symbol s_0_13[5] = { 'h', 'e', 't', 'e', 'r' }; static const symbol s_0_14[1] = { 's' }; static const symbol s_0_15[2] = { 'a', 's' }; static const symbol s_0_16[2] = { 'e', 's' }; static const symbol s_0_17[4] = { 'e', 'd', 'e', 's' }; static const symbol s_0_18[5] = { 'e', 'n', 'd', 'e', 's' }; static const symbol s_0_19[4] = { 'e', 'n', 'e', 's' }; static const symbol s_0_20[7] = { 'h', 'e', 't', 'e', 'n', 'e', 's' }; static const symbol s_0_21[3] = { 'e', 'n', 's' }; static const symbol s_0_22[6] = { 'h', 'e', 't', 
'e', 'n', 's' }; static const symbol s_0_23[3] = { 'e', 'r', 's' }; static const symbol s_0_24[3] = { 'e', 't', 's' }; static const symbol s_0_25[2] = { 'e', 't' }; static const symbol s_0_26[3] = { 'h', 'e', 't' }; static const symbol s_0_27[3] = { 'e', 'r', 't' }; static const symbol s_0_28[3] = { 'a', 's', 't' }; static const struct among a_0[29] = { /* 0 */ { 1, s_0_0, -1, 1, 0}, /* 1 */ { 1, s_0_1, -1, 1, 0}, /* 2 */ { 3, s_0_2, 1, 1, 0}, /* 3 */ { 4, s_0_3, 1, 1, 0}, /* 4 */ { 4, s_0_4, 1, 1, 0}, /* 5 */ { 3, s_0_5, 1, 1, 0}, /* 6 */ { 3, s_0_6, 1, 1, 0}, /* 7 */ { 6, s_0_7, 6, 1, 0}, /* 8 */ { 4, s_0_8, 1, 3, 0}, /* 9 */ { 2, s_0_9, -1, 1, 0}, /* 10 */ { 5, s_0_10, 9, 1, 0}, /* 11 */ { 2, s_0_11, -1, 1, 0}, /* 12 */ { 2, s_0_12, -1, 1, 0}, /* 13 */ { 5, s_0_13, 12, 1, 0}, /* 14 */ { 1, s_0_14, -1, 2, 0}, /* 15 */ { 2, s_0_15, 14, 1, 0}, /* 16 */ { 2, s_0_16, 14, 1, 0}, /* 17 */ { 4, s_0_17, 16, 1, 0}, /* 18 */ { 5, s_0_18, 16, 1, 0}, /* 19 */ { 4, s_0_19, 16, 1, 0}, /* 20 */ { 7, s_0_20, 19, 1, 0}, /* 21 */ { 3, s_0_21, 14, 1, 0}, /* 22 */ { 6, s_0_22, 21, 1, 0}, /* 23 */ { 3, s_0_23, 14, 1, 0}, /* 24 */ { 3, s_0_24, 14, 1, 0}, /* 25 */ { 2, s_0_25, -1, 1, 0}, /* 26 */ { 3, s_0_26, 25, 1, 0}, /* 27 */ { 3, s_0_27, -1, 3, 0}, /* 28 */ { 3, s_0_28, -1, 1, 0} }; static const symbol s_1_0[2] = { 'd', 't' }; static const symbol s_1_1[2] = { 'v', 't' }; static const struct among a_1[2] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0} }; static const symbol s_2_0[3] = { 'l', 'e', 'g' }; static const symbol s_2_1[4] = { 'e', 'l', 'e', 'g' }; static const symbol s_2_2[2] = { 'i', 'g' }; static const symbol s_2_3[3] = { 'e', 'i', 'g' }; static const symbol s_2_4[3] = { 'l', 'i', 'g' }; static const symbol s_2_5[4] = { 'e', 'l', 'i', 'g' }; static const symbol s_2_6[3] = { 'e', 'l', 's' }; static const symbol s_2_7[3] = { 'l', 'o', 'v' }; static const symbol s_2_8[4] = { 'e', 'l', 'o', 'v' }; static const symbol s_2_9[4] = { 's', 'l', 'o', 'v' }; 
static const symbol s_2_10[7] = { 'h', 'e', 't', 's', 'l', 'o', 'v' }; static const struct among a_2[11] = { /* 0 */ { 3, s_2_0, -1, 1, 0}, /* 1 */ { 4, s_2_1, 0, 1, 0}, /* 2 */ { 2, s_2_2, -1, 1, 0}, /* 3 */ { 3, s_2_3, 2, 1, 0}, /* 4 */ { 3, s_2_4, 2, 1, 0}, /* 5 */ { 4, s_2_5, 4, 1, 0}, /* 6 */ { 3, s_2_6, -1, 1, 0}, /* 7 */ { 3, s_2_7, -1, 1, 0}, /* 8 */ { 4, s_2_8, 7, 1, 0}, /* 9 */ { 4, s_2_9, 7, 1, 0}, /* 10 */ { 7, s_2_10, 9, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; static const unsigned char g_s_ending[] = { 119, 125, 149, 1 }; static const symbol s_0[] = { 'k' }; static const symbol s_1[] = { 'e', 'r' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c_test = z->c; /* test, line 30 */ { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); if (ret < 0) return 0; z->c = ret; /* hop, line 30 */ } z->I[1] = z->c; /* setmark x, line 30 */ z->c = c_test; } if (out_grouping_U(z, g_v, 97, 248, 1) < 0) return 0; /* goto */ /* grouping v, line 31 */ { /* gopast */ /* non v, line 31 */ int ret = in_grouping_U(z, g_v, 97, 248, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 31 */ /* try, line 32 */ if (!(z->I[0] < z->I[1])) goto lab0; z->I[0] = z->I[1]; lab0: return 1; } static int r_main_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 38 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 38 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 38 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851426 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_0, 29); /* substring, line 38 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 38 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 44 */ if (ret < 0) return ret; } break; 
case 2: { int m2 = z->l - z->c; (void)m2; /* or, line 46 */ if (in_grouping_b_U(z, g_s_ending, 98, 122, 0)) goto lab1; goto lab0; lab1: z->c = z->l - m2; if (!(eq_s_b(z, 1, s_0))) return 0; if (out_grouping_b_U(z, g_v, 97, 248, 0)) return 0; } lab0: { int ret = slice_del(z); /* delete, line 46 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 2, s_1); /* <-, line 48 */ if (ret < 0) return ret; } break; } return 1; } static int r_consonant_pair(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 53 */ { int mlimit; /* setlimit, line 54 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 54 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 54 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] != 116) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_1, 2))) { z->lb = mlimit; return 0; } /* substring, line 54 */ z->bra = z->c; /* ], line 54 */ z->lb = mlimit; } z->c = z->l - m_test; } { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 59 */ } z->bra = z->c; /* ], line 59 */ { int ret = slice_del(z); /* delete, line 59 */ if (ret < 0) return ret; } return 1; } static int r_other_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 63 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 63 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 63 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718720 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_2, 11); /* substring, line 63 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 63 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 67 */ if (ret < 0) return ret; } break; } return 1; } extern int norwegian_UTF_8_stem(struct SN_env * z) { { int c1 = 
z->c; /* do, line 74 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 74 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 75 */ { int m2 = z->l - z->c; (void)m2; /* do, line 76 */ { int ret = r_main_suffix(z); if (ret == 0) goto lab1; /* call main_suffix, line 76 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 77 */ { int ret = r_consonant_pair(z); if (ret == 0) goto lab2; /* call consonant_pair, line 77 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 78 */ { int ret = r_other_suffix(z); if (ret == 0) goto lab3; /* call other_suffix, line 78 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } z->c = z->lb; return 1; } extern struct SN_env * norwegian_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } extern void norwegian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_norwegian.h000066400000000000000000000004741456444476200313020ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* norwegian_UTF_8_create_env(void); extern void norwegian_UTF_8_close_env(struct SN_env* z); extern int norwegian_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_porter.c000066400000000000000000000611731456444476200306220ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int porter_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_Step_5b(struct SN_env * z); static int r_Step_5a(struct SN_env * z); static int r_Step_4(struct SN_env * z); static int 
r_Step_3(struct SN_env * z); static int r_Step_2(struct SN_env * z); static int r_Step_1c(struct SN_env * z); static int r_Step_1b(struct SN_env * z); static int r_Step_1a(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_shortv(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * porter_UTF_8_create_env(void); extern void porter_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[1] = { 's' }; static const symbol s_0_1[3] = { 'i', 'e', 's' }; static const symbol s_0_2[4] = { 's', 's', 'e', 's' }; static const symbol s_0_3[2] = { 's', 's' }; static const struct among a_0[4] = { /* 0 */ { 1, s_0_0, -1, 3, 0}, /* 1 */ { 3, s_0_1, 0, 2, 0}, /* 2 */ { 4, s_0_2, 0, 1, 0}, /* 3 */ { 2, s_0_3, 0, -1, 0} }; static const symbol s_1_1[2] = { 'b', 'b' }; static const symbol s_1_2[2] = { 'd', 'd' }; static const symbol s_1_3[2] = { 'f', 'f' }; static const symbol s_1_4[2] = { 'g', 'g' }; static const symbol s_1_5[2] = { 'b', 'l' }; static const symbol s_1_6[2] = { 'm', 'm' }; static const symbol s_1_7[2] = { 'n', 'n' }; static const symbol s_1_8[2] = { 'p', 'p' }; static const symbol s_1_9[2] = { 'r', 'r' }; static const symbol s_1_10[2] = { 'a', 't' }; static const symbol s_1_11[2] = { 't', 't' }; static const symbol s_1_12[2] = { 'i', 'z' }; static const struct among a_1[13] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 2, s_1_1, 0, 2, 0}, /* 2 */ { 2, s_1_2, 0, 2, 0}, /* 3 */ { 2, s_1_3, 0, 2, 0}, /* 4 */ { 2, s_1_4, 0, 2, 0}, /* 5 */ { 2, s_1_5, 0, 1, 0}, /* 6 */ { 2, s_1_6, 0, 2, 0}, /* 7 */ { 2, s_1_7, 0, 2, 0}, /* 8 */ { 2, s_1_8, 0, 2, 0}, /* 9 */ { 2, s_1_9, 0, 2, 0}, /* 10 */ { 2, s_1_10, 0, 1, 0}, /* 11 */ { 2, s_1_11, 0, 2, 0}, /* 12 */ { 2, s_1_12, 0, 1, 0} }; static const symbol s_2_0[2] = { 'e', 'd' }; static const symbol s_2_1[3] = { 'e', 'e', 'd' }; static const symbol s_2_2[3] = { 'i', 'n', 'g' }; static const struct among a_2[3] = { /* 0 */ { 
2, s_2_0, -1, 2, 0}, /* 1 */ { 3, s_2_1, 0, 1, 0}, /* 2 */ { 3, s_2_2, -1, 2, 0} }; static const symbol s_3_0[4] = { 'a', 'n', 'c', 'i' }; static const symbol s_3_1[4] = { 'e', 'n', 'c', 'i' }; static const symbol s_3_2[4] = { 'a', 'b', 'l', 'i' }; static const symbol s_3_3[3] = { 'e', 'l', 'i' }; static const symbol s_3_4[4] = { 'a', 'l', 'l', 'i' }; static const symbol s_3_5[5] = { 'o', 'u', 's', 'l', 'i' }; static const symbol s_3_6[5] = { 'e', 'n', 't', 'l', 'i' }; static const symbol s_3_7[5] = { 'a', 'l', 'i', 't', 'i' }; static const symbol s_3_8[6] = { 'b', 'i', 'l', 'i', 't', 'i' }; static const symbol s_3_9[5] = { 'i', 'v', 'i', 't', 'i' }; static const symbol s_3_10[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_3_11[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; static const symbol s_3_12[5] = { 'a', 'l', 'i', 's', 'm' }; static const symbol s_3_13[5] = { 'a', 't', 'i', 'o', 'n' }; static const symbol s_3_14[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' }; static const symbol s_3_15[4] = { 'i', 'z', 'e', 'r' }; static const symbol s_3_16[4] = { 'a', 't', 'o', 'r' }; static const symbol s_3_17[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' }; static const symbol s_3_18[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' }; static const symbol s_3_19[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' }; static const struct among a_3[20] = { /* 0 */ { 4, s_3_0, -1, 3, 0}, /* 1 */ { 4, s_3_1, -1, 2, 0}, /* 2 */ { 4, s_3_2, -1, 4, 0}, /* 3 */ { 3, s_3_3, -1, 6, 0}, /* 4 */ { 4, s_3_4, -1, 9, 0}, /* 5 */ { 5, s_3_5, -1, 12, 0}, /* 6 */ { 5, s_3_6, -1, 5, 0}, /* 7 */ { 5, s_3_7, -1, 10, 0}, /* 8 */ { 6, s_3_8, -1, 14, 0}, /* 9 */ { 5, s_3_9, -1, 13, 0}, /* 10 */ { 6, s_3_10, -1, 1, 0}, /* 11 */ { 7, s_3_11, 10, 8, 0}, /* 12 */ { 5, s_3_12, -1, 10, 0}, /* 13 */ { 5, s_3_13, -1, 8, 0}, /* 14 */ { 7, s_3_14, 13, 7, 0}, /* 15 */ { 4, s_3_15, -1, 7, 0}, /* 16 */ { 4, s_3_16, -1, 8, 0}, /* 17 */ { 7, s_3_17, -1, 13, 0}, /* 18 */ { 7, s_3_18, -1, 11, 0}, /* 19 */ { 7, s_3_19, -1, 12, 0} 
}; static const symbol s_4_0[5] = { 'i', 'c', 'a', 't', 'e' }; static const symbol s_4_1[5] = { 'a', 't', 'i', 'v', 'e' }; static const symbol s_4_2[5] = { 'a', 'l', 'i', 'z', 'e' }; static const symbol s_4_3[5] = { 'i', 'c', 'i', 't', 'i' }; static const symbol s_4_4[4] = { 'i', 'c', 'a', 'l' }; static const symbol s_4_5[3] = { 'f', 'u', 'l' }; static const symbol s_4_6[4] = { 'n', 'e', 's', 's' }; static const struct among a_4[7] = { /* 0 */ { 5, s_4_0, -1, 2, 0}, /* 1 */ { 5, s_4_1, -1, 3, 0}, /* 2 */ { 5, s_4_2, -1, 1, 0}, /* 3 */ { 5, s_4_3, -1, 2, 0}, /* 4 */ { 4, s_4_4, -1, 2, 0}, /* 5 */ { 3, s_4_5, -1, 3, 0}, /* 6 */ { 4, s_4_6, -1, 3, 0} }; static const symbol s_5_0[2] = { 'i', 'c' }; static const symbol s_5_1[4] = { 'a', 'n', 'c', 'e' }; static const symbol s_5_2[4] = { 'e', 'n', 'c', 'e' }; static const symbol s_5_3[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_5_4[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_5_5[3] = { 'a', 't', 'e' }; static const symbol s_5_6[3] = { 'i', 'v', 'e' }; static const symbol s_5_7[3] = { 'i', 'z', 'e' }; static const symbol s_5_8[3] = { 'i', 't', 'i' }; static const symbol s_5_9[2] = { 'a', 'l' }; static const symbol s_5_10[3] = { 'i', 's', 'm' }; static const symbol s_5_11[3] = { 'i', 'o', 'n' }; static const symbol s_5_12[2] = { 'e', 'r' }; static const symbol s_5_13[3] = { 'o', 'u', 's' }; static const symbol s_5_14[3] = { 'a', 'n', 't' }; static const symbol s_5_15[3] = { 'e', 'n', 't' }; static const symbol s_5_16[4] = { 'm', 'e', 'n', 't' }; static const symbol s_5_17[5] = { 'e', 'm', 'e', 'n', 't' }; static const symbol s_5_18[2] = { 'o', 'u' }; static const struct among a_5[19] = { /* 0 */ { 2, s_5_0, -1, 1, 0}, /* 1 */ { 4, s_5_1, -1, 1, 0}, /* 2 */ { 4, s_5_2, -1, 1, 0}, /* 3 */ { 4, s_5_3, -1, 1, 0}, /* 4 */ { 4, s_5_4, -1, 1, 0}, /* 5 */ { 3, s_5_5, -1, 1, 0}, /* 6 */ { 3, s_5_6, -1, 1, 0}, /* 7 */ { 3, s_5_7, -1, 1, 0}, /* 8 */ { 3, s_5_8, -1, 1, 0}, /* 9 */ { 2, s_5_9, -1, 1, 0}, /* 10 */ { 3, 
s_5_10, -1, 1, 0}, /* 11 */ { 3, s_5_11, -1, 2, 0}, /* 12 */ { 2, s_5_12, -1, 1, 0}, /* 13 */ { 3, s_5_13, -1, 1, 0}, /* 14 */ { 3, s_5_14, -1, 1, 0}, /* 15 */ { 3, s_5_15, -1, 1, 0}, /* 16 */ { 4, s_5_16, 15, 1, 0}, /* 17 */ { 5, s_5_17, 16, 1, 0}, /* 18 */ { 2, s_5_18, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 1 }; static const unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; static const symbol s_0[] = { 's', 's' }; static const symbol s_1[] = { 'i' }; static const symbol s_2[] = { 'e', 'e' }; static const symbol s_3[] = { 'e' }; static const symbol s_4[] = { 'e' }; static const symbol s_5[] = { 'y' }; static const symbol s_6[] = { 'Y' }; static const symbol s_7[] = { 'i' }; static const symbol s_8[] = { 't', 'i', 'o', 'n' }; static const symbol s_9[] = { 'e', 'n', 'c', 'e' }; static const symbol s_10[] = { 'a', 'n', 'c', 'e' }; static const symbol s_11[] = { 'a', 'b', 'l', 'e' }; static const symbol s_12[] = { 'e', 'n', 't' }; static const symbol s_13[] = { 'e' }; static const symbol s_14[] = { 'i', 'z', 'e' }; static const symbol s_15[] = { 'a', 't', 'e' }; static const symbol s_16[] = { 'a', 'l' }; static const symbol s_17[] = { 'a', 'l' }; static const symbol s_18[] = { 'f', 'u', 'l' }; static const symbol s_19[] = { 'o', 'u', 's' }; static const symbol s_20[] = { 'i', 'v', 'e' }; static const symbol s_21[] = { 'b', 'l', 'e' }; static const symbol s_22[] = { 'a', 'l' }; static const symbol s_23[] = { 'i', 'c' }; static const symbol s_24[] = { 's' }; static const symbol s_25[] = { 't' }; static const symbol s_26[] = { 'e' }; static const symbol s_27[] = { 'l' }; static const symbol s_28[] = { 'l' }; static const symbol s_29[] = { 'y' }; static const symbol s_30[] = { 'Y' }; static const symbol s_31[] = { 'y' }; static const symbol s_32[] = { 'Y' }; static const symbol s_33[] = { 'Y' }; static const symbol s_34[] = { 'y' }; static int r_shortv(struct SN_env * z) { if (out_grouping_b_U(z, g_v_WXY, 89, 121, 0)) return 0; if 
(in_grouping_b_U(z, g_v, 97, 121, 0)) return 0; if (out_grouping_b_U(z, g_v, 97, 121, 0)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_Step_1a(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 25 */ if (z->c <= z->lb || z->p[z->c - 1] != 115) return 0; among_var = find_among_b(z, a_0, 4); /* substring, line 25 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 25 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 2, s_0); /* <-, line 26 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 27 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 29 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_1b(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 34 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 100 && z->p[z->c - 1] != 103)) return 0; among_var = find_among_b(z, a_2, 3); /* substring, line 34 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 34 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 35 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 2, s_2); /* <-, line 35 */ if (ret < 0) return ret; } break; case 2: { int m_test = z->l - z->c; /* test, line 38 */ { /* gopast */ /* grouping v, line 38 */ int ret = out_grouping_b_U(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; } z->c = z->l - m_test; } { int ret = slice_del(z); /* delete, line 38 */ if (ret < 0) return ret; } { int m_test = z->l - z->c; /* test, line 39 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((68514004 >> (z->p[z->c - 1] & 0x1f)) & 1)) among_var = 3; else among_var = find_among_b(z, a_1, 13); /* substring, line 39 */ if (!(among_var)) return 0; z->c = z->l - m_test; } switch(among_var) { case 0: 
return 0; case 1: { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_3); /* <+, line 41 */ z->c = c_keep; if (ret < 0) return ret; } break; case 2: z->ket = z->c; /* [, line 44 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 44 */ } z->bra = z->c; /* ], line 44 */ { int ret = slice_del(z); /* delete, line 44 */ if (ret < 0) return ret; } break; case 3: if (z->c != z->I[0]) return 0; /* atmark, line 45 */ { int m_test = z->l - z->c; /* test, line 45 */ { int ret = r_shortv(z); if (ret == 0) return 0; /* call shortv, line 45 */ if (ret < 0) return ret; } z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_4); /* <+, line 45 */ z->c = c_keep; if (ret < 0) return ret; } break; } break; } return 1; } static int r_Step_1c(struct SN_env * z) { z->ket = z->c; /* [, line 52 */ { int m1 = z->l - z->c; (void)m1; /* or, line 52 */ if (!(eq_s_b(z, 1, s_5))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_6))) return 0; } lab0: z->bra = z->c; /* ], line 52 */ { /* gopast */ /* grouping v, line 53 */ int ret = out_grouping_b_U(z, g_v, 97, 121, 1); if (ret < 0) return 0; z->c -= ret; } { int ret = slice_from_s(z, 1, s_7); /* <-, line 54 */ if (ret < 0) return ret; } return 1; } static int r_Step_2(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 58 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((815616 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_3, 20); /* substring, line 58 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 58 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 58 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_8); /* <-, line 59 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 4, s_9); /* <-, line 60 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 4, s_10); /* <-, line 61 
*/ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 4, s_11); /* <-, line 62 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 3, s_12); /* <-, line 63 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 1, s_13); /* <-, line 64 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 3, s_14); /* <-, line 66 */ if (ret < 0) return ret; } break; case 8: { int ret = slice_from_s(z, 3, s_15); /* <-, line 68 */ if (ret < 0) return ret; } break; case 9: { int ret = slice_from_s(z, 2, s_16); /* <-, line 69 */ if (ret < 0) return ret; } break; case 10: { int ret = slice_from_s(z, 2, s_17); /* <-, line 71 */ if (ret < 0) return ret; } break; case 11: { int ret = slice_from_s(z, 3, s_18); /* <-, line 72 */ if (ret < 0) return ret; } break; case 12: { int ret = slice_from_s(z, 3, s_19); /* <-, line 74 */ if (ret < 0) return ret; } break; case 13: { int ret = slice_from_s(z, 3, s_20); /* <-, line 76 */ if (ret < 0) return ret; } break; case 14: { int ret = slice_from_s(z, 3, s_21); /* <-, line 77 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_3(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 82 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((528928 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_4, 7); /* substring, line 82 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 82 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 82 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 2, s_22); /* <-, line 83 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 2, s_23); /* <-, line 85 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 87 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_4(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 92 */ if (z->c - 1 <= z->lb || 
z->p[z->c - 1] >> 5 != 3 || !((3961384 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_5, 19); /* substring, line 92 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 92 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 92 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 95 */ if (ret < 0) return ret; } break; case 2: { int m1 = z->l - z->c; (void)m1; /* or, line 96 */ if (!(eq_s_b(z, 1, s_24))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_25))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 96 */ if (ret < 0) return ret; } break; } return 1; } static int r_Step_5a(struct SN_env * z) { z->ket = z->c; /* [, line 101 */ if (!(eq_s_b(z, 1, s_26))) return 0; z->bra = z->c; /* ], line 101 */ { int m1 = z->l - z->c; (void)m1; /* or, line 102 */ { int ret = r_R2(z); if (ret == 0) goto lab1; /* call R2, line 102 */ if (ret < 0) return ret; } goto lab0; lab1: z->c = z->l - m1; { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 102 */ if (ret < 0) return ret; } { int m2 = z->l - z->c; (void)m2; /* not, line 102 */ { int ret = r_shortv(z); if (ret == 0) goto lab2; /* call shortv, line 102 */ if (ret < 0) return ret; } return 0; lab2: z->c = z->l - m2; } } lab0: { int ret = slice_del(z); /* delete, line 103 */ if (ret < 0) return ret; } return 1; } static int r_Step_5b(struct SN_env * z) { z->ket = z->c; /* [, line 107 */ if (!(eq_s_b(z, 1, s_27))) return 0; z->bra = z->c; /* ], line 107 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 108 */ if (ret < 0) return ret; } if (!(eq_s_b(z, 1, s_28))) return 0; { int ret = slice_del(z); /* delete, line 109 */ if (ret < 0) return ret; } return 1; } extern int porter_UTF_8_stem(struct SN_env * z) { z->B[0] = 0; /* unset Y_found, line 115 */ { int c1 = z->c; /* do, line 116 */ z->bra = z->c; /* [, line 116 */ if (!(eq_s(z, 1, s_29))) goto lab0; z->ket = z->c; 
/* ], line 116 */ { int ret = slice_from_s(z, 1, s_30); /* <-, line 116 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 116 */ lab0: z->c = c1; } { int c2 = z->c; /* do, line 117 */ while(1) { /* repeat, line 117 */ int c3 = z->c; while(1) { /* goto, line 117 */ int c4 = z->c; if (in_grouping_U(z, g_v, 97, 121, 0)) goto lab3; z->bra = z->c; /* [, line 117 */ if (!(eq_s(z, 1, s_31))) goto lab3; z->ket = z->c; /* ], line 117 */ z->c = c4; break; lab3: z->c = c4; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab2; z->c = ret; /* goto, line 117 */ } } { int ret = slice_from_s(z, 1, s_32); /* <-, line 117 */ if (ret < 0) return ret; } z->B[0] = 1; /* set Y_found, line 117 */ continue; lab2: z->c = c3; break; } z->c = c2; } z->I[0] = z->l; z->I[1] = z->l; { int c5 = z->c; /* do, line 121 */ { /* gopast */ /* grouping v, line 122 */ int ret = out_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 122 */ int ret = in_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 122 */ { /* gopast */ /* grouping v, line 123 */ int ret = out_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } { /* gopast */ /* non v, line 123 */ int ret = in_grouping_U(z, g_v, 97, 121, 1); if (ret < 0) goto lab4; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 123 */ lab4: z->c = c5; } z->lb = z->c; z->c = z->l; /* backwards, line 126 */ { int m6 = z->l - z->c; (void)m6; /* do, line 127 */ { int ret = r_Step_1a(z); if (ret == 0) goto lab5; /* call Step_1a, line 127 */ if (ret < 0) return ret; } lab5: z->c = z->l - m6; } { int m7 = z->l - z->c; (void)m7; /* do, line 128 */ { int ret = r_Step_1b(z); if (ret == 0) goto lab6; /* call Step_1b, line 128 */ if (ret < 0) return ret; } lab6: z->c = z->l - m7; } { int m8 = z->l - z->c; (void)m8; /* do, line 129 */ { int ret = r_Step_1c(z); if (ret == 0) goto lab7; /* call Step_1c, line 129 */ if (ret < 
0) return ret; } lab7: z->c = z->l - m8; } { int m9 = z->l - z->c; (void)m9; /* do, line 130 */ { int ret = r_Step_2(z); if (ret == 0) goto lab8; /* call Step_2, line 130 */ if (ret < 0) return ret; } lab8: z->c = z->l - m9; } { int m10 = z->l - z->c; (void)m10; /* do, line 131 */ { int ret = r_Step_3(z); if (ret == 0) goto lab9; /* call Step_3, line 131 */ if (ret < 0) return ret; } lab9: z->c = z->l - m10; } { int m11 = z->l - z->c; (void)m11; /* do, line 132 */ { int ret = r_Step_4(z); if (ret == 0) goto lab10; /* call Step_4, line 132 */ if (ret < 0) return ret; } lab10: z->c = z->l - m11; } { int m12 = z->l - z->c; (void)m12; /* do, line 133 */ { int ret = r_Step_5a(z); if (ret == 0) goto lab11; /* call Step_5a, line 133 */ if (ret < 0) return ret; } lab11: z->c = z->l - m12; } { int m13 = z->l - z->c; (void)m13; /* do, line 134 */ { int ret = r_Step_5b(z); if (ret == 0) goto lab12; /* call Step_5b, line 134 */ if (ret < 0) return ret; } lab12: z->c = z->l - m13; } z->c = z->lb; { int c14 = z->c; /* do, line 137 */ if (!(z->B[0])) goto lab13; /* Boolean test Y_found, line 137 */ while(1) { /* repeat, line 137 */ int c15 = z->c; while(1) { /* goto, line 137 */ int c16 = z->c; z->bra = z->c; /* [, line 137 */ if (!(eq_s(z, 1, s_33))) goto lab15; z->ket = z->c; /* ], line 137 */ z->c = c16; break; lab15: z->c = c16; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab14; z->c = ret; /* goto, line 137 */ } } { int ret = slice_from_s(z, 1, s_34); /* <-, line 137 */ if (ret < 0) return ret; } continue; lab14: z->c = c15; break; } lab13: z->c = c14; } return 1; } extern struct SN_env * porter_UTF_8_create_env(void) { return SN_create_env(0, 2, 1); } extern void porter_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_porter.h000066400000000000000000000004631456444476200306220ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI 
C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* porter_UTF_8_create_env(void); extern void porter_UTF_8_close_env(struct SN_env* z); extern int porter_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_portuguese.c000066400000000000000000001140401456444476200315010ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int portuguese_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_residual_form(struct SN_env * z); static int r_residual_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * portuguese_UTF_8_create_env(void); extern void portuguese_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[2] = { 0xC3, 0xA3 }; static const symbol s_0_2[2] = { 0xC3, 0xB5 }; static const struct among a_0[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 2, s_0_1, 0, 1, 0}, /* 2 */ { 2, s_0_2, 0, 2, 0} }; static const symbol s_1_1[2] = { 'a', '~' }; static const symbol s_1_2[2] = { 'o', '~' }; static const struct among a_1[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 2, s_1_1, 0, 1, 0}, /* 2 */ { 2, s_1_2, 0, 2, 0} }; static const symbol s_2_0[2] = { 'i', 'c' }; static const symbol s_2_1[2] = { 'a', 'd' }; static const symbol s_2_2[2] = { 'o', 's' }; static const symbol s_2_3[2] = { 'i', 'v' }; static const struct among a_2[4] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 2, s_2_2, -1, -1, 0}, /* 3 
*/ { 2, s_2_3, -1, 1, 0} }; static const symbol s_3_0[4] = { 'a', 'n', 't', 'e' }; static const symbol s_3_1[4] = { 'a', 'v', 'e', 'l' }; static const symbol s_3_2[5] = { 0xC3, 0xAD, 'v', 'e', 'l' }; static const struct among a_3[3] = { /* 0 */ { 4, s_3_0, -1, 1, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0}, /* 2 */ { 5, s_3_2, -1, 1, 0} }; static const symbol s_4_0[2] = { 'i', 'c' }; static const symbol s_4_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_4_2[2] = { 'i', 'v' }; static const struct among a_4[3] = { /* 0 */ { 2, s_4_0, -1, 1, 0}, /* 1 */ { 4, s_4_1, -1, 1, 0}, /* 2 */ { 2, s_4_2, -1, 1, 0} }; static const symbol s_5_0[3] = { 'i', 'c', 'a' }; static const symbol s_5_1[6] = { 0xC3, 0xA2, 'n', 'c', 'i', 'a' }; static const symbol s_5_2[6] = { 0xC3, 0xAA, 'n', 'c', 'i', 'a' }; static const symbol s_5_3[3] = { 'i', 'r', 'a' }; static const symbol s_5_4[5] = { 'a', 'd', 'o', 'r', 'a' }; static const symbol s_5_5[3] = { 'o', 's', 'a' }; static const symbol s_5_6[4] = { 'i', 's', 't', 'a' }; static const symbol s_5_7[3] = { 'i', 'v', 'a' }; static const symbol s_5_8[3] = { 'e', 'z', 'a' }; static const symbol s_5_9[6] = { 'l', 'o', 'g', 0xC3, 0xAD, 'a' }; static const symbol s_5_10[5] = { 'i', 'd', 'a', 'd', 'e' }; static const symbol s_5_11[4] = { 'a', 'n', 't', 'e' }; static const symbol s_5_12[5] = { 'm', 'e', 'n', 't', 'e' }; static const symbol s_5_13[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; static const symbol s_5_14[5] = { 0xC3, 0xA1, 'v', 'e', 'l' }; static const symbol s_5_15[5] = { 0xC3, 0xAD, 'v', 'e', 'l' }; static const symbol s_5_16[6] = { 'u', 'c', 'i', 0xC3, 0xB3, 'n' }; static const symbol s_5_17[3] = { 'i', 'c', 'o' }; static const symbol s_5_18[4] = { 'i', 's', 'm', 'o' }; static const symbol s_5_19[3] = { 'o', 's', 'o' }; static const symbol s_5_20[6] = { 'a', 'm', 'e', 'n', 't', 'o' }; static const symbol s_5_21[6] = { 'i', 'm', 'e', 'n', 't', 'o' }; static const symbol s_5_22[3] = { 'i', 'v', 'o' }; static const symbol s_5_23[6] = { 'a', 0xC3, 
0xA7, 'a', '~', 'o' }; static const symbol s_5_24[4] = { 'a', 'd', 'o', 'r' }; static const symbol s_5_25[4] = { 'i', 'c', 'a', 's' }; static const symbol s_5_26[7] = { 0xC3, 0xAA, 'n', 'c', 'i', 'a', 's' }; static const symbol s_5_27[4] = { 'i', 'r', 'a', 's' }; static const symbol s_5_28[6] = { 'a', 'd', 'o', 'r', 'a', 's' }; static const symbol s_5_29[4] = { 'o', 's', 'a', 's' }; static const symbol s_5_30[5] = { 'i', 's', 't', 'a', 's' }; static const symbol s_5_31[4] = { 'i', 'v', 'a', 's' }; static const symbol s_5_32[4] = { 'e', 'z', 'a', 's' }; static const symbol s_5_33[7] = { 'l', 'o', 'g', 0xC3, 0xAD, 'a', 's' }; static const symbol s_5_34[6] = { 'i', 'd', 'a', 'd', 'e', 's' }; static const symbol s_5_35[7] = { 'u', 'c', 'i', 'o', 'n', 'e', 's' }; static const symbol s_5_36[6] = { 'a', 'd', 'o', 'r', 'e', 's' }; static const symbol s_5_37[5] = { 'a', 'n', 't', 'e', 's' }; static const symbol s_5_38[7] = { 'a', 0xC3, 0xA7, 'o', '~', 'e', 's' }; static const symbol s_5_39[4] = { 'i', 'c', 'o', 's' }; static const symbol s_5_40[5] = { 'i', 's', 'm', 'o', 's' }; static const symbol s_5_41[4] = { 'o', 's', 'o', 's' }; static const symbol s_5_42[7] = { 'a', 'm', 'e', 'n', 't', 'o', 's' }; static const symbol s_5_43[7] = { 'i', 'm', 'e', 'n', 't', 'o', 's' }; static const symbol s_5_44[4] = { 'i', 'v', 'o', 's' }; static const struct among a_5[45] = { /* 0 */ { 3, s_5_0, -1, 1, 0}, /* 1 */ { 6, s_5_1, -1, 1, 0}, /* 2 */ { 6, s_5_2, -1, 4, 0}, /* 3 */ { 3, s_5_3, -1, 9, 0}, /* 4 */ { 5, s_5_4, -1, 1, 0}, /* 5 */ { 3, s_5_5, -1, 1, 0}, /* 6 */ { 4, s_5_6, -1, 1, 0}, /* 7 */ { 3, s_5_7, -1, 8, 0}, /* 8 */ { 3, s_5_8, -1, 1, 0}, /* 9 */ { 6, s_5_9, -1, 2, 0}, /* 10 */ { 5, s_5_10, -1, 7, 0}, /* 11 */ { 4, s_5_11, -1, 1, 0}, /* 12 */ { 5, s_5_12, -1, 6, 0}, /* 13 */ { 6, s_5_13, 12, 5, 0}, /* 14 */ { 5, s_5_14, -1, 1, 0}, /* 15 */ { 5, s_5_15, -1, 1, 0}, /* 16 */ { 6, s_5_16, -1, 3, 0}, /* 17 */ { 3, s_5_17, -1, 1, 0}, /* 18 */ { 4, s_5_18, -1, 1, 0}, /* 19 */ { 3, 
s_5_19, -1, 1, 0}, /* 20 */ { 6, s_5_20, -1, 1, 0}, /* 21 */ { 6, s_5_21, -1, 1, 0}, /* 22 */ { 3, s_5_22, -1, 8, 0}, /* 23 */ { 6, s_5_23, -1, 1, 0}, /* 24 */ { 4, s_5_24, -1, 1, 0}, /* 25 */ { 4, s_5_25, -1, 1, 0}, /* 26 */ { 7, s_5_26, -1, 4, 0}, /* 27 */ { 4, s_5_27, -1, 9, 0}, /* 28 */ { 6, s_5_28, -1, 1, 0}, /* 29 */ { 4, s_5_29, -1, 1, 0}, /* 30 */ { 5, s_5_30, -1, 1, 0}, /* 31 */ { 4, s_5_31, -1, 8, 0}, /* 32 */ { 4, s_5_32, -1, 1, 0}, /* 33 */ { 7, s_5_33, -1, 2, 0}, /* 34 */ { 6, s_5_34, -1, 7, 0}, /* 35 */ { 7, s_5_35, -1, 3, 0}, /* 36 */ { 6, s_5_36, -1, 1, 0}, /* 37 */ { 5, s_5_37, -1, 1, 0}, /* 38 */ { 7, s_5_38, -1, 1, 0}, /* 39 */ { 4, s_5_39, -1, 1, 0}, /* 40 */ { 5, s_5_40, -1, 1, 0}, /* 41 */ { 4, s_5_41, -1, 1, 0}, /* 42 */ { 7, s_5_42, -1, 1, 0}, /* 43 */ { 7, s_5_43, -1, 1, 0}, /* 44 */ { 4, s_5_44, -1, 8, 0} }; static const symbol s_6_0[3] = { 'a', 'd', 'a' }; static const symbol s_6_1[3] = { 'i', 'd', 'a' }; static const symbol s_6_2[2] = { 'i', 'a' }; static const symbol s_6_3[4] = { 'a', 'r', 'i', 'a' }; static const symbol s_6_4[4] = { 'e', 'r', 'i', 'a' }; static const symbol s_6_5[4] = { 'i', 'r', 'i', 'a' }; static const symbol s_6_6[3] = { 'a', 'r', 'a' }; static const symbol s_6_7[3] = { 'e', 'r', 'a' }; static const symbol s_6_8[3] = { 'i', 'r', 'a' }; static const symbol s_6_9[3] = { 'a', 'v', 'a' }; static const symbol s_6_10[4] = { 'a', 's', 's', 'e' }; static const symbol s_6_11[4] = { 'e', 's', 's', 'e' }; static const symbol s_6_12[4] = { 'i', 's', 's', 'e' }; static const symbol s_6_13[4] = { 'a', 's', 't', 'e' }; static const symbol s_6_14[4] = { 'e', 's', 't', 'e' }; static const symbol s_6_15[4] = { 'i', 's', 't', 'e' }; static const symbol s_6_16[2] = { 'e', 'i' }; static const symbol s_6_17[4] = { 'a', 'r', 'e', 'i' }; static const symbol s_6_18[4] = { 'e', 'r', 'e', 'i' }; static const symbol s_6_19[4] = { 'i', 'r', 'e', 'i' }; static const symbol s_6_20[2] = { 'a', 'm' }; static const symbol s_6_21[3] = { 'i', 'a', 'm' 
}; static const symbol s_6_22[5] = { 'a', 'r', 'i', 'a', 'm' }; static const symbol s_6_23[5] = { 'e', 'r', 'i', 'a', 'm' }; static const symbol s_6_24[5] = { 'i', 'r', 'i', 'a', 'm' }; static const symbol s_6_25[4] = { 'a', 'r', 'a', 'm' }; static const symbol s_6_26[4] = { 'e', 'r', 'a', 'm' }; static const symbol s_6_27[4] = { 'i', 'r', 'a', 'm' }; static const symbol s_6_28[4] = { 'a', 'v', 'a', 'm' }; static const symbol s_6_29[2] = { 'e', 'm' }; static const symbol s_6_30[4] = { 'a', 'r', 'e', 'm' }; static const symbol s_6_31[4] = { 'e', 'r', 'e', 'm' }; static const symbol s_6_32[4] = { 'i', 'r', 'e', 'm' }; static const symbol s_6_33[5] = { 'a', 's', 's', 'e', 'm' }; static const symbol s_6_34[5] = { 'e', 's', 's', 'e', 'm' }; static const symbol s_6_35[5] = { 'i', 's', 's', 'e', 'm' }; static const symbol s_6_36[3] = { 'a', 'd', 'o' }; static const symbol s_6_37[3] = { 'i', 'd', 'o' }; static const symbol s_6_38[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_6_39[4] = { 'e', 'n', 'd', 'o' }; static const symbol s_6_40[4] = { 'i', 'n', 'd', 'o' }; static const symbol s_6_41[5] = { 'a', 'r', 'a', '~', 'o' }; static const symbol s_6_42[5] = { 'e', 'r', 'a', '~', 'o' }; static const symbol s_6_43[5] = { 'i', 'r', 'a', '~', 'o' }; static const symbol s_6_44[2] = { 'a', 'r' }; static const symbol s_6_45[2] = { 'e', 'r' }; static const symbol s_6_46[2] = { 'i', 'r' }; static const symbol s_6_47[2] = { 'a', 's' }; static const symbol s_6_48[4] = { 'a', 'd', 'a', 's' }; static const symbol s_6_49[4] = { 'i', 'd', 'a', 's' }; static const symbol s_6_50[3] = { 'i', 'a', 's' }; static const symbol s_6_51[5] = { 'a', 'r', 'i', 'a', 's' }; static const symbol s_6_52[5] = { 'e', 'r', 'i', 'a', 's' }; static const symbol s_6_53[5] = { 'i', 'r', 'i', 'a', 's' }; static const symbol s_6_54[4] = { 'a', 'r', 'a', 's' }; static const symbol s_6_55[4] = { 'e', 'r', 'a', 's' }; static const symbol s_6_56[4] = { 'i', 'r', 'a', 's' }; static const symbol s_6_57[4] = { 'a', 
'v', 'a', 's' }; static const symbol s_6_58[2] = { 'e', 's' }; static const symbol s_6_59[5] = { 'a', 'r', 'd', 'e', 's' }; static const symbol s_6_60[5] = { 'e', 'r', 'd', 'e', 's' }; static const symbol s_6_61[5] = { 'i', 'r', 'd', 'e', 's' }; static const symbol s_6_62[4] = { 'a', 'r', 'e', 's' }; static const symbol s_6_63[4] = { 'e', 'r', 'e', 's' }; static const symbol s_6_64[4] = { 'i', 'r', 'e', 's' }; static const symbol s_6_65[5] = { 'a', 's', 's', 'e', 's' }; static const symbol s_6_66[5] = { 'e', 's', 's', 'e', 's' }; static const symbol s_6_67[5] = { 'i', 's', 's', 'e', 's' }; static const symbol s_6_68[5] = { 'a', 's', 't', 'e', 's' }; static const symbol s_6_69[5] = { 'e', 's', 't', 'e', 's' }; static const symbol s_6_70[5] = { 'i', 's', 't', 'e', 's' }; static const symbol s_6_71[2] = { 'i', 's' }; static const symbol s_6_72[3] = { 'a', 'i', 's' }; static const symbol s_6_73[3] = { 'e', 'i', 's' }; static const symbol s_6_74[5] = { 'a', 'r', 'e', 'i', 's' }; static const symbol s_6_75[5] = { 'e', 'r', 'e', 'i', 's' }; static const symbol s_6_76[5] = { 'i', 'r', 'e', 'i', 's' }; static const symbol s_6_77[6] = { 0xC3, 0xA1, 'r', 'e', 'i', 's' }; static const symbol s_6_78[6] = { 0xC3, 0xA9, 'r', 'e', 'i', 's' }; static const symbol s_6_79[6] = { 0xC3, 0xAD, 'r', 'e', 'i', 's' }; static const symbol s_6_80[7] = { 0xC3, 0xA1, 's', 's', 'e', 'i', 's' }; static const symbol s_6_81[7] = { 0xC3, 0xA9, 's', 's', 'e', 'i', 's' }; static const symbol s_6_82[7] = { 0xC3, 0xAD, 's', 's', 'e', 'i', 's' }; static const symbol s_6_83[6] = { 0xC3, 0xA1, 'v', 'e', 'i', 's' }; static const symbol s_6_84[5] = { 0xC3, 0xAD, 'e', 'i', 's' }; static const symbol s_6_85[7] = { 'a', 'r', 0xC3, 0xAD, 'e', 'i', 's' }; static const symbol s_6_86[7] = { 'e', 'r', 0xC3, 0xAD, 'e', 'i', 's' }; static const symbol s_6_87[7] = { 'i', 'r', 0xC3, 0xAD, 'e', 'i', 's' }; static const symbol s_6_88[4] = { 'a', 'd', 'o', 's' }; static const symbol s_6_89[4] = { 'i', 'd', 'o', 's' }; 
static const symbol s_6_90[4] = { 'a', 'm', 'o', 's' }; static const symbol s_6_91[7] = { 0xC3, 0xA1, 'r', 'a', 'm', 'o', 's' }; static const symbol s_6_92[7] = { 0xC3, 0xA9, 'r', 'a', 'm', 'o', 's' }; static const symbol s_6_93[7] = { 0xC3, 0xAD, 'r', 'a', 'm', 'o', 's' }; static const symbol s_6_94[7] = { 0xC3, 0xA1, 'v', 'a', 'm', 'o', 's' }; static const symbol s_6_95[6] = { 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_6_96[8] = { 'a', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_6_97[8] = { 'e', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_6_98[8] = { 'i', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_6_99[4] = { 'e', 'm', 'o', 's' }; static const symbol s_6_100[6] = { 'a', 'r', 'e', 'm', 'o', 's' }; static const symbol s_6_101[6] = { 'e', 'r', 'e', 'm', 'o', 's' }; static const symbol s_6_102[6] = { 'i', 'r', 'e', 'm', 'o', 's' }; static const symbol s_6_103[8] = { 0xC3, 0xA1, 's', 's', 'e', 'm', 'o', 's' }; static const symbol s_6_104[8] = { 0xC3, 0xAA, 's', 's', 'e', 'm', 'o', 's' }; static const symbol s_6_105[8] = { 0xC3, 0xAD, 's', 's', 'e', 'm', 'o', 's' }; static const symbol s_6_106[4] = { 'i', 'm', 'o', 's' }; static const symbol s_6_107[5] = { 'a', 'r', 'm', 'o', 's' }; static const symbol s_6_108[5] = { 'e', 'r', 'm', 'o', 's' }; static const symbol s_6_109[5] = { 'i', 'r', 'm', 'o', 's' }; static const symbol s_6_110[5] = { 0xC3, 0xA1, 'm', 'o', 's' }; static const symbol s_6_111[5] = { 'a', 'r', 0xC3, 0xA1, 's' }; static const symbol s_6_112[5] = { 'e', 'r', 0xC3, 0xA1, 's' }; static const symbol s_6_113[5] = { 'i', 'r', 0xC3, 0xA1, 's' }; static const symbol s_6_114[2] = { 'e', 'u' }; static const symbol s_6_115[2] = { 'i', 'u' }; static const symbol s_6_116[2] = { 'o', 'u' }; static const symbol s_6_117[4] = { 'a', 'r', 0xC3, 0xA1 }; static const symbol s_6_118[4] = { 'e', 'r', 0xC3, 0xA1 }; static const symbol s_6_119[4] = { 'i', 'r', 0xC3, 0xA1 }; static const struct among a_6[120] = { 
/* 0 */ { 3, s_6_0, -1, 1, 0}, /* 1 */ { 3, s_6_1, -1, 1, 0}, /* 2 */ { 2, s_6_2, -1, 1, 0}, /* 3 */ { 4, s_6_3, 2, 1, 0}, /* 4 */ { 4, s_6_4, 2, 1, 0}, /* 5 */ { 4, s_6_5, 2, 1, 0}, /* 6 */ { 3, s_6_6, -1, 1, 0}, /* 7 */ { 3, s_6_7, -1, 1, 0}, /* 8 */ { 3, s_6_8, -1, 1, 0}, /* 9 */ { 3, s_6_9, -1, 1, 0}, /* 10 */ { 4, s_6_10, -1, 1, 0}, /* 11 */ { 4, s_6_11, -1, 1, 0}, /* 12 */ { 4, s_6_12, -1, 1, 0}, /* 13 */ { 4, s_6_13, -1, 1, 0}, /* 14 */ { 4, s_6_14, -1, 1, 0}, /* 15 */ { 4, s_6_15, -1, 1, 0}, /* 16 */ { 2, s_6_16, -1, 1, 0}, /* 17 */ { 4, s_6_17, 16, 1, 0}, /* 18 */ { 4, s_6_18, 16, 1, 0}, /* 19 */ { 4, s_6_19, 16, 1, 0}, /* 20 */ { 2, s_6_20, -1, 1, 0}, /* 21 */ { 3, s_6_21, 20, 1, 0}, /* 22 */ { 5, s_6_22, 21, 1, 0}, /* 23 */ { 5, s_6_23, 21, 1, 0}, /* 24 */ { 5, s_6_24, 21, 1, 0}, /* 25 */ { 4, s_6_25, 20, 1, 0}, /* 26 */ { 4, s_6_26, 20, 1, 0}, /* 27 */ { 4, s_6_27, 20, 1, 0}, /* 28 */ { 4, s_6_28, 20, 1, 0}, /* 29 */ { 2, s_6_29, -1, 1, 0}, /* 30 */ { 4, s_6_30, 29, 1, 0}, /* 31 */ { 4, s_6_31, 29, 1, 0}, /* 32 */ { 4, s_6_32, 29, 1, 0}, /* 33 */ { 5, s_6_33, 29, 1, 0}, /* 34 */ { 5, s_6_34, 29, 1, 0}, /* 35 */ { 5, s_6_35, 29, 1, 0}, /* 36 */ { 3, s_6_36, -1, 1, 0}, /* 37 */ { 3, s_6_37, -1, 1, 0}, /* 38 */ { 4, s_6_38, -1, 1, 0}, /* 39 */ { 4, s_6_39, -1, 1, 0}, /* 40 */ { 4, s_6_40, -1, 1, 0}, /* 41 */ { 5, s_6_41, -1, 1, 0}, /* 42 */ { 5, s_6_42, -1, 1, 0}, /* 43 */ { 5, s_6_43, -1, 1, 0}, /* 44 */ { 2, s_6_44, -1, 1, 0}, /* 45 */ { 2, s_6_45, -1, 1, 0}, /* 46 */ { 2, s_6_46, -1, 1, 0}, /* 47 */ { 2, s_6_47, -1, 1, 0}, /* 48 */ { 4, s_6_48, 47, 1, 0}, /* 49 */ { 4, s_6_49, 47, 1, 0}, /* 50 */ { 3, s_6_50, 47, 1, 0}, /* 51 */ { 5, s_6_51, 50, 1, 0}, /* 52 */ { 5, s_6_52, 50, 1, 0}, /* 53 */ { 5, s_6_53, 50, 1, 0}, /* 54 */ { 4, s_6_54, 47, 1, 0}, /* 55 */ { 4, s_6_55, 47, 1, 0}, /* 56 */ { 4, s_6_56, 47, 1, 0}, /* 57 */ { 4, s_6_57, 47, 1, 0}, /* 58 */ { 2, s_6_58, -1, 1, 0}, /* 59 */ { 5, s_6_59, 58, 1, 0}, /* 60 */ { 5, s_6_60, 58, 1, 0}, /* 61 */ 
{ 5, s_6_61, 58, 1, 0}, /* 62 */ { 4, s_6_62, 58, 1, 0}, /* 63 */ { 4, s_6_63, 58, 1, 0}, /* 64 */ { 4, s_6_64, 58, 1, 0}, /* 65 */ { 5, s_6_65, 58, 1, 0}, /* 66 */ { 5, s_6_66, 58, 1, 0}, /* 67 */ { 5, s_6_67, 58, 1, 0}, /* 68 */ { 5, s_6_68, 58, 1, 0}, /* 69 */ { 5, s_6_69, 58, 1, 0}, /* 70 */ { 5, s_6_70, 58, 1, 0}, /* 71 */ { 2, s_6_71, -1, 1, 0}, /* 72 */ { 3, s_6_72, 71, 1, 0}, /* 73 */ { 3, s_6_73, 71, 1, 0}, /* 74 */ { 5, s_6_74, 73, 1, 0}, /* 75 */ { 5, s_6_75, 73, 1, 0}, /* 76 */ { 5, s_6_76, 73, 1, 0}, /* 77 */ { 6, s_6_77, 73, 1, 0}, /* 78 */ { 6, s_6_78, 73, 1, 0}, /* 79 */ { 6, s_6_79, 73, 1, 0}, /* 80 */ { 7, s_6_80, 73, 1, 0}, /* 81 */ { 7, s_6_81, 73, 1, 0}, /* 82 */ { 7, s_6_82, 73, 1, 0}, /* 83 */ { 6, s_6_83, 73, 1, 0}, /* 84 */ { 5, s_6_84, 73, 1, 0}, /* 85 */ { 7, s_6_85, 84, 1, 0}, /* 86 */ { 7, s_6_86, 84, 1, 0}, /* 87 */ { 7, s_6_87, 84, 1, 0}, /* 88 */ { 4, s_6_88, -1, 1, 0}, /* 89 */ { 4, s_6_89, -1, 1, 0}, /* 90 */ { 4, s_6_90, -1, 1, 0}, /* 91 */ { 7, s_6_91, 90, 1, 0}, /* 92 */ { 7, s_6_92, 90, 1, 0}, /* 93 */ { 7, s_6_93, 90, 1, 0}, /* 94 */ { 7, s_6_94, 90, 1, 0}, /* 95 */ { 6, s_6_95, 90, 1, 0}, /* 96 */ { 8, s_6_96, 95, 1, 0}, /* 97 */ { 8, s_6_97, 95, 1, 0}, /* 98 */ { 8, s_6_98, 95, 1, 0}, /* 99 */ { 4, s_6_99, -1, 1, 0}, /*100 */ { 6, s_6_100, 99, 1, 0}, /*101 */ { 6, s_6_101, 99, 1, 0}, /*102 */ { 6, s_6_102, 99, 1, 0}, /*103 */ { 8, s_6_103, 99, 1, 0}, /*104 */ { 8, s_6_104, 99, 1, 0}, /*105 */ { 8, s_6_105, 99, 1, 0}, /*106 */ { 4, s_6_106, -1, 1, 0}, /*107 */ { 5, s_6_107, -1, 1, 0}, /*108 */ { 5, s_6_108, -1, 1, 0}, /*109 */ { 5, s_6_109, -1, 1, 0}, /*110 */ { 5, s_6_110, -1, 1, 0}, /*111 */ { 5, s_6_111, -1, 1, 0}, /*112 */ { 5, s_6_112, -1, 1, 0}, /*113 */ { 5, s_6_113, -1, 1, 0}, /*114 */ { 2, s_6_114, -1, 1, 0}, /*115 */ { 2, s_6_115, -1, 1, 0}, /*116 */ { 2, s_6_116, -1, 1, 0}, /*117 */ { 4, s_6_117, -1, 1, 0}, /*118 */ { 4, s_6_118, -1, 1, 0}, /*119 */ { 4, s_6_119, -1, 1, 0} }; static const symbol s_7_0[1] = { 'a' }; 
static const symbol s_7_1[1] = { 'i' }; static const symbol s_7_2[1] = { 'o' }; static const symbol s_7_3[2] = { 'o', 's' }; static const symbol s_7_4[2] = { 0xC3, 0xA1 }; static const symbol s_7_5[2] = { 0xC3, 0xAD }; static const symbol s_7_6[2] = { 0xC3, 0xB3 }; static const struct among a_7[7] = { /* 0 */ { 1, s_7_0, -1, 1, 0}, /* 1 */ { 1, s_7_1, -1, 1, 0}, /* 2 */ { 1, s_7_2, -1, 1, 0}, /* 3 */ { 2, s_7_3, -1, 1, 0}, /* 4 */ { 2, s_7_4, -1, 1, 0}, /* 5 */ { 2, s_7_5, -1, 1, 0}, /* 6 */ { 2, s_7_6, -1, 1, 0} }; static const symbol s_8_0[1] = { 'e' }; static const symbol s_8_1[2] = { 0xC3, 0xA7 }; static const symbol s_8_2[2] = { 0xC3, 0xA9 }; static const symbol s_8_3[2] = { 0xC3, 0xAA }; static const struct among a_8[4] = { /* 0 */ { 1, s_8_0, -1, 1, 0}, /* 1 */ { 2, s_8_1, -1, 2, 0}, /* 2 */ { 2, s_8_2, -1, 1, 0}, /* 3 */ { 2, s_8_3, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 19, 12, 2 }; static const symbol s_0[] = { 'a', '~' }; static const symbol s_1[] = { 'o', '~' }; static const symbol s_2[] = { 0xC3, 0xA3 }; static const symbol s_3[] = { 0xC3, 0xB5 }; static const symbol s_4[] = { 'l', 'o', 'g' }; static const symbol s_5[] = { 'u' }; static const symbol s_6[] = { 'e', 'n', 't', 'e' }; static const symbol s_7[] = { 'a', 't' }; static const symbol s_8[] = { 'a', 't' }; static const symbol s_9[] = { 'e' }; static const symbol s_10[] = { 'i', 'r' }; static const symbol s_11[] = { 'u' }; static const symbol s_12[] = { 'g' }; static const symbol s_13[] = { 'i' }; static const symbol s_14[] = { 'c' }; static const symbol s_15[] = { 'c' }; static const symbol s_16[] = { 'i' }; static const symbol s_17[] = { 'c' }; static int r_prelude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 36 */ int c1 = z->c; z->bra = z->c; /* [, line 37 */ if (z->c + 1 >= z->l || (z->p[z->c + 1] != 163 && z->p[z->c + 1] != 181)) among_var = 3; else among_var = find_among(z, a_0, 3); /* substring, line 37 */ 
if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 37 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 2, s_0); /* <-, line 38 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 2, s_1); /* <-, line 39 */ if (ret < 0) return ret; } break; case 3: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 40 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 50 */ { int c2 = z->c; /* or, line 52 */ if (in_grouping_U(z, g_v, 97, 250, 0)) goto lab2; { int c3 = z->c; /* or, line 51 */ if (out_grouping_U(z, g_v, 97, 250, 0)) goto lab4; { /* gopast */ /* grouping v, line 51 */ int ret = out_grouping_U(z, g_v, 97, 250, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping_U(z, g_v, 97, 250, 0)) goto lab2; { /* gopast */ /* non v, line 51 */ int ret = in_grouping_U(z, g_v, 97, 250, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping_U(z, g_v, 97, 250, 0)) goto lab0; { int c4 = z->c; /* or, line 53 */ if (out_grouping_U(z, g_v, 97, 250, 0)) goto lab6; { /* gopast */ /* grouping v, line 53 */ int ret = out_grouping_U(z, g_v, 97, 250, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping_U(z, g_v, 97, 250, 0)) goto lab0; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 53 */ } } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 54 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 56 */ { /* gopast */ /* grouping v, line 57 */ int ret = out_grouping_U(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 57 */ int ret = in_grouping_U(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 57 */ { /* gopast */ /* grouping 
v, line 58 */ int ret = out_grouping_U(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 58 */ int ret = in_grouping_U(z, g_v, 97, 250, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 58 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 62 */ int c1 = z->c; z->bra = z->c; /* [, line 63 */ if (z->c + 1 >= z->l || z->p[z->c + 1] != 126) among_var = 3; else among_var = find_among(z, a_1, 3); /* substring, line 63 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 63 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 2, s_2); /* <-, line 64 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 2, s_3); /* <-, line 65 */ if (ret < 0) return ret; } break; case 3: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 66 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 77 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((839714 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_5, 45); /* substring, line 77 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 77 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 93 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 93 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 98 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_4); /* <-, line 98 */ if (ret < 0) return 
ret; } break; case 3: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 102 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_5); /* <-, line 102 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 106 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_6); /* <-, line 106 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 110 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 110 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 111 */ z->ket = z->c; /* [, line 112 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718616 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab0; } among_var = find_among_b(z, a_2, 4); /* substring, line 112 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 112 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 112 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 112 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab0; } case 1: z->ket = z->c; /* [, line 113 */ if (!(eq_s_b(z, 2, s_7))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 113 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 113 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 113 */ if (ret < 0) return ret; } break; } lab0: ; } break; case 6: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 122 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 122 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 123 */ z->ket = z->c; /* [, line 124 */ if (z->c - 3 <= z->lb || (z->p[z->c - 1] != 101 && z->p[z->c - 1] != 108)) { z->c = z->l - m_keep; goto lab1; } among_var = 
find_among_b(z, a_3, 3); /* substring, line 124 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 124 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab1; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 127 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 127 */ if (ret < 0) return ret; } break; } lab1: ; } break; case 7: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 134 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 134 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 135 */ z->ket = z->c; /* [, line 136 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab2; } among_var = find_among_b(z, a_4, 3); /* substring, line 136 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } z->bra = z->c; /* ], line 136 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab2; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 139 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 139 */ if (ret < 0) return ret; } break; } lab2: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 146 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 146 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 147 */ z->ket = z->c; /* [, line 148 */ if (!(eq_s_b(z, 2, s_8))) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 148 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 148 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 148 */ if (ret < 0) return ret; } lab3: ; } break; case 9: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 153 */ if (ret < 0) return ret; } if 
(!(eq_s_b(z, 1, s_9))) return 0; { int ret = slice_from_s(z, 2, s_10); /* <-, line 154 */ if (ret < 0) return ret; } break; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 159 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 159 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 160 */ among_var = find_among_b(z, a_6, 120); /* substring, line 160 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 160 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = slice_del(z); /* delete, line 179 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_residual_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 184 */ among_var = find_among_b(z, a_7, 7); /* substring, line 184 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 184 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 187 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 187 */ if (ret < 0) return ret; } break; } return 1; } static int r_residual_form(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 192 */ among_var = find_among_b(z, a_8, 4); /* substring, line 192 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 192 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 194 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 194 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 194 */ { int m1 = z->l - z->c; (void)m1; /* or, line 194 */ if (!(eq_s_b(z, 1, s_11))) goto lab1; z->bra = z->c; /* ], line 194 */ { int m_test = z->l - z->c; /* test, line 194 */ if (!(eq_s_b(z, 1, s_12))) goto lab1; z->c = z->l - m_test; } goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_13))) return 0; z->bra = 
z->c; /* ], line 195 */ { int m_test = z->l - z->c; /* test, line 195 */ if (!(eq_s_b(z, 1, s_14))) return 0; z->c = z->l - m_test; } } lab0: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 195 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 195 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_15); /* <-, line 196 */ if (ret < 0) return ret; } break; } return 1; } extern int portuguese_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 202 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 202 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 203 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 203 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 204 */ { int m3 = z->l - z->c; (void)m3; /* do, line 205 */ { int m4 = z->l - z->c; (void)m4; /* or, line 209 */ { int m5 = z->l - z->c; (void)m5; /* and, line 207 */ { int m6 = z->l - z->c; (void)m6; /* or, line 206 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab6; /* call standard_suffix, line 206 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m6; { int ret = r_verb_suffix(z); if (ret == 0) goto lab4; /* call verb_suffix, line 206 */ if (ret < 0) return ret; } } lab5: z->c = z->l - m5; { int m7 = z->l - z->c; (void)m7; /* do, line 207 */ z->ket = z->c; /* [, line 207 */ if (!(eq_s_b(z, 1, s_16))) goto lab7; z->bra = z->c; /* ], line 207 */ { int m_test = z->l - z->c; /* test, line 207 */ if (!(eq_s_b(z, 1, s_17))) goto lab7; z->c = z->l - m_test; } { int ret = r_RV(z); if (ret == 0) goto lab7; /* call RV, line 207 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 207 */ if (ret < 0) return ret; } lab7: z->c = z->l - m7; } } goto lab3; lab4: z->c = z->l - m4; { int ret = r_residual_suffix(z); if (ret == 0) goto lab2; /* call residual_suffix, line 209 */ if (ret < 0) 
return ret; } } lab3: lab2: z->c = z->l - m3; } { int m8 = z->l - z->c; (void)m8; /* do, line 211 */ { int ret = r_residual_form(z); if (ret == 0) goto lab8; /* call residual_form, line 211 */ if (ret < 0) return ret; } lab8: z->c = z->l - m8; } z->c = z->lb; { int c9 = z->c; /* do, line 213 */ { int ret = r_postlude(z); if (ret == 0) goto lab9; /* call postlude, line 213 */ if (ret < 0) return ret; } lab9: z->c = c9; } return 1; } extern struct SN_env * portuguese_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } extern void portuguese_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_portuguese.h000066400000000000000000000004771456444476200315160ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* portuguese_UTF_8_create_env(void); extern void portuguese_UTF_8_close_env(struct SN_env* z); extern int portuguese_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_romanian.c000066400000000000000000001117521456444476200311120ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int romanian_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_vowel_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_combo_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_step_0(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_prelude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern 
struct SN_env * romanian_UTF_8_create_env(void); extern void romanian_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[1] = { 'I' }; static const symbol s_0_2[1] = { 'U' }; static const struct among a_0[3] = { /* 0 */ { 0, 0, -1, 3, 0}, /* 1 */ { 1, s_0_1, 0, 1, 0}, /* 2 */ { 1, s_0_2, 0, 2, 0} }; static const symbol s_1_0[2] = { 'e', 'a' }; static const symbol s_1_1[5] = { 'a', 0xC5, 0xA3, 'i', 'a' }; static const symbol s_1_2[3] = { 'a', 'u', 'a' }; static const symbol s_1_3[3] = { 'i', 'u', 'a' }; static const symbol s_1_4[5] = { 'a', 0xC5, 0xA3, 'i', 'e' }; static const symbol s_1_5[3] = { 'e', 'l', 'e' }; static const symbol s_1_6[3] = { 'i', 'l', 'e' }; static const symbol s_1_7[4] = { 'i', 'i', 'l', 'e' }; static const symbol s_1_8[3] = { 'i', 'e', 'i' }; static const symbol s_1_9[4] = { 'a', 't', 'e', 'i' }; static const symbol s_1_10[2] = { 'i', 'i' }; static const symbol s_1_11[4] = { 'u', 'l', 'u', 'i' }; static const symbol s_1_12[2] = { 'u', 'l' }; static const symbol s_1_13[4] = { 'e', 'l', 'o', 'r' }; static const symbol s_1_14[4] = { 'i', 'l', 'o', 'r' }; static const symbol s_1_15[5] = { 'i', 'i', 'l', 'o', 'r' }; static const struct among a_1[16] = { /* 0 */ { 2, s_1_0, -1, 3, 0}, /* 1 */ { 5, s_1_1, -1, 7, 0}, /* 2 */ { 3, s_1_2, -1, 2, 0}, /* 3 */ { 3, s_1_3, -1, 4, 0}, /* 4 */ { 5, s_1_4, -1, 7, 0}, /* 5 */ { 3, s_1_5, -1, 3, 0}, /* 6 */ { 3, s_1_6, -1, 5, 0}, /* 7 */ { 4, s_1_7, 6, 4, 0}, /* 8 */ { 3, s_1_8, -1, 4, 0}, /* 9 */ { 4, s_1_9, -1, 6, 0}, /* 10 */ { 2, s_1_10, -1, 4, 0}, /* 11 */ { 4, s_1_11, -1, 1, 0}, /* 12 */ { 2, s_1_12, -1, 1, 0}, /* 13 */ { 4, s_1_13, -1, 3, 0}, /* 14 */ { 4, s_1_14, -1, 4, 0}, /* 15 */ { 5, s_1_15, 14, 4, 0} }; static const symbol s_2_0[5] = { 'i', 'c', 'a', 'l', 'a' }; static const symbol s_2_1[5] = { 'i', 'c', 'i', 'v', 'a' }; static const symbol s_2_2[5] = { 'a', 't', 'i', 'v', 'a' }; static const symbol s_2_3[5] = { 'i', 't', 'i', 'v', 'a' }; static const symbol 
s_2_4[5] = { 'i', 'c', 'a', 'l', 'e' }; static const symbol s_2_5[7] = { 'a', 0xC5, 0xA3, 'i', 'u', 'n', 'e' }; static const symbol s_2_6[7] = { 'i', 0xC5, 0xA3, 'i', 'u', 'n', 'e' }; static const symbol s_2_7[6] = { 'a', 't', 'o', 'a', 'r', 'e' }; static const symbol s_2_8[6] = { 'i', 't', 'o', 'a', 'r', 'e' }; static const symbol s_2_9[7] = { 0xC4, 0x83, 't', 'o', 'a', 'r', 'e' }; static const symbol s_2_10[7] = { 'i', 'c', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_11[9] = { 'a', 'b', 'i', 'l', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_12[9] = { 'i', 'b', 'i', 'l', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_13[7] = { 'i', 'v', 'i', 't', 'a', 't', 'e' }; static const symbol s_2_14[5] = { 'i', 'c', 'i', 'v', 'e' }; static const symbol s_2_15[5] = { 'a', 't', 'i', 'v', 'e' }; static const symbol s_2_16[5] = { 'i', 't', 'i', 'v', 'e' }; static const symbol s_2_17[5] = { 'i', 'c', 'a', 'l', 'i' }; static const symbol s_2_18[5] = { 'a', 't', 'o', 'r', 'i' }; static const symbol s_2_19[7] = { 'i', 'c', 'a', 't', 'o', 'r', 'i' }; static const symbol s_2_20[5] = { 'i', 't', 'o', 'r', 'i' }; static const symbol s_2_21[6] = { 0xC4, 0x83, 't', 'o', 'r', 'i' }; static const symbol s_2_22[7] = { 'i', 'c', 'i', 't', 'a', 't', 'i' }; static const symbol s_2_23[9] = { 'a', 'b', 'i', 'l', 'i', 't', 'a', 't', 'i' }; static const symbol s_2_24[7] = { 'i', 'v', 'i', 't', 'a', 't', 'i' }; static const symbol s_2_25[5] = { 'i', 'c', 'i', 'v', 'i' }; static const symbol s_2_26[5] = { 'a', 't', 'i', 'v', 'i' }; static const symbol s_2_27[5] = { 'i', 't', 'i', 'v', 'i' }; static const symbol s_2_28[7] = { 'i', 'c', 'i', 't', 0xC4, 0x83, 'i' }; static const symbol s_2_29[9] = { 'a', 'b', 'i', 'l', 'i', 't', 0xC4, 0x83, 'i' }; static const symbol s_2_30[7] = { 'i', 'v', 'i', 't', 0xC4, 0x83, 'i' }; static const symbol s_2_31[9] = { 'i', 'c', 'i', 't', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_2_32[11] = { 'a', 'b', 'i', 'l', 'i', 't', 0xC4, 0x83, 0xC5, 
0xA3, 'i' }; static const symbol s_2_33[9] = { 'i', 'v', 'i', 't', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_2_34[4] = { 'i', 'c', 'a', 'l' }; static const symbol s_2_35[4] = { 'a', 't', 'o', 'r' }; static const symbol s_2_36[6] = { 'i', 'c', 'a', 't', 'o', 'r' }; static const symbol s_2_37[4] = { 'i', 't', 'o', 'r' }; static const symbol s_2_38[5] = { 0xC4, 0x83, 't', 'o', 'r' }; static const symbol s_2_39[4] = { 'i', 'c', 'i', 'v' }; static const symbol s_2_40[4] = { 'a', 't', 'i', 'v' }; static const symbol s_2_41[4] = { 'i', 't', 'i', 'v' }; static const symbol s_2_42[6] = { 'i', 'c', 'a', 'l', 0xC4, 0x83 }; static const symbol s_2_43[6] = { 'i', 'c', 'i', 'v', 0xC4, 0x83 }; static const symbol s_2_44[6] = { 'a', 't', 'i', 'v', 0xC4, 0x83 }; static const symbol s_2_45[6] = { 'i', 't', 'i', 'v', 0xC4, 0x83 }; static const struct among a_2[46] = { /* 0 */ { 5, s_2_0, -1, 4, 0}, /* 1 */ { 5, s_2_1, -1, 4, 0}, /* 2 */ { 5, s_2_2, -1, 5, 0}, /* 3 */ { 5, s_2_3, -1, 6, 0}, /* 4 */ { 5, s_2_4, -1, 4, 0}, /* 5 */ { 7, s_2_5, -1, 5, 0}, /* 6 */ { 7, s_2_6, -1, 6, 0}, /* 7 */ { 6, s_2_7, -1, 5, 0}, /* 8 */ { 6, s_2_8, -1, 6, 0}, /* 9 */ { 7, s_2_9, -1, 5, 0}, /* 10 */ { 7, s_2_10, -1, 4, 0}, /* 11 */ { 9, s_2_11, -1, 1, 0}, /* 12 */ { 9, s_2_12, -1, 2, 0}, /* 13 */ { 7, s_2_13, -1, 3, 0}, /* 14 */ { 5, s_2_14, -1, 4, 0}, /* 15 */ { 5, s_2_15, -1, 5, 0}, /* 16 */ { 5, s_2_16, -1, 6, 0}, /* 17 */ { 5, s_2_17, -1, 4, 0}, /* 18 */ { 5, s_2_18, -1, 5, 0}, /* 19 */ { 7, s_2_19, 18, 4, 0}, /* 20 */ { 5, s_2_20, -1, 6, 0}, /* 21 */ { 6, s_2_21, -1, 5, 0}, /* 22 */ { 7, s_2_22, -1, 4, 0}, /* 23 */ { 9, s_2_23, -1, 1, 0}, /* 24 */ { 7, s_2_24, -1, 3, 0}, /* 25 */ { 5, s_2_25, -1, 4, 0}, /* 26 */ { 5, s_2_26, -1, 5, 0}, /* 27 */ { 5, s_2_27, -1, 6, 0}, /* 28 */ { 7, s_2_28, -1, 4, 0}, /* 29 */ { 9, s_2_29, -1, 1, 0}, /* 30 */ { 7, s_2_30, -1, 3, 0}, /* 31 */ { 9, s_2_31, -1, 4, 0}, /* 32 */ { 11, s_2_32, -1, 1, 0}, /* 33 */ { 9, s_2_33, -1, 3, 0}, /* 34 */ { 4, s_2_34, 
-1, 4, 0}, /* 35 */ { 4, s_2_35, -1, 5, 0}, /* 36 */ { 6, s_2_36, 35, 4, 0}, /* 37 */ { 4, s_2_37, -1, 6, 0}, /* 38 */ { 5, s_2_38, -1, 5, 0}, /* 39 */ { 4, s_2_39, -1, 4, 0}, /* 40 */ { 4, s_2_40, -1, 5, 0}, /* 41 */ { 4, s_2_41, -1, 6, 0}, /* 42 */ { 6, s_2_42, -1, 4, 0}, /* 43 */ { 6, s_2_43, -1, 4, 0}, /* 44 */ { 6, s_2_44, -1, 5, 0}, /* 45 */ { 6, s_2_45, -1, 6, 0} }; static const symbol s_3_0[3] = { 'i', 'c', 'a' }; static const symbol s_3_1[5] = { 'a', 'b', 'i', 'l', 'a' }; static const symbol s_3_2[5] = { 'i', 'b', 'i', 'l', 'a' }; static const symbol s_3_3[4] = { 'o', 'a', 's', 'a' }; static const symbol s_3_4[3] = { 'a', 't', 'a' }; static const symbol s_3_5[3] = { 'i', 't', 'a' }; static const symbol s_3_6[4] = { 'a', 'n', 't', 'a' }; static const symbol s_3_7[4] = { 'i', 's', 't', 'a' }; static const symbol s_3_8[3] = { 'u', 't', 'a' }; static const symbol s_3_9[3] = { 'i', 'v', 'a' }; static const symbol s_3_10[2] = { 'i', 'c' }; static const symbol s_3_11[3] = { 'i', 'c', 'e' }; static const symbol s_3_12[5] = { 'a', 'b', 'i', 'l', 'e' }; static const symbol s_3_13[5] = { 'i', 'b', 'i', 'l', 'e' }; static const symbol s_3_14[4] = { 'i', 's', 'm', 'e' }; static const symbol s_3_15[4] = { 'i', 'u', 'n', 'e' }; static const symbol s_3_16[4] = { 'o', 'a', 's', 'e' }; static const symbol s_3_17[3] = { 'a', 't', 'e' }; static const symbol s_3_18[5] = { 'i', 't', 'a', 't', 'e' }; static const symbol s_3_19[3] = { 'i', 't', 'e' }; static const symbol s_3_20[4] = { 'a', 'n', 't', 'e' }; static const symbol s_3_21[4] = { 'i', 's', 't', 'e' }; static const symbol s_3_22[3] = { 'u', 't', 'e' }; static const symbol s_3_23[3] = { 'i', 'v', 'e' }; static const symbol s_3_24[3] = { 'i', 'c', 'i' }; static const symbol s_3_25[5] = { 'a', 'b', 'i', 'l', 'i' }; static const symbol s_3_26[5] = { 'i', 'b', 'i', 'l', 'i' }; static const symbol s_3_27[4] = { 'i', 'u', 'n', 'i' }; static const symbol s_3_28[5] = { 'a', 't', 'o', 'r', 'i' }; static const symbol s_3_29[3] = { 
'o', 's', 'i' }; static const symbol s_3_30[3] = { 'a', 't', 'i' }; static const symbol s_3_31[5] = { 'i', 't', 'a', 't', 'i' }; static const symbol s_3_32[3] = { 'i', 't', 'i' }; static const symbol s_3_33[4] = { 'a', 'n', 't', 'i' }; static const symbol s_3_34[4] = { 'i', 's', 't', 'i' }; static const symbol s_3_35[3] = { 'u', 't', 'i' }; static const symbol s_3_36[5] = { 'i', 0xC5, 0x9F, 't', 'i' }; static const symbol s_3_37[3] = { 'i', 'v', 'i' }; static const symbol s_3_38[5] = { 'i', 't', 0xC4, 0x83, 'i' }; static const symbol s_3_39[4] = { 'o', 0xC5, 0x9F, 'i' }; static const symbol s_3_40[7] = { 'i', 't', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_3_41[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_3_42[4] = { 'i', 'b', 'i', 'l' }; static const symbol s_3_43[3] = { 'i', 's', 'm' }; static const symbol s_3_44[4] = { 'a', 't', 'o', 'r' }; static const symbol s_3_45[2] = { 'o', 's' }; static const symbol s_3_46[2] = { 'a', 't' }; static const symbol s_3_47[2] = { 'i', 't' }; static const symbol s_3_48[3] = { 'a', 'n', 't' }; static const symbol s_3_49[3] = { 'i', 's', 't' }; static const symbol s_3_50[2] = { 'u', 't' }; static const symbol s_3_51[2] = { 'i', 'v' }; static const symbol s_3_52[4] = { 'i', 'c', 0xC4, 0x83 }; static const symbol s_3_53[6] = { 'a', 'b', 'i', 'l', 0xC4, 0x83 }; static const symbol s_3_54[6] = { 'i', 'b', 'i', 'l', 0xC4, 0x83 }; static const symbol s_3_55[5] = { 'o', 'a', 's', 0xC4, 0x83 }; static const symbol s_3_56[4] = { 'a', 't', 0xC4, 0x83 }; static const symbol s_3_57[4] = { 'i', 't', 0xC4, 0x83 }; static const symbol s_3_58[5] = { 'a', 'n', 't', 0xC4, 0x83 }; static const symbol s_3_59[5] = { 'i', 's', 't', 0xC4, 0x83 }; static const symbol s_3_60[4] = { 'u', 't', 0xC4, 0x83 }; static const symbol s_3_61[4] = { 'i', 'v', 0xC4, 0x83 }; static const struct among a_3[62] = { /* 0 */ { 3, s_3_0, -1, 1, 0}, /* 1 */ { 5, s_3_1, -1, 1, 0}, /* 2 */ { 5, s_3_2, -1, 1, 0}, /* 3 */ { 4, s_3_3, -1, 1, 0}, /* 4 */ { 3, 
s_3_4, -1, 1, 0}, /* 5 */ { 3, s_3_5, -1, 1, 0}, /* 6 */ { 4, s_3_6, -1, 1, 0}, /* 7 */ { 4, s_3_7, -1, 3, 0}, /* 8 */ { 3, s_3_8, -1, 1, 0}, /* 9 */ { 3, s_3_9, -1, 1, 0}, /* 10 */ { 2, s_3_10, -1, 1, 0}, /* 11 */ { 3, s_3_11, -1, 1, 0}, /* 12 */ { 5, s_3_12, -1, 1, 0}, /* 13 */ { 5, s_3_13, -1, 1, 0}, /* 14 */ { 4, s_3_14, -1, 3, 0}, /* 15 */ { 4, s_3_15, -1, 2, 0}, /* 16 */ { 4, s_3_16, -1, 1, 0}, /* 17 */ { 3, s_3_17, -1, 1, 0}, /* 18 */ { 5, s_3_18, 17, 1, 0}, /* 19 */ { 3, s_3_19, -1, 1, 0}, /* 20 */ { 4, s_3_20, -1, 1, 0}, /* 21 */ { 4, s_3_21, -1, 3, 0}, /* 22 */ { 3, s_3_22, -1, 1, 0}, /* 23 */ { 3, s_3_23, -1, 1, 0}, /* 24 */ { 3, s_3_24, -1, 1, 0}, /* 25 */ { 5, s_3_25, -1, 1, 0}, /* 26 */ { 5, s_3_26, -1, 1, 0}, /* 27 */ { 4, s_3_27, -1, 2, 0}, /* 28 */ { 5, s_3_28, -1, 1, 0}, /* 29 */ { 3, s_3_29, -1, 1, 0}, /* 30 */ { 3, s_3_30, -1, 1, 0}, /* 31 */ { 5, s_3_31, 30, 1, 0}, /* 32 */ { 3, s_3_32, -1, 1, 0}, /* 33 */ { 4, s_3_33, -1, 1, 0}, /* 34 */ { 4, s_3_34, -1, 3, 0}, /* 35 */ { 3, s_3_35, -1, 1, 0}, /* 36 */ { 5, s_3_36, -1, 3, 0}, /* 37 */ { 3, s_3_37, -1, 1, 0}, /* 38 */ { 5, s_3_38, -1, 1, 0}, /* 39 */ { 4, s_3_39, -1, 1, 0}, /* 40 */ { 7, s_3_40, -1, 1, 0}, /* 41 */ { 4, s_3_41, -1, 1, 0}, /* 42 */ { 4, s_3_42, -1, 1, 0}, /* 43 */ { 3, s_3_43, -1, 3, 0}, /* 44 */ { 4, s_3_44, -1, 1, 0}, /* 45 */ { 2, s_3_45, -1, 1, 0}, /* 46 */ { 2, s_3_46, -1, 1, 0}, /* 47 */ { 2, s_3_47, -1, 1, 0}, /* 48 */ { 3, s_3_48, -1, 1, 0}, /* 49 */ { 3, s_3_49, -1, 3, 0}, /* 50 */ { 2, s_3_50, -1, 1, 0}, /* 51 */ { 2, s_3_51, -1, 1, 0}, /* 52 */ { 4, s_3_52, -1, 1, 0}, /* 53 */ { 6, s_3_53, -1, 1, 0}, /* 54 */ { 6, s_3_54, -1, 1, 0}, /* 55 */ { 5, s_3_55, -1, 1, 0}, /* 56 */ { 4, s_3_56, -1, 1, 0}, /* 57 */ { 4, s_3_57, -1, 1, 0}, /* 58 */ { 5, s_3_58, -1, 1, 0}, /* 59 */ { 5, s_3_59, -1, 3, 0}, /* 60 */ { 4, s_3_60, -1, 1, 0}, /* 61 */ { 4, s_3_61, -1, 1, 0} }; static const symbol s_4_0[2] = { 'e', 'a' }; static const symbol s_4_1[2] = { 'i', 'a' }; static const 
symbol s_4_2[3] = { 'e', 's', 'c' }; static const symbol s_4_3[4] = { 0xC4, 0x83, 's', 'c' }; static const symbol s_4_4[3] = { 'i', 'n', 'd' }; static const symbol s_4_5[4] = { 0xC3, 0xA2, 'n', 'd' }; static const symbol s_4_6[3] = { 'a', 'r', 'e' }; static const symbol s_4_7[3] = { 'e', 'r', 'e' }; static const symbol s_4_8[3] = { 'i', 'r', 'e' }; static const symbol s_4_9[4] = { 0xC3, 0xA2, 'r', 'e' }; static const symbol s_4_10[2] = { 's', 'e' }; static const symbol s_4_11[3] = { 'a', 's', 'e' }; static const symbol s_4_12[4] = { 's', 'e', 's', 'e' }; static const symbol s_4_13[3] = { 'i', 's', 'e' }; static const symbol s_4_14[3] = { 'u', 's', 'e' }; static const symbol s_4_15[4] = { 0xC3, 0xA2, 's', 'e' }; static const symbol s_4_16[5] = { 'e', 0xC5, 0x9F, 't', 'e' }; static const symbol s_4_17[6] = { 0xC4, 0x83, 0xC5, 0x9F, 't', 'e' }; static const symbol s_4_18[3] = { 'e', 'z', 'e' }; static const symbol s_4_19[2] = { 'a', 'i' }; static const symbol s_4_20[3] = { 'e', 'a', 'i' }; static const symbol s_4_21[3] = { 'i', 'a', 'i' }; static const symbol s_4_22[3] = { 's', 'e', 'i' }; static const symbol s_4_23[5] = { 'e', 0xC5, 0x9F, 't', 'i' }; static const symbol s_4_24[6] = { 0xC4, 0x83, 0xC5, 0x9F, 't', 'i' }; static const symbol s_4_25[2] = { 'u', 'i' }; static const symbol s_4_26[3] = { 'e', 'z', 'i' }; static const symbol s_4_27[4] = { 'a', 0xC5, 0x9F, 'i' }; static const symbol s_4_28[5] = { 's', 'e', 0xC5, 0x9F, 'i' }; static const symbol s_4_29[6] = { 'a', 's', 'e', 0xC5, 0x9F, 'i' }; static const symbol s_4_30[7] = { 's', 'e', 's', 'e', 0xC5, 0x9F, 'i' }; static const symbol s_4_31[6] = { 'i', 's', 'e', 0xC5, 0x9F, 'i' }; static const symbol s_4_32[6] = { 'u', 's', 'e', 0xC5, 0x9F, 'i' }; static const symbol s_4_33[7] = { 0xC3, 0xA2, 's', 'e', 0xC5, 0x9F, 'i' }; static const symbol s_4_34[4] = { 'i', 0xC5, 0x9F, 'i' }; static const symbol s_4_35[4] = { 'u', 0xC5, 0x9F, 'i' }; static const symbol s_4_36[5] = { 0xC3, 0xA2, 0xC5, 0x9F, 'i' }; static 
const symbol s_4_37[3] = { 0xC3, 0xA2, 'i' }; static const symbol s_4_38[4] = { 'a', 0xC5, 0xA3, 'i' }; static const symbol s_4_39[5] = { 'e', 'a', 0xC5, 0xA3, 'i' }; static const symbol s_4_40[5] = { 'i', 'a', 0xC5, 0xA3, 'i' }; static const symbol s_4_41[4] = { 'e', 0xC5, 0xA3, 'i' }; static const symbol s_4_42[4] = { 'i', 0xC5, 0xA3, 'i' }; static const symbol s_4_43[7] = { 'a', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_44[8] = { 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_45[9] = { 'a', 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_46[10] = { 's', 'e', 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_47[9] = { 'i', 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_48[9] = { 'u', 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_49[10] = { 0xC3, 0xA2, 's', 'e', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_50[7] = { 'i', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_51[7] = { 'u', 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_52[8] = { 0xC3, 0xA2, 'r', 0xC4, 0x83, 0xC5, 0xA3, 'i' }; static const symbol s_4_53[5] = { 0xC3, 0xA2, 0xC5, 0xA3, 'i' }; static const symbol s_4_54[2] = { 'a', 'm' }; static const symbol s_4_55[3] = { 'e', 'a', 'm' }; static const symbol s_4_56[3] = { 'i', 'a', 'm' }; static const symbol s_4_57[2] = { 'e', 'm' }; static const symbol s_4_58[4] = { 'a', 's', 'e', 'm' }; static const symbol s_4_59[5] = { 's', 'e', 's', 'e', 'm' }; static const symbol s_4_60[4] = { 'i', 's', 'e', 'm' }; static const symbol s_4_61[4] = { 'u', 's', 'e', 'm' }; static const symbol s_4_62[5] = { 0xC3, 0xA2, 's', 'e', 'm' }; static const symbol s_4_63[2] = { 'i', 'm' }; static const symbol s_4_64[3] = { 0xC4, 0x83, 'm' }; static const symbol s_4_65[5] = { 'a', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_66[6] = { 's', 'e', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_67[7] = { 'a', 's', 
'e', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_68[8] = { 's', 'e', 's', 'e', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_69[7] = { 'i', 's', 'e', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_70[7] = { 'u', 's', 'e', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_71[8] = { 0xC3, 0xA2, 's', 'e', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_72[5] = { 'i', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_73[5] = { 'u', 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_74[6] = { 0xC3, 0xA2, 'r', 0xC4, 0x83, 'm' }; static const symbol s_4_75[3] = { 0xC3, 0xA2, 'm' }; static const symbol s_4_76[2] = { 'a', 'u' }; static const symbol s_4_77[3] = { 'e', 'a', 'u' }; static const symbol s_4_78[3] = { 'i', 'a', 'u' }; static const symbol s_4_79[4] = { 'i', 'n', 'd', 'u' }; static const symbol s_4_80[5] = { 0xC3, 0xA2, 'n', 'd', 'u' }; static const symbol s_4_81[2] = { 'e', 'z' }; static const symbol s_4_82[6] = { 'e', 'a', 's', 'c', 0xC4, 0x83 }; static const symbol s_4_83[4] = { 'a', 'r', 0xC4, 0x83 }; static const symbol s_4_84[5] = { 's', 'e', 'r', 0xC4, 0x83 }; static const symbol s_4_85[6] = { 'a', 's', 'e', 'r', 0xC4, 0x83 }; static const symbol s_4_86[7] = { 's', 'e', 's', 'e', 'r', 0xC4, 0x83 }; static const symbol s_4_87[6] = { 'i', 's', 'e', 'r', 0xC4, 0x83 }; static const symbol s_4_88[6] = { 'u', 's', 'e', 'r', 0xC4, 0x83 }; static const symbol s_4_89[7] = { 0xC3, 0xA2, 's', 'e', 'r', 0xC4, 0x83 }; static const symbol s_4_90[4] = { 'i', 'r', 0xC4, 0x83 }; static const symbol s_4_91[4] = { 'u', 'r', 0xC4, 0x83 }; static const symbol s_4_92[5] = { 0xC3, 0xA2, 'r', 0xC4, 0x83 }; static const symbol s_4_93[5] = { 'e', 'a', 'z', 0xC4, 0x83 }; static const struct among a_4[94] = { /* 0 */ { 2, s_4_0, -1, 1, 0}, /* 1 */ { 2, s_4_1, -1, 1, 0}, /* 2 */ { 3, s_4_2, -1, 1, 0}, /* 3 */ { 4, s_4_3, -1, 1, 0}, /* 4 */ { 3, s_4_4, -1, 1, 0}, /* 5 */ { 4, s_4_5, -1, 1, 0}, /* 6 */ { 3, s_4_6, -1, 1, 0}, /* 7 */ { 3, s_4_7, -1, 1, 0}, /* 8 */ { 3, s_4_8, -1, 1, 0}, /* 
9 */ { 4, s_4_9, -1, 1, 0}, /* 10 */ { 2, s_4_10, -1, 2, 0}, /* 11 */ { 3, s_4_11, 10, 1, 0}, /* 12 */ { 4, s_4_12, 10, 2, 0}, /* 13 */ { 3, s_4_13, 10, 1, 0}, /* 14 */ { 3, s_4_14, 10, 1, 0}, /* 15 */ { 4, s_4_15, 10, 1, 0}, /* 16 */ { 5, s_4_16, -1, 1, 0}, /* 17 */ { 6, s_4_17, -1, 1, 0}, /* 18 */ { 3, s_4_18, -1, 1, 0}, /* 19 */ { 2, s_4_19, -1, 1, 0}, /* 20 */ { 3, s_4_20, 19, 1, 0}, /* 21 */ { 3, s_4_21, 19, 1, 0}, /* 22 */ { 3, s_4_22, -1, 2, 0}, /* 23 */ { 5, s_4_23, -1, 1, 0}, /* 24 */ { 6, s_4_24, -1, 1, 0}, /* 25 */ { 2, s_4_25, -1, 1, 0}, /* 26 */ { 3, s_4_26, -1, 1, 0}, /* 27 */ { 4, s_4_27, -1, 1, 0}, /* 28 */ { 5, s_4_28, -1, 2, 0}, /* 29 */ { 6, s_4_29, 28, 1, 0}, /* 30 */ { 7, s_4_30, 28, 2, 0}, /* 31 */ { 6, s_4_31, 28, 1, 0}, /* 32 */ { 6, s_4_32, 28, 1, 0}, /* 33 */ { 7, s_4_33, 28, 1, 0}, /* 34 */ { 4, s_4_34, -1, 1, 0}, /* 35 */ { 4, s_4_35, -1, 1, 0}, /* 36 */ { 5, s_4_36, -1, 1, 0}, /* 37 */ { 3, s_4_37, -1, 1, 0}, /* 38 */ { 4, s_4_38, -1, 2, 0}, /* 39 */ { 5, s_4_39, 38, 1, 0}, /* 40 */ { 5, s_4_40, 38, 1, 0}, /* 41 */ { 4, s_4_41, -1, 2, 0}, /* 42 */ { 4, s_4_42, -1, 2, 0}, /* 43 */ { 7, s_4_43, -1, 1, 0}, /* 44 */ { 8, s_4_44, -1, 2, 0}, /* 45 */ { 9, s_4_45, 44, 1, 0}, /* 46 */ { 10, s_4_46, 44, 2, 0}, /* 47 */ { 9, s_4_47, 44, 1, 0}, /* 48 */ { 9, s_4_48, 44, 1, 0}, /* 49 */ { 10, s_4_49, 44, 1, 0}, /* 50 */ { 7, s_4_50, -1, 1, 0}, /* 51 */ { 7, s_4_51, -1, 1, 0}, /* 52 */ { 8, s_4_52, -1, 1, 0}, /* 53 */ { 5, s_4_53, -1, 2, 0}, /* 54 */ { 2, s_4_54, -1, 1, 0}, /* 55 */ { 3, s_4_55, 54, 1, 0}, /* 56 */ { 3, s_4_56, 54, 1, 0}, /* 57 */ { 2, s_4_57, -1, 2, 0}, /* 58 */ { 4, s_4_58, 57, 1, 0}, /* 59 */ { 5, s_4_59, 57, 2, 0}, /* 60 */ { 4, s_4_60, 57, 1, 0}, /* 61 */ { 4, s_4_61, 57, 1, 0}, /* 62 */ { 5, s_4_62, 57, 1, 0}, /* 63 */ { 2, s_4_63, -1, 2, 0}, /* 64 */ { 3, s_4_64, -1, 2, 0}, /* 65 */ { 5, s_4_65, 64, 1, 0}, /* 66 */ { 6, s_4_66, 64, 2, 0}, /* 67 */ { 7, s_4_67, 66, 1, 0}, /* 68 */ { 8, s_4_68, 66, 2, 0}, /* 69 */ { 7, s_4_69, 
66, 1, 0}, /* 70 */ { 7, s_4_70, 66, 1, 0}, /* 71 */ { 8, s_4_71, 66, 1, 0}, /* 72 */ { 5, s_4_72, 64, 1, 0}, /* 73 */ { 5, s_4_73, 64, 1, 0}, /* 74 */ { 6, s_4_74, 64, 1, 0}, /* 75 */ { 3, s_4_75, -1, 2, 0}, /* 76 */ { 2, s_4_76, -1, 1, 0}, /* 77 */ { 3, s_4_77, 76, 1, 0}, /* 78 */ { 3, s_4_78, 76, 1, 0}, /* 79 */ { 4, s_4_79, -1, 1, 0}, /* 80 */ { 5, s_4_80, -1, 1, 0}, /* 81 */ { 2, s_4_81, -1, 1, 0}, /* 82 */ { 6, s_4_82, -1, 1, 0}, /* 83 */ { 4, s_4_83, -1, 1, 0}, /* 84 */ { 5, s_4_84, -1, 2, 0}, /* 85 */ { 6, s_4_85, 84, 1, 0}, /* 86 */ { 7, s_4_86, 84, 2, 0}, /* 87 */ { 6, s_4_87, 84, 1, 0}, /* 88 */ { 6, s_4_88, 84, 1, 0}, /* 89 */ { 7, s_4_89, 84, 1, 0}, /* 90 */ { 4, s_4_90, -1, 1, 0}, /* 91 */ { 4, s_4_91, -1, 1, 0}, /* 92 */ { 5, s_4_92, -1, 1, 0}, /* 93 */ { 5, s_4_93, -1, 1, 0} }; static const symbol s_5_0[1] = { 'a' }; static const symbol s_5_1[1] = { 'e' }; static const symbol s_5_2[2] = { 'i', 'e' }; static const symbol s_5_3[1] = { 'i' }; static const symbol s_5_4[2] = { 0xC4, 0x83 }; static const struct among a_5[5] = { /* 0 */ { 1, s_5_0, -1, 1, 0}, /* 1 */ { 1, s_5_1, -1, 1, 0}, /* 2 */ { 2, s_5_2, 1, 1, 0}, /* 3 */ { 1, s_5_3, -1, 1, 0}, /* 4 */ { 2, s_5_4, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 32, 0, 0, 4 }; static const symbol s_0[] = { 'u' }; static const symbol s_1[] = { 'U' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'I' }; static const symbol s_4[] = { 'i' }; static const symbol s_5[] = { 'u' }; static const symbol s_6[] = { 'a' }; static const symbol s_7[] = { 'e' }; static const symbol s_8[] = { 'i' }; static const symbol s_9[] = { 'a', 'b' }; static const symbol s_10[] = { 'i' }; static const symbol s_11[] = { 'a', 't' }; static const symbol s_12[] = { 'a', 0xC5, 0xA3, 'i' }; static const symbol s_13[] = { 'a', 'b', 'i', 'l' }; static const symbol s_14[] = { 'i', 'b', 'i', 'l' }; static const symbol s_15[] = { 'i', 'v' }; static const symbol 
s_16[] = { 'i', 'c' }; static const symbol s_17[] = { 'a', 't' }; static const symbol s_18[] = { 'i', 't' }; static const symbol s_19[] = { 0xC5, 0xA3 }; static const symbol s_20[] = { 't' }; static const symbol s_21[] = { 'i', 's', 't' }; static const symbol s_22[] = { 'u' }; static int r_prelude(struct SN_env * z) { while(1) { /* repeat, line 32 */ int c1 = z->c; while(1) { /* goto, line 32 */ int c2 = z->c; if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab1; z->bra = z->c; /* [, line 33 */ { int c3 = z->c; /* or, line 33 */ if (!(eq_s(z, 1, s_0))) goto lab3; z->ket = z->c; /* ], line 33 */ if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab3; { int ret = slice_from_s(z, 1, s_1); /* <-, line 33 */ if (ret < 0) return ret; } goto lab2; lab3: z->c = c3; if (!(eq_s(z, 1, s_2))) goto lab1; z->ket = z->c; /* ], line 34 */ if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab1; { int ret = slice_from_s(z, 1, s_3); /* <-, line 34 */ if (ret < 0) return ret; } } lab2: z->c = c2; break; lab1: z->c = c2; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* goto, line 32 */ } } continue; lab0: z->c = c1; break; } return 1; } static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 44 */ { int c2 = z->c; /* or, line 46 */ if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab2; { int c3 = z->c; /* or, line 45 */ if (out_grouping_U(z, g_v, 97, 259, 0)) goto lab4; { /* gopast */ /* grouping v, line 45 */ int ret = out_grouping_U(z, g_v, 97, 259, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab2; { /* gopast */ /* non v, line 45 */ int ret = in_grouping_U(z, g_v, 97, 259, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping_U(z, g_v, 97, 259, 0)) goto lab0; { int c4 = z->c; /* or, line 47 */ if (out_grouping_U(z, g_v, 97, 259, 0)) goto lab6; { /* gopast */ /* grouping v, line 47 */ int 
ret = out_grouping_U(z, g_v, 97, 259, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping_U(z, g_v, 97, 259, 0)) goto lab0; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 47 */ } } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 48 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 50 */ { /* gopast */ /* grouping v, line 51 */ int ret = out_grouping_U(z, g_v, 97, 259, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 51 */ int ret = in_grouping_U(z, g_v, 97, 259, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 51 */ { /* gopast */ /* grouping v, line 52 */ int ret = out_grouping_U(z, g_v, 97, 259, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 52 */ int ret = in_grouping_U(z, g_v, 97, 259, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[2] = z->c; /* setmark p2, line 52 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 56 */ int c1 = z->c; z->bra = z->c; /* [, line 58 */ if (z->c >= z->l || (z->p[z->c + 0] != 73 && z->p[z->c + 0] != 85)) among_var = 3; else among_var = find_among(z, a_0, 3); /* substring, line 58 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 58 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_4); /* <-, line 59 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_5); /* <-, line 60 */ if (ret < 0) return ret; } break; case 3: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 61 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int 
r_step_0(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 73 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((266786 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_1, 16); /* substring, line 73 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 73 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 73 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 75 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_6); /* <-, line 77 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_7); /* <-, line 79 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_8); /* <-, line 81 */ if (ret < 0) return ret; } break; case 5: { int m1 = z->l - z->c; (void)m1; /* not, line 83 */ if (!(eq_s_b(z, 2, s_9))) goto lab0; return 0; lab0: z->c = z->l - m1; } { int ret = slice_from_s(z, 1, s_10); /* <-, line 83 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 2, s_11); /* <-, line 85 */ if (ret < 0) return ret; } break; case 7: { int ret = slice_from_s(z, 4, s_12); /* <-, line 87 */ if (ret < 0) return ret; } break; } return 1; } static int r_combo_suffix(struct SN_env * z) { int among_var; { int m_test = z->l - z->c; /* test, line 91 */ z->ket = z->c; /* [, line 92 */ among_var = find_among_b(z, a_2, 46); /* substring, line 92 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 92 */ { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 92 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 4, s_13); /* <-, line 101 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 4, s_14); /* <-, line 104 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 2, s_15); /* <-, line 107 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 2, s_16); /* 
<-, line 113 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 2, s_17); /* <-, line 118 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_from_s(z, 2, s_18); /* <-, line 122 */ if (ret < 0) return ret; } break; } z->B[0] = 1; /* set standard_suffix_removed, line 125 */ z->c = z->l - m_test; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->B[0] = 0; /* unset standard_suffix_removed, line 130 */ while(1) { /* repeat, line 131 */ int m1 = z->l - z->c; (void)m1; { int ret = r_combo_suffix(z); if (ret == 0) goto lab0; /* call combo_suffix, line 131 */ if (ret < 0) return ret; } continue; lab0: z->c = z->l - m1; break; } z->ket = z->c; /* [, line 132 */ among_var = find_among_b(z, a_3, 62); /* substring, line 132 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 132 */ { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 132 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 149 */ if (ret < 0) return ret; } break; case 2: if (!(eq_s_b(z, 2, s_19))) return 0; z->bra = z->c; /* ], line 152 */ { int ret = slice_from_s(z, 1, s_20); /* <-, line 152 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 3, s_21); /* <-, line 156 */ if (ret < 0) return ret; } break; } z->B[0] = 1; /* set standard_suffix_removed, line 160 */ return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 164 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 164 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 165 */ among_var = find_among_b(z, a_4, 94); /* substring, line 165 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 165 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int m2 = z->l - z->c; (void)m2; /* or, line 200 */ if (out_grouping_b_U(z, g_v, 97, 259, 
0)) goto lab1; goto lab0; lab1: z->c = z->l - m2; if (!(eq_s_b(z, 1, s_22))) { z->lb = mlimit; return 0; } } lab0: { int ret = slice_del(z); /* delete, line 200 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 214 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } static int r_vowel_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 219 */ among_var = find_among_b(z, a_5, 5); /* substring, line 219 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 219 */ { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 219 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 220 */ if (ret < 0) return ret; } break; } return 1; } extern int romanian_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 226 */ { int ret = r_prelude(z); if (ret == 0) goto lab0; /* call prelude, line 226 */ if (ret < 0) return ret; } lab0: z->c = c1; } { int c2 = z->c; /* do, line 227 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab1; /* call mark_regions, line 227 */ if (ret < 0) return ret; } lab1: z->c = c2; } z->lb = z->c; z->c = z->l; /* backwards, line 228 */ { int m3 = z->l - z->c; (void)m3; /* do, line 229 */ { int ret = r_step_0(z); if (ret == 0) goto lab2; /* call step_0, line 229 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 230 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab3; /* call standard_suffix, line 230 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } { int m5 = z->l - z->c; (void)m5; /* do, line 231 */ { int m6 = z->l - z->c; (void)m6; /* or, line 231 */ if (!(z->B[0])) goto lab6; /* Boolean test standard_suffix_removed, line 231 */ goto lab5; lab6: z->c = z->l - m6; { int ret = r_verb_suffix(z); if (ret == 0) goto lab4; /* call verb_suffix, line 231 */ if (ret < 0) return ret; } } lab5: lab4: z->c = z->l - m5; } { int m7 = z->l - z->c; 
(void)m7; /* do, line 232 */ { int ret = r_vowel_suffix(z); if (ret == 0) goto lab7; /* call vowel_suffix, line 232 */ if (ret < 0) return ret; } lab7: z->c = z->l - m7; } z->c = z->lb; { int c8 = z->c; /* do, line 234 */ { int ret = r_postlude(z); if (ret == 0) goto lab8; /* call postlude, line 234 */ if (ret < 0) return ret; } lab8: z->c = c8; } return 1; } extern struct SN_env * romanian_UTF_8_create_env(void) { return SN_create_env(0, 3, 1); } extern void romanian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_romanian.h000066400000000000000000000004711456444476200311120ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* romanian_UTF_8_create_env(void); extern void romanian_UTF_8_close_env(struct SN_env* z); extern int romanian_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_russian.c000066400000000000000000000615401456444476200307710ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int russian_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_tidy_up(struct SN_env * z); static int r_derivational(struct SN_env * z); static int r_noun(struct SN_env * z); static int r_verb(struct SN_env * z); static int r_reflexive(struct SN_env * z); static int r_adjectival(struct SN_env * z); static int r_adjective(struct SN_env * z); static int r_perfective_gerund(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * russian_UTF_8_create_env(void); extern void russian_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } 
#endif static const symbol s_0_0[10] = { 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C }; static const symbol s_0_1[12] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C }; static const symbol s_0_2[12] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8, 0xD1, 0x81, 0xD1, 0x8C }; static const symbol s_0_3[2] = { 0xD0, 0xB2 }; static const symbol s_0_4[4] = { 0xD1, 0x8B, 0xD0, 0xB2 }; static const symbol s_0_5[4] = { 0xD0, 0xB8, 0xD0, 0xB2 }; static const symbol s_0_6[6] = { 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 }; static const symbol s_0_7[8] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 }; static const symbol s_0_8[8] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88, 0xD0, 0xB8 }; static const struct among a_0[9] = { /* 0 */ { 10, s_0_0, -1, 1, 0}, /* 1 */ { 12, s_0_1, 0, 2, 0}, /* 2 */ { 12, s_0_2, 0, 2, 0}, /* 3 */ { 2, s_0_3, -1, 1, 0}, /* 4 */ { 4, s_0_4, 3, 2, 0}, /* 5 */ { 4, s_0_5, 3, 2, 0}, /* 6 */ { 6, s_0_6, -1, 1, 0}, /* 7 */ { 8, s_0_7, 6, 2, 0}, /* 8 */ { 8, s_0_8, 6, 2, 0} }; static const symbol s_1_0[6] = { 0xD0, 0xB5, 0xD0, 0xBC, 0xD1, 0x83 }; static const symbol s_1_1[6] = { 0xD0, 0xBE, 0xD0, 0xBC, 0xD1, 0x83 }; static const symbol s_1_2[4] = { 0xD1, 0x8B, 0xD1, 0x85 }; static const symbol s_1_3[4] = { 0xD0, 0xB8, 0xD1, 0x85 }; static const symbol s_1_4[4] = { 0xD1, 0x83, 0xD1, 0x8E }; static const symbol s_1_5[4] = { 0xD1, 0x8E, 0xD1, 0x8E }; static const symbol s_1_6[4] = { 0xD0, 0xB5, 0xD1, 0x8E }; static const symbol s_1_7[4] = { 0xD0, 0xBE, 0xD1, 0x8E }; static const symbol s_1_8[4] = { 0xD1, 0x8F, 0xD1, 0x8F }; static const symbol s_1_9[4] = { 0xD0, 0xB0, 0xD1, 0x8F }; static const symbol s_1_10[4] = { 0xD1, 0x8B, 0xD0, 0xB5 }; static const symbol s_1_11[4] = { 0xD0, 0xB5, 0xD0, 0xB5 }; static const symbol s_1_12[4] = { 0xD0, 0xB8, 0xD0, 0xB5 }; static const symbol s_1_13[4] = { 0xD0, 0xBE, 0xD0, 0xB5 }; static const symbol s_1_14[6] = { 0xD1, 0x8B, 0xD0, 0xBC, 0xD0, 0xB8 }; static const symbol s_1_15[6] = { 
0xD0, 0xB8, 0xD0, 0xBC, 0xD0, 0xB8 }; static const symbol s_1_16[4] = { 0xD1, 0x8B, 0xD0, 0xB9 }; static const symbol s_1_17[4] = { 0xD0, 0xB5, 0xD0, 0xB9 }; static const symbol s_1_18[4] = { 0xD0, 0xB8, 0xD0, 0xB9 }; static const symbol s_1_19[4] = { 0xD0, 0xBE, 0xD0, 0xB9 }; static const symbol s_1_20[4] = { 0xD1, 0x8B, 0xD0, 0xBC }; static const symbol s_1_21[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; static const symbol s_1_22[4] = { 0xD0, 0xB8, 0xD0, 0xBC }; static const symbol s_1_23[4] = { 0xD0, 0xBE, 0xD0, 0xBC }; static const symbol s_1_24[6] = { 0xD0, 0xB5, 0xD0, 0xB3, 0xD0, 0xBE }; static const symbol s_1_25[6] = { 0xD0, 0xBE, 0xD0, 0xB3, 0xD0, 0xBE }; static const struct among a_1[26] = { /* 0 */ { 6, s_1_0, -1, 1, 0}, /* 1 */ { 6, s_1_1, -1, 1, 0}, /* 2 */ { 4, s_1_2, -1, 1, 0}, /* 3 */ { 4, s_1_3, -1, 1, 0}, /* 4 */ { 4, s_1_4, -1, 1, 0}, /* 5 */ { 4, s_1_5, -1, 1, 0}, /* 6 */ { 4, s_1_6, -1, 1, 0}, /* 7 */ { 4, s_1_7, -1, 1, 0}, /* 8 */ { 4, s_1_8, -1, 1, 0}, /* 9 */ { 4, s_1_9, -1, 1, 0}, /* 10 */ { 4, s_1_10, -1, 1, 0}, /* 11 */ { 4, s_1_11, -1, 1, 0}, /* 12 */ { 4, s_1_12, -1, 1, 0}, /* 13 */ { 4, s_1_13, -1, 1, 0}, /* 14 */ { 6, s_1_14, -1, 1, 0}, /* 15 */ { 6, s_1_15, -1, 1, 0}, /* 16 */ { 4, s_1_16, -1, 1, 0}, /* 17 */ { 4, s_1_17, -1, 1, 0}, /* 18 */ { 4, s_1_18, -1, 1, 0}, /* 19 */ { 4, s_1_19, -1, 1, 0}, /* 20 */ { 4, s_1_20, -1, 1, 0}, /* 21 */ { 4, s_1_21, -1, 1, 0}, /* 22 */ { 4, s_1_22, -1, 1, 0}, /* 23 */ { 4, s_1_23, -1, 1, 0}, /* 24 */ { 6, s_1_24, -1, 1, 0}, /* 25 */ { 6, s_1_25, -1, 1, 0} }; static const symbol s_2_0[4] = { 0xD0, 0xB2, 0xD1, 0x88 }; static const symbol s_2_1[6] = { 0xD1, 0x8B, 0xD0, 0xB2, 0xD1, 0x88 }; static const symbol s_2_2[6] = { 0xD0, 0xB8, 0xD0, 0xB2, 0xD1, 0x88 }; static const symbol s_2_3[2] = { 0xD1, 0x89 }; static const symbol s_2_4[4] = { 0xD1, 0x8E, 0xD1, 0x89 }; static const symbol s_2_5[6] = { 0xD1, 0x83, 0xD1, 0x8E, 0xD1, 0x89 }; static const symbol s_2_6[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; static const symbol 
s_2_7[4] = { 0xD0, 0xBD, 0xD0, 0xBD }; static const struct among a_2[8] = { /* 0 */ { 4, s_2_0, -1, 1, 0}, /* 1 */ { 6, s_2_1, 0, 2, 0}, /* 2 */ { 6, s_2_2, 0, 2, 0}, /* 3 */ { 2, s_2_3, -1, 1, 0}, /* 4 */ { 4, s_2_4, 3, 1, 0}, /* 5 */ { 6, s_2_5, 4, 2, 0}, /* 6 */ { 4, s_2_6, -1, 1, 0}, /* 7 */ { 4, s_2_7, -1, 1, 0} }; static const symbol s_3_0[4] = { 0xD1, 0x81, 0xD1, 0x8C }; static const symbol s_3_1[4] = { 0xD1, 0x81, 0xD1, 0x8F }; static const struct among a_3[2] = { /* 0 */ { 4, s_3_0, -1, 1, 0}, /* 1 */ { 4, s_3_1, -1, 1, 0} }; static const symbol s_4_0[4] = { 0xD1, 0x8B, 0xD1, 0x82 }; static const symbol s_4_1[4] = { 0xD1, 0x8E, 0xD1, 0x82 }; static const symbol s_4_2[6] = { 0xD1, 0x83, 0xD1, 0x8E, 0xD1, 0x82 }; static const symbol s_4_3[4] = { 0xD1, 0x8F, 0xD1, 0x82 }; static const symbol s_4_4[4] = { 0xD0, 0xB5, 0xD1, 0x82 }; static const symbol s_4_5[6] = { 0xD1, 0x83, 0xD0, 0xB5, 0xD1, 0x82 }; static const symbol s_4_6[4] = { 0xD0, 0xB8, 0xD1, 0x82 }; static const symbol s_4_7[4] = { 0xD0, 0xBD, 0xD1, 0x8B }; static const symbol s_4_8[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD1, 0x8B }; static const symbol s_4_9[4] = { 0xD1, 0x82, 0xD1, 0x8C }; static const symbol s_4_10[6] = { 0xD1, 0x8B, 0xD1, 0x82, 0xD1, 0x8C }; static const symbol s_4_11[6] = { 0xD0, 0xB8, 0xD1, 0x82, 0xD1, 0x8C }; static const symbol s_4_12[6] = { 0xD0, 0xB5, 0xD1, 0x88, 0xD1, 0x8C }; static const symbol s_4_13[6] = { 0xD0, 0xB8, 0xD1, 0x88, 0xD1, 0x8C }; static const symbol s_4_14[2] = { 0xD1, 0x8E }; static const symbol s_4_15[4] = { 0xD1, 0x83, 0xD1, 0x8E }; static const symbol s_4_16[4] = { 0xD0, 0xBB, 0xD0, 0xB0 }; static const symbol s_4_17[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xB0 }; static const symbol s_4_18[6] = { 0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xB0 }; static const symbol s_4_19[4] = { 0xD0, 0xBD, 0xD0, 0xB0 }; static const symbol s_4_20[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD0, 0xB0 }; static const symbol s_4_21[6] = { 0xD0, 0xB5, 0xD1, 0x82, 0xD0, 0xB5 }; static const symbol 
s_4_22[6] = { 0xD0, 0xB8, 0xD1, 0x82, 0xD0, 0xB5 }; static const symbol s_4_23[6] = { 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 }; static const symbol s_4_24[8] = { 0xD1, 0x83, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 }; static const symbol s_4_25[8] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x82, 0xD0, 0xB5 }; static const symbol s_4_26[4] = { 0xD0, 0xBB, 0xD0, 0xB8 }; static const symbol s_4_27[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xB8 }; static const symbol s_4_28[6] = { 0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xB8 }; static const symbol s_4_29[2] = { 0xD0, 0xB9 }; static const symbol s_4_30[4] = { 0xD1, 0x83, 0xD0, 0xB9 }; static const symbol s_4_31[4] = { 0xD0, 0xB5, 0xD0, 0xB9 }; static const symbol s_4_32[2] = { 0xD0, 0xBB }; static const symbol s_4_33[4] = { 0xD1, 0x8B, 0xD0, 0xBB }; static const symbol s_4_34[4] = { 0xD0, 0xB8, 0xD0, 0xBB }; static const symbol s_4_35[4] = { 0xD1, 0x8B, 0xD0, 0xBC }; static const symbol s_4_36[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; static const symbol s_4_37[4] = { 0xD0, 0xB8, 0xD0, 0xBC }; static const symbol s_4_38[2] = { 0xD0, 0xBD }; static const symbol s_4_39[4] = { 0xD0, 0xB5, 0xD0, 0xBD }; static const symbol s_4_40[4] = { 0xD0, 0xBB, 0xD0, 0xBE }; static const symbol s_4_41[6] = { 0xD1, 0x8B, 0xD0, 0xBB, 0xD0, 0xBE }; static const symbol s_4_42[6] = { 0xD0, 0xB8, 0xD0, 0xBB, 0xD0, 0xBE }; static const symbol s_4_43[4] = { 0xD0, 0xBD, 0xD0, 0xBE }; static const symbol s_4_44[6] = { 0xD0, 0xB5, 0xD0, 0xBD, 0xD0, 0xBE }; static const symbol s_4_45[6] = { 0xD0, 0xBD, 0xD0, 0xBD, 0xD0, 0xBE }; static const struct among a_4[46] = { /* 0 */ { 4, s_4_0, -1, 2, 0}, /* 1 */ { 4, s_4_1, -1, 1, 0}, /* 2 */ { 6, s_4_2, 1, 2, 0}, /* 3 */ { 4, s_4_3, -1, 2, 0}, /* 4 */ { 4, s_4_4, -1, 1, 0}, /* 5 */ { 6, s_4_5, 4, 2, 0}, /* 6 */ { 4, s_4_6, -1, 2, 0}, /* 7 */ { 4, s_4_7, -1, 1, 0}, /* 8 */ { 6, s_4_8, 7, 2, 0}, /* 9 */ { 4, s_4_9, -1, 1, 0}, /* 10 */ { 6, s_4_10, 9, 2, 0}, /* 11 */ { 6, s_4_11, 9, 2, 0}, /* 12 */ { 6, s_4_12, -1, 1, 0}, /* 13 */ { 6, s_4_13, -1, 2, 
0}, /* 14 */ { 2, s_4_14, -1, 2, 0}, /* 15 */ { 4, s_4_15, 14, 2, 0}, /* 16 */ { 4, s_4_16, -1, 1, 0}, /* 17 */ { 6, s_4_17, 16, 2, 0}, /* 18 */ { 6, s_4_18, 16, 2, 0}, /* 19 */ { 4, s_4_19, -1, 1, 0}, /* 20 */ { 6, s_4_20, 19, 2, 0}, /* 21 */ { 6, s_4_21, -1, 1, 0}, /* 22 */ { 6, s_4_22, -1, 2, 0}, /* 23 */ { 6, s_4_23, -1, 1, 0}, /* 24 */ { 8, s_4_24, 23, 2, 0}, /* 25 */ { 8, s_4_25, 23, 2, 0}, /* 26 */ { 4, s_4_26, -1, 1, 0}, /* 27 */ { 6, s_4_27, 26, 2, 0}, /* 28 */ { 6, s_4_28, 26, 2, 0}, /* 29 */ { 2, s_4_29, -1, 1, 0}, /* 30 */ { 4, s_4_30, 29, 2, 0}, /* 31 */ { 4, s_4_31, 29, 2, 0}, /* 32 */ { 2, s_4_32, -1, 1, 0}, /* 33 */ { 4, s_4_33, 32, 2, 0}, /* 34 */ { 4, s_4_34, 32, 2, 0}, /* 35 */ { 4, s_4_35, -1, 2, 0}, /* 36 */ { 4, s_4_36, -1, 1, 0}, /* 37 */ { 4, s_4_37, -1, 2, 0}, /* 38 */ { 2, s_4_38, -1, 1, 0}, /* 39 */ { 4, s_4_39, 38, 2, 0}, /* 40 */ { 4, s_4_40, -1, 1, 0}, /* 41 */ { 6, s_4_41, 40, 2, 0}, /* 42 */ { 6, s_4_42, 40, 2, 0}, /* 43 */ { 4, s_4_43, -1, 1, 0}, /* 44 */ { 6, s_4_44, 43, 2, 0}, /* 45 */ { 6, s_4_45, 43, 1, 0} }; static const symbol s_5_0[2] = { 0xD1, 0x83 }; static const symbol s_5_1[4] = { 0xD1, 0x8F, 0xD1, 0x85 }; static const symbol s_5_2[6] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD1, 0x85 }; static const symbol s_5_3[4] = { 0xD0, 0xB0, 0xD1, 0x85 }; static const symbol s_5_4[2] = { 0xD1, 0x8B }; static const symbol s_5_5[2] = { 0xD1, 0x8C }; static const symbol s_5_6[2] = { 0xD1, 0x8E }; static const symbol s_5_7[4] = { 0xD1, 0x8C, 0xD1, 0x8E }; static const symbol s_5_8[4] = { 0xD0, 0xB8, 0xD1, 0x8E }; static const symbol s_5_9[2] = { 0xD1, 0x8F }; static const symbol s_5_10[4] = { 0xD1, 0x8C, 0xD1, 0x8F }; static const symbol s_5_11[4] = { 0xD0, 0xB8, 0xD1, 0x8F }; static const symbol s_5_12[2] = { 0xD0, 0xB0 }; static const symbol s_5_13[4] = { 0xD0, 0xB5, 0xD0, 0xB2 }; static const symbol s_5_14[4] = { 0xD0, 0xBE, 0xD0, 0xB2 }; static const symbol s_5_15[2] = { 0xD0, 0xB5 }; static const symbol s_5_16[4] = { 0xD1, 0x8C, 0xD0, 0xB5 }; 
static const symbol s_5_17[4] = { 0xD0, 0xB8, 0xD0, 0xB5 }; static const symbol s_5_18[2] = { 0xD0, 0xB8 }; static const symbol s_5_19[4] = { 0xD0, 0xB5, 0xD0, 0xB8 }; static const symbol s_5_20[4] = { 0xD0, 0xB8, 0xD0, 0xB8 }; static const symbol s_5_21[6] = { 0xD1, 0x8F, 0xD0, 0xBC, 0xD0, 0xB8 }; static const symbol s_5_22[8] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD0, 0xBC, 0xD0, 0xB8 }; static const symbol s_5_23[6] = { 0xD0, 0xB0, 0xD0, 0xBC, 0xD0, 0xB8 }; static const symbol s_5_24[2] = { 0xD0, 0xB9 }; static const symbol s_5_25[4] = { 0xD0, 0xB5, 0xD0, 0xB9 }; static const symbol s_5_26[6] = { 0xD0, 0xB8, 0xD0, 0xB5, 0xD0, 0xB9 }; static const symbol s_5_27[4] = { 0xD0, 0xB8, 0xD0, 0xB9 }; static const symbol s_5_28[4] = { 0xD0, 0xBE, 0xD0, 0xB9 }; static const symbol s_5_29[4] = { 0xD1, 0x8F, 0xD0, 0xBC }; static const symbol s_5_30[6] = { 0xD0, 0xB8, 0xD1, 0x8F, 0xD0, 0xBC }; static const symbol s_5_31[4] = { 0xD0, 0xB0, 0xD0, 0xBC }; static const symbol s_5_32[4] = { 0xD0, 0xB5, 0xD0, 0xBC }; static const symbol s_5_33[6] = { 0xD0, 0xB8, 0xD0, 0xB5, 0xD0, 0xBC }; static const symbol s_5_34[4] = { 0xD0, 0xBE, 0xD0, 0xBC }; static const symbol s_5_35[2] = { 0xD0, 0xBE }; static const struct among a_5[36] = { /* 0 */ { 2, s_5_0, -1, 1, 0}, /* 1 */ { 4, s_5_1, -1, 1, 0}, /* 2 */ { 6, s_5_2, 1, 1, 0}, /* 3 */ { 4, s_5_3, -1, 1, 0}, /* 4 */ { 2, s_5_4, -1, 1, 0}, /* 5 */ { 2, s_5_5, -1, 1, 0}, /* 6 */ { 2, s_5_6, -1, 1, 0}, /* 7 */ { 4, s_5_7, 6, 1, 0}, /* 8 */ { 4, s_5_8, 6, 1, 0}, /* 9 */ { 2, s_5_9, -1, 1, 0}, /* 10 */ { 4, s_5_10, 9, 1, 0}, /* 11 */ { 4, s_5_11, 9, 1, 0}, /* 12 */ { 2, s_5_12, -1, 1, 0}, /* 13 */ { 4, s_5_13, -1, 1, 0}, /* 14 */ { 4, s_5_14, -1, 1, 0}, /* 15 */ { 2, s_5_15, -1, 1, 0}, /* 16 */ { 4, s_5_16, 15, 1, 0}, /* 17 */ { 4, s_5_17, 15, 1, 0}, /* 18 */ { 2, s_5_18, -1, 1, 0}, /* 19 */ { 4, s_5_19, 18, 1, 0}, /* 20 */ { 4, s_5_20, 18, 1, 0}, /* 21 */ { 6, s_5_21, 18, 1, 0}, /* 22 */ { 8, s_5_22, 21, 1, 0}, /* 23 */ { 6, s_5_23, 18, 1, 0}, /* 24 
*/ { 2, s_5_24, -1, 1, 0}, /* 25 */ { 4, s_5_25, 24, 1, 0}, /* 26 */ { 6, s_5_26, 25, 1, 0}, /* 27 */ { 4, s_5_27, 24, 1, 0}, /* 28 */ { 4, s_5_28, 24, 1, 0}, /* 29 */ { 4, s_5_29, -1, 1, 0}, /* 30 */ { 6, s_5_30, 29, 1, 0}, /* 31 */ { 4, s_5_31, -1, 1, 0}, /* 32 */ { 4, s_5_32, -1, 1, 0}, /* 33 */ { 6, s_5_33, 32, 1, 0}, /* 34 */ { 4, s_5_34, -1, 1, 0}, /* 35 */ { 2, s_5_35, -1, 1, 0} }; static const symbol s_6_0[6] = { 0xD0, 0xBE, 0xD1, 0x81, 0xD1, 0x82 }; static const symbol s_6_1[8] = { 0xD0, 0xBE, 0xD1, 0x81, 0xD1, 0x82, 0xD1, 0x8C }; static const struct among a_6[2] = { /* 0 */ { 6, s_6_0, -1, 1, 0}, /* 1 */ { 8, s_6_1, -1, 1, 0} }; static const symbol s_7_0[6] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x88 }; static const symbol s_7_1[2] = { 0xD1, 0x8C }; static const symbol s_7_2[8] = { 0xD0, 0xB5, 0xD0, 0xB9, 0xD1, 0x88, 0xD0, 0xB5 }; static const symbol s_7_3[2] = { 0xD0, 0xBD }; static const struct among a_7[4] = { /* 0 */ { 6, s_7_0, -1, 1, 0}, /* 1 */ { 2, s_7_1, -1, 3, 0}, /* 2 */ { 8, s_7_2, -1, 1, 0}, /* 3 */ { 2, s_7_3, -1, 2, 0} }; static const unsigned char g_v[] = { 33, 65, 8, 232 }; static const symbol s_0[] = { 0xD0, 0xB0 }; static const symbol s_1[] = { 0xD1, 0x8F }; static const symbol s_2[] = { 0xD0, 0xB0 }; static const symbol s_3[] = { 0xD1, 0x8F }; static const symbol s_4[] = { 0xD0, 0xB0 }; static const symbol s_5[] = { 0xD1, 0x8F }; static const symbol s_6[] = { 0xD0, 0xBD }; static const symbol s_7[] = { 0xD0, 0xBD }; static const symbol s_8[] = { 0xD0, 0xBD }; static const symbol s_9[] = { 0xD0, 0xB8 }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; { int c1 = z->c; /* do, line 61 */ { /* gopast */ /* grouping v, line 62 */ int ret = out_grouping_U(z, g_v, 1072, 1103, 1); if (ret < 0) goto lab0; z->c += ret; } z->I[0] = z->c; /* setmark pV, line 62 */ { /* gopast */ /* non v, line 62 */ int ret = in_grouping_U(z, g_v, 1072, 1103, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* grouping v, line 63 
*/ int ret = out_grouping_U(z, g_v, 1072, 1103, 1); if (ret < 0) goto lab0; z->c += ret; } { /* gopast */ /* non v, line 63 */ int ret = in_grouping_U(z, g_v, 1072, 1103, 1); if (ret < 0) goto lab0; z->c += ret; } z->I[1] = z->c; /* setmark p2, line 63 */ lab0: z->c = c1; } return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_perfective_gerund(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 72 */ among_var = find_among_b(z, a_0, 9); /* substring, line 72 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 72 */ switch(among_var) { case 0: return 0; case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 76 */ if (!(eq_s_b(z, 2, s_0))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 2, s_1))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 76 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 83 */ if (ret < 0) return ret; } break; } return 1; } static int r_adjective(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 88 */ among_var = find_among_b(z, a_1, 26); /* substring, line 88 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 88 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 97 */ if (ret < 0) return ret; } break; } return 1; } static int r_adjectival(struct SN_env * z) { int among_var; { int ret = r_adjective(z); if (ret == 0) return 0; /* call adjective, line 102 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 109 */ z->ket = z->c; /* [, line 110 */ among_var = find_among_b(z, a_2, 8); /* substring, line 110 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 110 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab0; } case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 115 */ if (!(eq_s_b(z, 2, s_2))) goto lab2; goto lab1; lab2: z->c = z->l - m1; if (!(eq_s_b(z, 2, s_3))) { z->c 
= z->l - m_keep; goto lab0; } } lab1: { int ret = slice_del(z); /* delete, line 115 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 122 */ if (ret < 0) return ret; } break; } lab0: ; } return 1; } static int r_reflexive(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 129 */ if (z->c - 3 <= z->lb || (z->p[z->c - 1] != 140 && z->p[z->c - 1] != 143)) return 0; among_var = find_among_b(z, a_3, 2); /* substring, line 129 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 129 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 132 */ if (ret < 0) return ret; } break; } return 1; } static int r_verb(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 137 */ among_var = find_among_b(z, a_4, 46); /* substring, line 137 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 137 */ switch(among_var) { case 0: return 0; case 1: { int m1 = z->l - z->c; (void)m1; /* or, line 143 */ if (!(eq_s_b(z, 2, s_4))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 2, s_5))) return 0; } lab0: { int ret = slice_del(z); /* delete, line 143 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 151 */ if (ret < 0) return ret; } break; } return 1; } static int r_noun(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 160 */ among_var = find_among_b(z, a_5, 36); /* substring, line 160 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 160 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 167 */ if (ret < 0) return ret; } break; } return 1; } static int r_derivational(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 176 */ if (z->c - 5 <= z->lb || (z->p[z->c - 1] != 130 && z->p[z->c - 1] != 140)) return 0; among_var = find_among_b(z, a_6, 2); /* substring, line 176 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 176 */ { int ret = r_R2(z); if (ret == 0) return 0; 
/* call R2, line 176 */ if (ret < 0) return ret; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 179 */ if (ret < 0) return ret; } break; } return 1; } static int r_tidy_up(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 184 */ among_var = find_among_b(z, a_7, 4); /* substring, line 184 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 184 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 188 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 189 */ if (!(eq_s_b(z, 2, s_6))) return 0; z->bra = z->c; /* ], line 189 */ if (!(eq_s_b(z, 2, s_7))) return 0; { int ret = slice_del(z); /* delete, line 189 */ if (ret < 0) return ret; } break; case 2: if (!(eq_s_b(z, 2, s_8))) return 0; { int ret = slice_del(z); /* delete, line 192 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_del(z); /* delete, line 194 */ if (ret < 0) return ret; } break; } return 1; } extern int russian_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 201 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 201 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 202 */ { int mlimit; /* setlimit, line 202 */ int m2 = z->l - z->c; (void)m2; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 202 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m2; { int m3 = z->l - z->c; (void)m3; /* do, line 203 */ { int m4 = z->l - z->c; (void)m4; /* or, line 204 */ { int ret = r_perfective_gerund(z); if (ret == 0) goto lab3; /* call perfective_gerund, line 204 */ if (ret < 0) return ret; } goto lab2; lab3: z->c = z->l - m4; { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 205 */ { int ret = r_reflexive(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call reflexive, line 205 */ if (ret < 0) return ret; } lab4: ; } { int m5 = z->l - z->c; (void)m5; /* or, line 206 */ { int ret = 
r_adjectival(z); if (ret == 0) goto lab6; /* call adjectival, line 206 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m5; { int ret = r_verb(z); if (ret == 0) goto lab7; /* call verb, line 206 */ if (ret < 0) return ret; } goto lab5; lab7: z->c = z->l - m5; { int ret = r_noun(z); if (ret == 0) goto lab1; /* call noun, line 206 */ if (ret < 0) return ret; } } lab5: ; } lab2: lab1: z->c = z->l - m3; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 209 */ z->ket = z->c; /* [, line 209 */ if (!(eq_s_b(z, 2, s_9))) { z->c = z->l - m_keep; goto lab8; } z->bra = z->c; /* ], line 209 */ { int ret = slice_del(z); /* delete, line 209 */ if (ret < 0) return ret; } lab8: ; } { int m6 = z->l - z->c; (void)m6; /* do, line 212 */ { int ret = r_derivational(z); if (ret == 0) goto lab9; /* call derivational, line 212 */ if (ret < 0) return ret; } lab9: z->c = z->l - m6; } { int m7 = z->l - z->c; (void)m7; /* do, line 213 */ { int ret = r_tidy_up(z); if (ret == 0) goto lab10; /* call tidy_up, line 213 */ if (ret < 0) return ret; } lab10: z->c = z->l - m7; } z->lb = mlimit; } z->c = z->lb; return 1; } extern struct SN_env * russian_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } extern void russian_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_russian.h000066400000000000000000000004661456444476200307760ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* russian_UTF_8_create_env(void); extern void russian_UTF_8_close_env(struct SN_env* z); extern int russian_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_spanish.c000066400000000000000000001212241456444476200307460ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C 
compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int spanish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_residual_suffix(struct SN_env * z); static int r_verb_suffix(struct SN_env * z); static int r_y_verb_suffix(struct SN_env * z); static int r_standard_suffix(struct SN_env * z); static int r_attached_pronoun(struct SN_env * z); static int r_R2(struct SN_env * z); static int r_R1(struct SN_env * z); static int r_RV(struct SN_env * z); static int r_mark_regions(struct SN_env * z); static int r_postlude(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * spanish_UTF_8_create_env(void); extern void spanish_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_1[2] = { 0xC3, 0xA1 }; static const symbol s_0_2[2] = { 0xC3, 0xA9 }; static const symbol s_0_3[2] = { 0xC3, 0xAD }; static const symbol s_0_4[2] = { 0xC3, 0xB3 }; static const symbol s_0_5[2] = { 0xC3, 0xBA }; static const struct among a_0[6] = { /* 0 */ { 0, 0, -1, 6, 0}, /* 1 */ { 2, s_0_1, 0, 1, 0}, /* 2 */ { 2, s_0_2, 0, 2, 0}, /* 3 */ { 2, s_0_3, 0, 3, 0}, /* 4 */ { 2, s_0_4, 0, 4, 0}, /* 5 */ { 2, s_0_5, 0, 5, 0} }; static const symbol s_1_0[2] = { 'l', 'a' }; static const symbol s_1_1[4] = { 's', 'e', 'l', 'a' }; static const symbol s_1_2[2] = { 'l', 'e' }; static const symbol s_1_3[2] = { 'm', 'e' }; static const symbol s_1_4[2] = { 's', 'e' }; static const symbol s_1_5[2] = { 'l', 'o' }; static const symbol s_1_6[4] = { 's', 'e', 'l', 'o' }; static const symbol s_1_7[3] = { 'l', 'a', 's' }; static const symbol s_1_8[5] = { 's', 'e', 'l', 'a', 's' }; static const symbol s_1_9[3] = { 'l', 'e', 's' }; static const symbol s_1_10[3] = { 'l', 'o', 's' }; static const symbol s_1_11[5] = { 's', 'e', 'l', 'o', 's' }; static const symbol s_1_12[3] = { 'n', 'o', 's' }; static const struct among a_1[13] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 4, s_1_1, 0, -1, 0}, /* 2 */ { 2, 
s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0}, /* 4 */ { 2, s_1_4, -1, -1, 0}, /* 5 */ { 2, s_1_5, -1, -1, 0}, /* 6 */ { 4, s_1_6, 5, -1, 0}, /* 7 */ { 3, s_1_7, -1, -1, 0}, /* 8 */ { 5, s_1_8, 7, -1, 0}, /* 9 */ { 3, s_1_9, -1, -1, 0}, /* 10 */ { 3, s_1_10, -1, -1, 0}, /* 11 */ { 5, s_1_11, 10, -1, 0}, /* 12 */ { 3, s_1_12, -1, -1, 0} }; static const symbol s_2_0[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_2_1[5] = { 'i', 'e', 'n', 'd', 'o' }; static const symbol s_2_2[5] = { 'y', 'e', 'n', 'd', 'o' }; static const symbol s_2_3[5] = { 0xC3, 0xA1, 'n', 'd', 'o' }; static const symbol s_2_4[6] = { 'i', 0xC3, 0xA9, 'n', 'd', 'o' }; static const symbol s_2_5[2] = { 'a', 'r' }; static const symbol s_2_6[2] = { 'e', 'r' }; static const symbol s_2_7[2] = { 'i', 'r' }; static const symbol s_2_8[3] = { 0xC3, 0xA1, 'r' }; static const symbol s_2_9[3] = { 0xC3, 0xA9, 'r' }; static const symbol s_2_10[3] = { 0xC3, 0xAD, 'r' }; static const struct among a_2[11] = { /* 0 */ { 4, s_2_0, -1, 6, 0}, /* 1 */ { 5, s_2_1, -1, 6, 0}, /* 2 */ { 5, s_2_2, -1, 7, 0}, /* 3 */ { 5, s_2_3, -1, 2, 0}, /* 4 */ { 6, s_2_4, -1, 1, 0}, /* 5 */ { 2, s_2_5, -1, 6, 0}, /* 6 */ { 2, s_2_6, -1, 6, 0}, /* 7 */ { 2, s_2_7, -1, 6, 0}, /* 8 */ { 3, s_2_8, -1, 3, 0}, /* 9 */ { 3, s_2_9, -1, 4, 0}, /* 10 */ { 3, s_2_10, -1, 5, 0} }; static const symbol s_3_0[2] = { 'i', 'c' }; static const symbol s_3_1[2] = { 'a', 'd' }; static const symbol s_3_2[2] = { 'o', 's' }; static const symbol s_3_3[2] = { 'i', 'v' }; static const struct among a_3[4] = { /* 0 */ { 2, s_3_0, -1, -1, 0}, /* 1 */ { 2, s_3_1, -1, -1, 0}, /* 2 */ { 2, s_3_2, -1, -1, 0}, /* 3 */ { 2, s_3_3, -1, 1, 0} }; static const symbol s_4_0[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_4_1[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_4_2[4] = { 'a', 'n', 't', 'e' }; static const struct among a_4[3] = { /* 0 */ { 4, s_4_0, -1, 1, 0}, /* 1 */ { 4, s_4_1, -1, 1, 0}, /* 2 */ { 4, s_4_2, -1, 1, 0} }; static const symbol s_5_0[2] = { 
'i', 'c' }; static const symbol s_5_1[4] = { 'a', 'b', 'i', 'l' }; static const symbol s_5_2[2] = { 'i', 'v' }; static const struct among a_5[3] = { /* 0 */ { 2, s_5_0, -1, 1, 0}, /* 1 */ { 4, s_5_1, -1, 1, 0}, /* 2 */ { 2, s_5_2, -1, 1, 0} }; static const symbol s_6_0[3] = { 'i', 'c', 'a' }; static const symbol s_6_1[5] = { 'a', 'n', 'c', 'i', 'a' }; static const symbol s_6_2[5] = { 'e', 'n', 'c', 'i', 'a' }; static const symbol s_6_3[5] = { 'a', 'd', 'o', 'r', 'a' }; static const symbol s_6_4[3] = { 'o', 's', 'a' }; static const symbol s_6_5[4] = { 'i', 's', 't', 'a' }; static const symbol s_6_6[3] = { 'i', 'v', 'a' }; static const symbol s_6_7[4] = { 'a', 'n', 'z', 'a' }; static const symbol s_6_8[6] = { 'l', 'o', 'g', 0xC3, 0xAD, 'a' }; static const symbol s_6_9[4] = { 'i', 'd', 'a', 'd' }; static const symbol s_6_10[4] = { 'a', 'b', 'l', 'e' }; static const symbol s_6_11[4] = { 'i', 'b', 'l', 'e' }; static const symbol s_6_12[4] = { 'a', 'n', 't', 'e' }; static const symbol s_6_13[5] = { 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_14[6] = { 'a', 'm', 'e', 'n', 't', 'e' }; static const symbol s_6_15[6] = { 'a', 'c', 'i', 0xC3, 0xB3, 'n' }; static const symbol s_6_16[6] = { 'u', 'c', 'i', 0xC3, 0xB3, 'n' }; static const symbol s_6_17[3] = { 'i', 'c', 'o' }; static const symbol s_6_18[4] = { 'i', 's', 'm', 'o' }; static const symbol s_6_19[3] = { 'o', 's', 'o' }; static const symbol s_6_20[7] = { 'a', 'm', 'i', 'e', 'n', 't', 'o' }; static const symbol s_6_21[7] = { 'i', 'm', 'i', 'e', 'n', 't', 'o' }; static const symbol s_6_22[3] = { 'i', 'v', 'o' }; static const symbol s_6_23[4] = { 'a', 'd', 'o', 'r' }; static const symbol s_6_24[4] = { 'i', 'c', 'a', 's' }; static const symbol s_6_25[6] = { 'a', 'n', 'c', 'i', 'a', 's' }; static const symbol s_6_26[6] = { 'e', 'n', 'c', 'i', 'a', 's' }; static const symbol s_6_27[6] = { 'a', 'd', 'o', 'r', 'a', 's' }; static const symbol s_6_28[4] = { 'o', 's', 'a', 's' }; static const symbol s_6_29[5] = { 'i', 's', 
't', 'a', 's' }; static const symbol s_6_30[4] = { 'i', 'v', 'a', 's' }; static const symbol s_6_31[5] = { 'a', 'n', 'z', 'a', 's' }; static const symbol s_6_32[7] = { 'l', 'o', 'g', 0xC3, 0xAD, 'a', 's' }; static const symbol s_6_33[6] = { 'i', 'd', 'a', 'd', 'e', 's' }; static const symbol s_6_34[5] = { 'a', 'b', 'l', 'e', 's' }; static const symbol s_6_35[5] = { 'i', 'b', 'l', 'e', 's' }; static const symbol s_6_36[7] = { 'a', 'c', 'i', 'o', 'n', 'e', 's' }; static const symbol s_6_37[7] = { 'u', 'c', 'i', 'o', 'n', 'e', 's' }; static const symbol s_6_38[6] = { 'a', 'd', 'o', 'r', 'e', 's' }; static const symbol s_6_39[5] = { 'a', 'n', 't', 'e', 's' }; static const symbol s_6_40[4] = { 'i', 'c', 'o', 's' }; static const symbol s_6_41[5] = { 'i', 's', 'm', 'o', 's' }; static const symbol s_6_42[4] = { 'o', 's', 'o', 's' }; static const symbol s_6_43[8] = { 'a', 'm', 'i', 'e', 'n', 't', 'o', 's' }; static const symbol s_6_44[8] = { 'i', 'm', 'i', 'e', 'n', 't', 'o', 's' }; static const symbol s_6_45[4] = { 'i', 'v', 'o', 's' }; static const struct among a_6[46] = { /* 0 */ { 3, s_6_0, -1, 1, 0}, /* 1 */ { 5, s_6_1, -1, 2, 0}, /* 2 */ { 5, s_6_2, -1, 5, 0}, /* 3 */ { 5, s_6_3, -1, 2, 0}, /* 4 */ { 3, s_6_4, -1, 1, 0}, /* 5 */ { 4, s_6_5, -1, 1, 0}, /* 6 */ { 3, s_6_6, -1, 9, 0}, /* 7 */ { 4, s_6_7, -1, 1, 0}, /* 8 */ { 6, s_6_8, -1, 3, 0}, /* 9 */ { 4, s_6_9, -1, 8, 0}, /* 10 */ { 4, s_6_10, -1, 1, 0}, /* 11 */ { 4, s_6_11, -1, 1, 0}, /* 12 */ { 4, s_6_12, -1, 2, 0}, /* 13 */ { 5, s_6_13, -1, 7, 0}, /* 14 */ { 6, s_6_14, 13, 6, 0}, /* 15 */ { 6, s_6_15, -1, 2, 0}, /* 16 */ { 6, s_6_16, -1, 4, 0}, /* 17 */ { 3, s_6_17, -1, 1, 0}, /* 18 */ { 4, s_6_18, -1, 1, 0}, /* 19 */ { 3, s_6_19, -1, 1, 0}, /* 20 */ { 7, s_6_20, -1, 1, 0}, /* 21 */ { 7, s_6_21, -1, 1, 0}, /* 22 */ { 3, s_6_22, -1, 9, 0}, /* 23 */ { 4, s_6_23, -1, 2, 0}, /* 24 */ { 4, s_6_24, -1, 1, 0}, /* 25 */ { 6, s_6_25, -1, 2, 0}, /* 26 */ { 6, s_6_26, -1, 5, 0}, /* 27 */ { 6, s_6_27, -1, 2, 0}, /* 28 */ { 
4, s_6_28, -1, 1, 0}, /* 29 */ { 5, s_6_29, -1, 1, 0}, /* 30 */ { 4, s_6_30, -1, 9, 0}, /* 31 */ { 5, s_6_31, -1, 1, 0}, /* 32 */ { 7, s_6_32, -1, 3, 0}, /* 33 */ { 6, s_6_33, -1, 8, 0}, /* 34 */ { 5, s_6_34, -1, 1, 0}, /* 35 */ { 5, s_6_35, -1, 1, 0}, /* 36 */ { 7, s_6_36, -1, 2, 0}, /* 37 */ { 7, s_6_37, -1, 4, 0}, /* 38 */ { 6, s_6_38, -1, 2, 0}, /* 39 */ { 5, s_6_39, -1, 2, 0}, /* 40 */ { 4, s_6_40, -1, 1, 0}, /* 41 */ { 5, s_6_41, -1, 1, 0}, /* 42 */ { 4, s_6_42, -1, 1, 0}, /* 43 */ { 8, s_6_43, -1, 1, 0}, /* 44 */ { 8, s_6_44, -1, 1, 0}, /* 45 */ { 4, s_6_45, -1, 9, 0} }; static const symbol s_7_0[2] = { 'y', 'a' }; static const symbol s_7_1[2] = { 'y', 'e' }; static const symbol s_7_2[3] = { 'y', 'a', 'n' }; static const symbol s_7_3[3] = { 'y', 'e', 'n' }; static const symbol s_7_4[5] = { 'y', 'e', 'r', 'o', 'n' }; static const symbol s_7_5[5] = { 'y', 'e', 'n', 'd', 'o' }; static const symbol s_7_6[2] = { 'y', 'o' }; static const symbol s_7_7[3] = { 'y', 'a', 's' }; static const symbol s_7_8[3] = { 'y', 'e', 's' }; static const symbol s_7_9[4] = { 'y', 'a', 'i', 's' }; static const symbol s_7_10[5] = { 'y', 'a', 'm', 'o', 's' }; static const symbol s_7_11[3] = { 'y', 0xC3, 0xB3 }; static const struct among a_7[12] = { /* 0 */ { 2, s_7_0, -1, 1, 0}, /* 1 */ { 2, s_7_1, -1, 1, 0}, /* 2 */ { 3, s_7_2, -1, 1, 0}, /* 3 */ { 3, s_7_3, -1, 1, 0}, /* 4 */ { 5, s_7_4, -1, 1, 0}, /* 5 */ { 5, s_7_5, -1, 1, 0}, /* 6 */ { 2, s_7_6, -1, 1, 0}, /* 7 */ { 3, s_7_7, -1, 1, 0}, /* 8 */ { 3, s_7_8, -1, 1, 0}, /* 9 */ { 4, s_7_9, -1, 1, 0}, /* 10 */ { 5, s_7_10, -1, 1, 0}, /* 11 */ { 3, s_7_11, -1, 1, 0} }; static const symbol s_8_0[3] = { 'a', 'b', 'a' }; static const symbol s_8_1[3] = { 'a', 'd', 'a' }; static const symbol s_8_2[3] = { 'i', 'd', 'a' }; static const symbol s_8_3[3] = { 'a', 'r', 'a' }; static const symbol s_8_4[4] = { 'i', 'e', 'r', 'a' }; static const symbol s_8_5[3] = { 0xC3, 0xAD, 'a' }; static const symbol s_8_6[5] = { 'a', 'r', 0xC3, 0xAD, 'a' }; 
static const symbol s_8_7[5] = { 'e', 'r', 0xC3, 0xAD, 'a' }; static const symbol s_8_8[5] = { 'i', 'r', 0xC3, 0xAD, 'a' }; static const symbol s_8_9[2] = { 'a', 'd' }; static const symbol s_8_10[2] = { 'e', 'd' }; static const symbol s_8_11[2] = { 'i', 'd' }; static const symbol s_8_12[3] = { 'a', 's', 'e' }; static const symbol s_8_13[4] = { 'i', 'e', 's', 'e' }; static const symbol s_8_14[4] = { 'a', 's', 't', 'e' }; static const symbol s_8_15[4] = { 'i', 's', 't', 'e' }; static const symbol s_8_16[2] = { 'a', 'n' }; static const symbol s_8_17[4] = { 'a', 'b', 'a', 'n' }; static const symbol s_8_18[4] = { 'a', 'r', 'a', 'n' }; static const symbol s_8_19[5] = { 'i', 'e', 'r', 'a', 'n' }; static const symbol s_8_20[4] = { 0xC3, 0xAD, 'a', 'n' }; static const symbol s_8_21[6] = { 'a', 'r', 0xC3, 0xAD, 'a', 'n' }; static const symbol s_8_22[6] = { 'e', 'r', 0xC3, 0xAD, 'a', 'n' }; static const symbol s_8_23[6] = { 'i', 'r', 0xC3, 0xAD, 'a', 'n' }; static const symbol s_8_24[2] = { 'e', 'n' }; static const symbol s_8_25[4] = { 'a', 's', 'e', 'n' }; static const symbol s_8_26[5] = { 'i', 'e', 's', 'e', 'n' }; static const symbol s_8_27[4] = { 'a', 'r', 'o', 'n' }; static const symbol s_8_28[5] = { 'i', 'e', 'r', 'o', 'n' }; static const symbol s_8_29[5] = { 'a', 'r', 0xC3, 0xA1, 'n' }; static const symbol s_8_30[5] = { 'e', 'r', 0xC3, 0xA1, 'n' }; static const symbol s_8_31[5] = { 'i', 'r', 0xC3, 0xA1, 'n' }; static const symbol s_8_32[3] = { 'a', 'd', 'o' }; static const symbol s_8_33[3] = { 'i', 'd', 'o' }; static const symbol s_8_34[4] = { 'a', 'n', 'd', 'o' }; static const symbol s_8_35[5] = { 'i', 'e', 'n', 'd', 'o' }; static const symbol s_8_36[2] = { 'a', 'r' }; static const symbol s_8_37[2] = { 'e', 'r' }; static const symbol s_8_38[2] = { 'i', 'r' }; static const symbol s_8_39[2] = { 'a', 's' }; static const symbol s_8_40[4] = { 'a', 'b', 'a', 's' }; static const symbol s_8_41[4] = { 'a', 'd', 'a', 's' }; static const symbol s_8_42[4] = { 'i', 'd', 'a', 's' 
}; static const symbol s_8_43[4] = { 'a', 'r', 'a', 's' }; static const symbol s_8_44[5] = { 'i', 'e', 'r', 'a', 's' }; static const symbol s_8_45[4] = { 0xC3, 0xAD, 'a', 's' }; static const symbol s_8_46[6] = { 'a', 'r', 0xC3, 0xAD, 'a', 's' }; static const symbol s_8_47[6] = { 'e', 'r', 0xC3, 0xAD, 'a', 's' }; static const symbol s_8_48[6] = { 'i', 'r', 0xC3, 0xAD, 'a', 's' }; static const symbol s_8_49[2] = { 'e', 's' }; static const symbol s_8_50[4] = { 'a', 's', 'e', 's' }; static const symbol s_8_51[5] = { 'i', 'e', 's', 'e', 's' }; static const symbol s_8_52[5] = { 'a', 'b', 'a', 'i', 's' }; static const symbol s_8_53[5] = { 'a', 'r', 'a', 'i', 's' }; static const symbol s_8_54[6] = { 'i', 'e', 'r', 'a', 'i', 's' }; static const symbol s_8_55[5] = { 0xC3, 0xAD, 'a', 'i', 's' }; static const symbol s_8_56[7] = { 'a', 'r', 0xC3, 0xAD, 'a', 'i', 's' }; static const symbol s_8_57[7] = { 'e', 'r', 0xC3, 0xAD, 'a', 'i', 's' }; static const symbol s_8_58[7] = { 'i', 'r', 0xC3, 0xAD, 'a', 'i', 's' }; static const symbol s_8_59[5] = { 'a', 's', 'e', 'i', 's' }; static const symbol s_8_60[6] = { 'i', 'e', 's', 'e', 'i', 's' }; static const symbol s_8_61[6] = { 'a', 's', 't', 'e', 'i', 's' }; static const symbol s_8_62[6] = { 'i', 's', 't', 'e', 'i', 's' }; static const symbol s_8_63[4] = { 0xC3, 0xA1, 'i', 's' }; static const symbol s_8_64[4] = { 0xC3, 0xA9, 'i', 's' }; static const symbol s_8_65[6] = { 'a', 'r', 0xC3, 0xA9, 'i', 's' }; static const symbol s_8_66[6] = { 'e', 'r', 0xC3, 0xA9, 'i', 's' }; static const symbol s_8_67[6] = { 'i', 'r', 0xC3, 0xA9, 'i', 's' }; static const symbol s_8_68[4] = { 'a', 'd', 'o', 's' }; static const symbol s_8_69[4] = { 'i', 'd', 'o', 's' }; static const symbol s_8_70[4] = { 'a', 'm', 'o', 's' }; static const symbol s_8_71[7] = { 0xC3, 0xA1, 'b', 'a', 'm', 'o', 's' }; static const symbol s_8_72[7] = { 0xC3, 0xA1, 'r', 'a', 'm', 'o', 's' }; static const symbol s_8_73[8] = { 'i', 0xC3, 0xA9, 'r', 'a', 'm', 'o', 's' }; static const 
symbol s_8_74[6] = { 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_8_75[8] = { 'a', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_8_76[8] = { 'e', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_8_77[8] = { 'i', 'r', 0xC3, 0xAD, 'a', 'm', 'o', 's' }; static const symbol s_8_78[4] = { 'e', 'm', 'o', 's' }; static const symbol s_8_79[6] = { 'a', 'r', 'e', 'm', 'o', 's' }; static const symbol s_8_80[6] = { 'e', 'r', 'e', 'm', 'o', 's' }; static const symbol s_8_81[6] = { 'i', 'r', 'e', 'm', 'o', 's' }; static const symbol s_8_82[7] = { 0xC3, 0xA1, 's', 'e', 'm', 'o', 's' }; static const symbol s_8_83[8] = { 'i', 0xC3, 0xA9, 's', 'e', 'm', 'o', 's' }; static const symbol s_8_84[4] = { 'i', 'm', 'o', 's' }; static const symbol s_8_85[5] = { 'a', 'r', 0xC3, 0xA1, 's' }; static const symbol s_8_86[5] = { 'e', 'r', 0xC3, 0xA1, 's' }; static const symbol s_8_87[5] = { 'i', 'r', 0xC3, 0xA1, 's' }; static const symbol s_8_88[3] = { 0xC3, 0xAD, 's' }; static const symbol s_8_89[4] = { 'a', 'r', 0xC3, 0xA1 }; static const symbol s_8_90[4] = { 'e', 'r', 0xC3, 0xA1 }; static const symbol s_8_91[4] = { 'i', 'r', 0xC3, 0xA1 }; static const symbol s_8_92[4] = { 'a', 'r', 0xC3, 0xA9 }; static const symbol s_8_93[4] = { 'e', 'r', 0xC3, 0xA9 }; static const symbol s_8_94[4] = { 'i', 'r', 0xC3, 0xA9 }; static const symbol s_8_95[3] = { 'i', 0xC3, 0xB3 }; static const struct among a_8[96] = { /* 0 */ { 3, s_8_0, -1, 2, 0}, /* 1 */ { 3, s_8_1, -1, 2, 0}, /* 2 */ { 3, s_8_2, -1, 2, 0}, /* 3 */ { 3, s_8_3, -1, 2, 0}, /* 4 */ { 4, s_8_4, -1, 2, 0}, /* 5 */ { 3, s_8_5, -1, 2, 0}, /* 6 */ { 5, s_8_6, 5, 2, 0}, /* 7 */ { 5, s_8_7, 5, 2, 0}, /* 8 */ { 5, s_8_8, 5, 2, 0}, /* 9 */ { 2, s_8_9, -1, 2, 0}, /* 10 */ { 2, s_8_10, -1, 2, 0}, /* 11 */ { 2, s_8_11, -1, 2, 0}, /* 12 */ { 3, s_8_12, -1, 2, 0}, /* 13 */ { 4, s_8_13, -1, 2, 0}, /* 14 */ { 4, s_8_14, -1, 2, 0}, /* 15 */ { 4, s_8_15, -1, 2, 0}, /* 16 */ { 2, s_8_16, -1, 2, 0}, /* 17 */ { 4, s_8_17, 16, 2, 
0}, /* 18 */ { 4, s_8_18, 16, 2, 0}, /* 19 */ { 5, s_8_19, 16, 2, 0}, /* 20 */ { 4, s_8_20, 16, 2, 0}, /* 21 */ { 6, s_8_21, 20, 2, 0}, /* 22 */ { 6, s_8_22, 20, 2, 0}, /* 23 */ { 6, s_8_23, 20, 2, 0}, /* 24 */ { 2, s_8_24, -1, 1, 0}, /* 25 */ { 4, s_8_25, 24, 2, 0}, /* 26 */ { 5, s_8_26, 24, 2, 0}, /* 27 */ { 4, s_8_27, -1, 2, 0}, /* 28 */ { 5, s_8_28, -1, 2, 0}, /* 29 */ { 5, s_8_29, -1, 2, 0}, /* 30 */ { 5, s_8_30, -1, 2, 0}, /* 31 */ { 5, s_8_31, -1, 2, 0}, /* 32 */ { 3, s_8_32, -1, 2, 0}, /* 33 */ { 3, s_8_33, -1, 2, 0}, /* 34 */ { 4, s_8_34, -1, 2, 0}, /* 35 */ { 5, s_8_35, -1, 2, 0}, /* 36 */ { 2, s_8_36, -1, 2, 0}, /* 37 */ { 2, s_8_37, -1, 2, 0}, /* 38 */ { 2, s_8_38, -1, 2, 0}, /* 39 */ { 2, s_8_39, -1, 2, 0}, /* 40 */ { 4, s_8_40, 39, 2, 0}, /* 41 */ { 4, s_8_41, 39, 2, 0}, /* 42 */ { 4, s_8_42, 39, 2, 0}, /* 43 */ { 4, s_8_43, 39, 2, 0}, /* 44 */ { 5, s_8_44, 39, 2, 0}, /* 45 */ { 4, s_8_45, 39, 2, 0}, /* 46 */ { 6, s_8_46, 45, 2, 0}, /* 47 */ { 6, s_8_47, 45, 2, 0}, /* 48 */ { 6, s_8_48, 45, 2, 0}, /* 49 */ { 2, s_8_49, -1, 1, 0}, /* 50 */ { 4, s_8_50, 49, 2, 0}, /* 51 */ { 5, s_8_51, 49, 2, 0}, /* 52 */ { 5, s_8_52, -1, 2, 0}, /* 53 */ { 5, s_8_53, -1, 2, 0}, /* 54 */ { 6, s_8_54, -1, 2, 0}, /* 55 */ { 5, s_8_55, -1, 2, 0}, /* 56 */ { 7, s_8_56, 55, 2, 0}, /* 57 */ { 7, s_8_57, 55, 2, 0}, /* 58 */ { 7, s_8_58, 55, 2, 0}, /* 59 */ { 5, s_8_59, -1, 2, 0}, /* 60 */ { 6, s_8_60, -1, 2, 0}, /* 61 */ { 6, s_8_61, -1, 2, 0}, /* 62 */ { 6, s_8_62, -1, 2, 0}, /* 63 */ { 4, s_8_63, -1, 2, 0}, /* 64 */ { 4, s_8_64, -1, 1, 0}, /* 65 */ { 6, s_8_65, 64, 2, 0}, /* 66 */ { 6, s_8_66, 64, 2, 0}, /* 67 */ { 6, s_8_67, 64, 2, 0}, /* 68 */ { 4, s_8_68, -1, 2, 0}, /* 69 */ { 4, s_8_69, -1, 2, 0}, /* 70 */ { 4, s_8_70, -1, 2, 0}, /* 71 */ { 7, s_8_71, 70, 2, 0}, /* 72 */ { 7, s_8_72, 70, 2, 0}, /* 73 */ { 8, s_8_73, 70, 2, 0}, /* 74 */ { 6, s_8_74, 70, 2, 0}, /* 75 */ { 8, s_8_75, 74, 2, 0}, /* 76 */ { 8, s_8_76, 74, 2, 0}, /* 77 */ { 8, s_8_77, 74, 2, 0}, /* 78 */ { 4, 
s_8_78, -1, 1, 0}, /* 79 */ { 6, s_8_79, 78, 2, 0}, /* 80 */ { 6, s_8_80, 78, 2, 0}, /* 81 */ { 6, s_8_81, 78, 2, 0}, /* 82 */ { 7, s_8_82, 78, 2, 0}, /* 83 */ { 8, s_8_83, 78, 2, 0}, /* 84 */ { 4, s_8_84, -1, 2, 0}, /* 85 */ { 5, s_8_85, -1, 2, 0}, /* 86 */ { 5, s_8_86, -1, 2, 0}, /* 87 */ { 5, s_8_87, -1, 2, 0}, /* 88 */ { 3, s_8_88, -1, 2, 0}, /* 89 */ { 4, s_8_89, -1, 2, 0}, /* 90 */ { 4, s_8_90, -1, 2, 0}, /* 91 */ { 4, s_8_91, -1, 2, 0}, /* 92 */ { 4, s_8_92, -1, 2, 0}, /* 93 */ { 4, s_8_93, -1, 2, 0}, /* 94 */ { 4, s_8_94, -1, 2, 0}, /* 95 */ { 3, s_8_95, -1, 2, 0} }; static const symbol s_9_0[1] = { 'a' }; static const symbol s_9_1[1] = { 'e' }; static const symbol s_9_2[1] = { 'o' }; static const symbol s_9_3[2] = { 'o', 's' }; static const symbol s_9_4[2] = { 0xC3, 0xA1 }; static const symbol s_9_5[2] = { 0xC3, 0xA9 }; static const symbol s_9_6[2] = { 0xC3, 0xAD }; static const symbol s_9_7[2] = { 0xC3, 0xB3 }; static const struct among a_9[8] = { /* 0 */ { 1, s_9_0, -1, 1, 0}, /* 1 */ { 1, s_9_1, -1, 2, 0}, /* 2 */ { 1, s_9_2, -1, 1, 0}, /* 3 */ { 2, s_9_3, -1, 1, 0}, /* 4 */ { 2, s_9_4, -1, 1, 0}, /* 5 */ { 2, s_9_5, -1, 2, 0}, /* 6 */ { 2, s_9_6, -1, 1, 0}, /* 7 */ { 2, s_9_7, -1, 1, 0} }; static const unsigned char g_v[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 10 }; static const symbol s_0[] = { 'a' }; static const symbol s_1[] = { 'e' }; static const symbol s_2[] = { 'i' }; static const symbol s_3[] = { 'o' }; static const symbol s_4[] = { 'u' }; static const symbol s_5[] = { 'i', 'e', 'n', 'd', 'o' }; static const symbol s_6[] = { 'a', 'n', 'd', 'o' }; static const symbol s_7[] = { 'a', 'r' }; static const symbol s_8[] = { 'e', 'r' }; static const symbol s_9[] = { 'i', 'r' }; static const symbol s_10[] = { 'u' }; static const symbol s_11[] = { 'i', 'c' }; static const symbol s_12[] = { 'l', 'o', 'g' }; static const symbol s_13[] = { 'u' }; static const symbol s_14[] = { 'e', 'n', 't', 'e' }; static const symbol s_15[] = { 
'a', 't' }; static const symbol s_16[] = { 'a', 't' }; static const symbol s_17[] = { 'u' }; static const symbol s_18[] = { 'u' }; static const symbol s_19[] = { 'g' }; static const symbol s_20[] = { 'u' }; static const symbol s_21[] = { 'g' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; z->I[1] = z->l; z->I[2] = z->l; { int c1 = z->c; /* do, line 37 */ { int c2 = z->c; /* or, line 39 */ if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab2; { int c3 = z->c; /* or, line 38 */ if (out_grouping_U(z, g_v, 97, 252, 0)) goto lab4; { /* gopast */ /* grouping v, line 38 */ int ret = out_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) goto lab4; z->c += ret; } goto lab3; lab4: z->c = c3; if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab2; { /* gopast */ /* non v, line 38 */ int ret = in_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) goto lab2; z->c += ret; } } lab3: goto lab1; lab2: z->c = c2; if (out_grouping_U(z, g_v, 97, 252, 0)) goto lab0; { int c4 = z->c; /* or, line 40 */ if (out_grouping_U(z, g_v, 97, 252, 0)) goto lab6; { /* gopast */ /* grouping v, line 40 */ int ret = out_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) goto lab6; z->c += ret; } goto lab5; lab6: z->c = c4; if (in_grouping_U(z, g_v, 97, 252, 0)) goto lab0; { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 40 */ } } lab5: ; } lab1: z->I[0] = z->c; /* setmark pV, line 41 */ lab0: z->c = c1; } { int c5 = z->c; /* do, line 43 */ { /* gopast */ /* grouping v, line 44 */ int ret = out_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 44 */ int ret = in_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } z->I[1] = z->c; /* setmark p1, line 44 */ { /* gopast */ /* grouping v, line 45 */ int ret = out_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } { /* gopast */ /* non v, line 45 */ int ret = in_grouping_U(z, g_v, 97, 252, 1); if (ret < 0) goto lab7; z->c += ret; } 
z->I[2] = z->c; /* setmark p2, line 45 */ lab7: z->c = c5; } return 1; } static int r_postlude(struct SN_env * z) { int among_var; while(1) { /* repeat, line 49 */ int c1 = z->c; z->bra = z->c; /* [, line 50 */ if (z->c + 1 >= z->l || z->p[z->c + 1] >> 5 != 5 || !((67641858 >> (z->p[z->c + 1] & 0x1f)) & 1)) among_var = 6; else among_var = find_among(z, a_0, 6); /* substring, line 50 */ if (!(among_var)) goto lab0; z->ket = z->c; /* ], line 50 */ switch(among_var) { case 0: goto lab0; case 1: { int ret = slice_from_s(z, 1, s_0); /* <-, line 51 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 1, s_1); /* <-, line 52 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_2); /* <-, line 53 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_3); /* <-, line 54 */ if (ret < 0) return ret; } break; case 5: { int ret = slice_from_s(z, 1, s_4); /* <-, line 55 */ if (ret < 0) return ret; } break; case 6: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab0; z->c = ret; /* next, line 57 */ } break; } continue; lab0: z->c = c1; break; } return 1; } static int r_RV(struct SN_env * z) { if (!(z->I[0] <= z->c)) return 0; return 1; } static int r_R1(struct SN_env * z) { if (!(z->I[1] <= z->c)) return 0; return 1; } static int r_R2(struct SN_env * z) { if (!(z->I[2] <= z->c)) return 0; return 1; } static int r_attached_pronoun(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 68 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((557090 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_1, 13))) return 0; /* substring, line 68 */ z->bra = z->c; /* ], line 68 */ if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 111 && z->p[z->c - 1] != 114)) return 0; among_var = find_among_b(z, a_2, 11); /* substring, line 72 */ if (!(among_var)) return 0; { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 72 */ if (ret < 0) return ret; } switch(among_var) { case 0: 
return 0; case 1: z->bra = z->c; /* ], line 73 */ { int ret = slice_from_s(z, 5, s_5); /* <-, line 73 */ if (ret < 0) return ret; } break; case 2: z->bra = z->c; /* ], line 74 */ { int ret = slice_from_s(z, 4, s_6); /* <-, line 74 */ if (ret < 0) return ret; } break; case 3: z->bra = z->c; /* ], line 75 */ { int ret = slice_from_s(z, 2, s_7); /* <-, line 75 */ if (ret < 0) return ret; } break; case 4: z->bra = z->c; /* ], line 76 */ { int ret = slice_from_s(z, 2, s_8); /* <-, line 76 */ if (ret < 0) return ret; } break; case 5: z->bra = z->c; /* ], line 77 */ { int ret = slice_from_s(z, 2, s_9); /* <-, line 77 */ if (ret < 0) return ret; } break; case 6: { int ret = slice_del(z); /* delete, line 81 */ if (ret < 0) return ret; } break; case 7: if (!(eq_s_b(z, 1, s_10))) return 0; { int ret = slice_del(z); /* delete, line 82 */ if (ret < 0) return ret; } break; } return 1; } static int r_standard_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 87 */ if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((835634 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; among_var = find_among_b(z, a_6, 46); /* substring, line 87 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 87 */ switch(among_var) { case 0: return 0; case 1: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 99 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 99 */ if (ret < 0) return ret; } break; case 2: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 105 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 105 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 106 */ z->ket = z->c; /* [, line 106 */ if (!(eq_s_b(z, 2, s_11))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 106 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call R2, line 106 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 106 */ if (ret < 0) 
return ret; } lab0: ; } break; case 3: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 111 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 3, s_12); /* <-, line 111 */ if (ret < 0) return ret; } break; case 4: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 115 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 1, s_13); /* <-, line 115 */ if (ret < 0) return ret; } break; case 5: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 119 */ if (ret < 0) return ret; } { int ret = slice_from_s(z, 4, s_14); /* <-, line 119 */ if (ret < 0) return ret; } break; case 6: { int ret = r_R1(z); if (ret == 0) return 0; /* call R1, line 123 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 123 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 124 */ z->ket = z->c; /* [, line 125 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4718616 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab1; } among_var = find_among_b(z, a_3, 4); /* substring, line 125 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 125 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 125 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 125 */ if (ret < 0) return ret; } switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab1; } case 1: z->ket = z->c; /* [, line 126 */ if (!(eq_s_b(z, 2, s_15))) { z->c = z->l - m_keep; goto lab1; } z->bra = z->c; /* ], line 126 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab1; } /* call R2, line 126 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 126 */ if (ret < 0) return ret; } break; } lab1: ; } break; case 7: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 135 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 135 */ if (ret < 0) return ret; } { int 
m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 136 */ z->ket = z->c; /* [, line 137 */ if (z->c - 3 <= z->lb || z->p[z->c - 1] != 101) { z->c = z->l - m_keep; goto lab2; } among_var = find_among_b(z, a_4, 3); /* substring, line 137 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab2; } z->bra = z->c; /* ], line 137 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab2; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call R2, line 140 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 140 */ if (ret < 0) return ret; } break; } lab2: ; } break; case 8: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 147 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 147 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 148 */ z->ket = z->c; /* [, line 149 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((4198408 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->c = z->l - m_keep; goto lab3; } among_var = find_among_b(z, a_5, 3); /* substring, line 149 */ if (!(among_var)) { z->c = z->l - m_keep; goto lab3; } z->bra = z->c; /* ], line 149 */ switch(among_var) { case 0: { z->c = z->l - m_keep; goto lab3; } case 1: { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab3; } /* call R2, line 152 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 152 */ if (ret < 0) return ret; } break; } lab3: ; } break; case 9: { int ret = r_R2(z); if (ret == 0) return 0; /* call R2, line 159 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 159 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 160 */ z->ket = z->c; /* [, line 161 */ if (!(eq_s_b(z, 2, s_16))) { z->c = z->l - m_keep; goto lab4; } z->bra = z->c; /* ], line 161 */ { int ret = r_R2(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call R2, line 161 */ if (ret < 0) return ret; } { int 
ret = slice_del(z); /* delete, line 161 */ if (ret < 0) return ret; } lab4: ; } break; } return 1; } static int r_y_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 168 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 168 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 168 */ among_var = find_among_b(z, a_7, 12); /* substring, line 168 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 168 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: if (!(eq_s_b(z, 1, s_17))) return 0; { int ret = slice_del(z); /* delete, line 171 */ if (ret < 0) return ret; } break; } return 1; } static int r_verb_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 176 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 176 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 176 */ among_var = find_among_b(z, a_8, 96); /* substring, line 176 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 176 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 179 */ if (!(eq_s_b(z, 1, s_18))) { z->c = z->l - m_keep; goto lab0; } { int m_test = z->l - z->c; /* test, line 179 */ if (!(eq_s_b(z, 1, s_19))) { z->c = z->l - m_keep; goto lab0; } z->c = z->l - m_test; } lab0: ; } z->bra = z->c; /* ], line 179 */ { int ret = slice_del(z); /* delete, line 179 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_del(z); /* delete, line 200 */ if (ret < 0) return ret; } break; } return 1; } static int r_residual_suffix(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 205 */ among_var = find_among_b(z, a_9, 8); /* substring, line 205 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 205 */ switch(among_var) { case 0: return 0; case 1: { int ret = 
r_RV(z); if (ret == 0) return 0; /* call RV, line 208 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 208 */ if (ret < 0) return ret; } break; case 2: { int ret = r_RV(z); if (ret == 0) return 0; /* call RV, line 210 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 210 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 210 */ z->ket = z->c; /* [, line 210 */ if (!(eq_s_b(z, 1, s_20))) { z->c = z->l - m_keep; goto lab0; } z->bra = z->c; /* ], line 210 */ { int m_test = z->l - z->c; /* test, line 210 */ if (!(eq_s_b(z, 1, s_21))) { z->c = z->l - m_keep; goto lab0; } z->c = z->l - m_test; } { int ret = r_RV(z); if (ret == 0) { z->c = z->l - m_keep; goto lab0; } /* call RV, line 210 */ if (ret < 0) return ret; } { int ret = slice_del(z); /* delete, line 210 */ if (ret < 0) return ret; } lab0: ; } break; } return 1; } extern int spanish_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 216 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 216 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 217 */ { int m2 = z->l - z->c; (void)m2; /* do, line 218 */ { int ret = r_attached_pronoun(z); if (ret == 0) goto lab1; /* call attached_pronoun, line 218 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 219 */ { int m4 = z->l - z->c; (void)m4; /* or, line 219 */ { int ret = r_standard_suffix(z); if (ret == 0) goto lab4; /* call standard_suffix, line 219 */ if (ret < 0) return ret; } goto lab3; lab4: z->c = z->l - m4; { int ret = r_y_verb_suffix(z); if (ret == 0) goto lab5; /* call y_verb_suffix, line 220 */ if (ret < 0) return ret; } goto lab3; lab5: z->c = z->l - m4; { int ret = r_verb_suffix(z); if (ret == 0) goto lab2; /* call verb_suffix, line 221 */ if (ret < 0) return ret; } } lab3: lab2: z->c = z->l - m3; } { int m5 = z->l - z->c; (void)m5; /* do, 
line 223 */ { int ret = r_residual_suffix(z); if (ret == 0) goto lab6; /* call residual_suffix, line 223 */ if (ret < 0) return ret; } lab6: z->c = z->l - m5; } z->c = z->lb; { int c6 = z->c; /* do, line 225 */ { int ret = r_postlude(z); if (ret == 0) goto lab7; /* call postlude, line 225 */ if (ret < 0) return ret; } lab7: z->c = c6; } return 1; } extern struct SN_env * spanish_UTF_8_create_env(void) { return SN_create_env(0, 3, 0); } extern void spanish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_spanish.h000066400000000000000000000004661456444476200307570ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* spanish_UTF_8_create_env(void); extern void spanish_UTF_8_close_env(struct SN_env* z); extern int spanish_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_swedish.c000066400000000000000000000246741456444476200307620ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int swedish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_other_suffix(struct SN_env * z); static int r_consonant_pair(struct SN_env * z); static int r_main_suffix(struct SN_env * z); static int r_mark_regions(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * swedish_UTF_8_create_env(void); extern void swedish_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[1] = { 'a' }; static const symbol s_0_1[4] = { 'a', 'r', 'n', 'a' }; static const symbol s_0_2[4] = { 'e', 'r', 'n', 'a' }; static const symbol s_0_3[7] = { 'h', 'e', 't', 'e', 'r', 'n', 'a' }; static const symbol s_0_4[4] = { 
'o', 'r', 'n', 'a' }; static const symbol s_0_5[2] = { 'a', 'd' }; static const symbol s_0_6[1] = { 'e' }; static const symbol s_0_7[3] = { 'a', 'd', 'e' }; static const symbol s_0_8[4] = { 'a', 'n', 'd', 'e' }; static const symbol s_0_9[4] = { 'a', 'r', 'n', 'e' }; static const symbol s_0_10[3] = { 'a', 'r', 'e' }; static const symbol s_0_11[4] = { 'a', 's', 't', 'e' }; static const symbol s_0_12[2] = { 'e', 'n' }; static const symbol s_0_13[5] = { 'a', 'n', 'd', 'e', 'n' }; static const symbol s_0_14[4] = { 'a', 'r', 'e', 'n' }; static const symbol s_0_15[5] = { 'h', 'e', 't', 'e', 'n' }; static const symbol s_0_16[3] = { 'e', 'r', 'n' }; static const symbol s_0_17[2] = { 'a', 'r' }; static const symbol s_0_18[2] = { 'e', 'r' }; static const symbol s_0_19[5] = { 'h', 'e', 't', 'e', 'r' }; static const symbol s_0_20[2] = { 'o', 'r' }; static const symbol s_0_21[1] = { 's' }; static const symbol s_0_22[2] = { 'a', 's' }; static const symbol s_0_23[5] = { 'a', 'r', 'n', 'a', 's' }; static const symbol s_0_24[5] = { 'e', 'r', 'n', 'a', 's' }; static const symbol s_0_25[5] = { 'o', 'r', 'n', 'a', 's' }; static const symbol s_0_26[2] = { 'e', 's' }; static const symbol s_0_27[4] = { 'a', 'd', 'e', 's' }; static const symbol s_0_28[5] = { 'a', 'n', 'd', 'e', 's' }; static const symbol s_0_29[3] = { 'e', 'n', 's' }; static const symbol s_0_30[5] = { 'a', 'r', 'e', 'n', 's' }; static const symbol s_0_31[6] = { 'h', 'e', 't', 'e', 'n', 's' }; static const symbol s_0_32[4] = { 'e', 'r', 'n', 's' }; static const symbol s_0_33[2] = { 'a', 't' }; static const symbol s_0_34[5] = { 'a', 'n', 'd', 'e', 't' }; static const symbol s_0_35[3] = { 'h', 'e', 't' }; static const symbol s_0_36[3] = { 'a', 's', 't' }; static const struct among a_0[37] = { /* 0 */ { 1, s_0_0, -1, 1, 0}, /* 1 */ { 4, s_0_1, 0, 1, 0}, /* 2 */ { 4, s_0_2, 0, 1, 0}, /* 3 */ { 7, s_0_3, 2, 1, 0}, /* 4 */ { 4, s_0_4, 0, 1, 0}, /* 5 */ { 2, s_0_5, -1, 1, 0}, /* 6 */ { 1, s_0_6, -1, 1, 0}, /* 7 */ { 3, s_0_7, 6, 
1, 0}, /* 8 */ { 4, s_0_8, 6, 1, 0}, /* 9 */ { 4, s_0_9, 6, 1, 0}, /* 10 */ { 3, s_0_10, 6, 1, 0}, /* 11 */ { 4, s_0_11, 6, 1, 0}, /* 12 */ { 2, s_0_12, -1, 1, 0}, /* 13 */ { 5, s_0_13, 12, 1, 0}, /* 14 */ { 4, s_0_14, 12, 1, 0}, /* 15 */ { 5, s_0_15, 12, 1, 0}, /* 16 */ { 3, s_0_16, -1, 1, 0}, /* 17 */ { 2, s_0_17, -1, 1, 0}, /* 18 */ { 2, s_0_18, -1, 1, 0}, /* 19 */ { 5, s_0_19, 18, 1, 0}, /* 20 */ { 2, s_0_20, -1, 1, 0}, /* 21 */ { 1, s_0_21, -1, 2, 0}, /* 22 */ { 2, s_0_22, 21, 1, 0}, /* 23 */ { 5, s_0_23, 22, 1, 0}, /* 24 */ { 5, s_0_24, 22, 1, 0}, /* 25 */ { 5, s_0_25, 22, 1, 0}, /* 26 */ { 2, s_0_26, 21, 1, 0}, /* 27 */ { 4, s_0_27, 26, 1, 0}, /* 28 */ { 5, s_0_28, 26, 1, 0}, /* 29 */ { 3, s_0_29, 21, 1, 0}, /* 30 */ { 5, s_0_30, 29, 1, 0}, /* 31 */ { 6, s_0_31, 29, 1, 0}, /* 32 */ { 4, s_0_32, 21, 1, 0}, /* 33 */ { 2, s_0_33, -1, 1, 0}, /* 34 */ { 5, s_0_34, -1, 1, 0}, /* 35 */ { 3, s_0_35, -1, 1, 0}, /* 36 */ { 3, s_0_36, -1, 1, 0} }; static const symbol s_1_0[2] = { 'd', 'd' }; static const symbol s_1_1[2] = { 'g', 'd' }; static const symbol s_1_2[2] = { 'n', 'n' }; static const symbol s_1_3[2] = { 'd', 't' }; static const symbol s_1_4[2] = { 'g', 't' }; static const symbol s_1_5[2] = { 'k', 't' }; static const symbol s_1_6[2] = { 't', 't' }; static const struct among a_1[7] = { /* 0 */ { 2, s_1_0, -1, -1, 0}, /* 1 */ { 2, s_1_1, -1, -1, 0}, /* 2 */ { 2, s_1_2, -1, -1, 0}, /* 3 */ { 2, s_1_3, -1, -1, 0}, /* 4 */ { 2, s_1_4, -1, -1, 0}, /* 5 */ { 2, s_1_5, -1, -1, 0}, /* 6 */ { 2, s_1_6, -1, -1, 0} }; static const symbol s_2_0[2] = { 'i', 'g' }; static const symbol s_2_1[3] = { 'l', 'i', 'g' }; static const symbol s_2_2[3] = { 'e', 'l', 's' }; static const symbol s_2_3[5] = { 'f', 'u', 'l', 'l', 't' }; static const symbol s_2_4[5] = { 'l', 0xC3, 0xB6, 's', 't' }; static const struct among a_2[5] = { /* 0 */ { 2, s_2_0, -1, 1, 0}, /* 1 */ { 3, s_2_1, 0, 1, 0}, /* 2 */ { 3, s_2_2, -1, 1, 0}, /* 3 */ { 5, s_2_3, -1, 3, 0}, /* 4 */ { 5, s_2_4, -1, 2, 0} }; 
static const unsigned char g_v[] = { 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; static const unsigned char g_s_ending[] = { 119, 127, 149 }; static const symbol s_0[] = { 'l', 0xC3, 0xB6, 's' }; static const symbol s_1[] = { 'f', 'u', 'l', 'l' }; static int r_mark_regions(struct SN_env * z) { z->I[0] = z->l; { int c_test = z->c; /* test, line 29 */ { int ret = skip_utf8(z->p, z->c, 0, z->l, + 3); if (ret < 0) return 0; z->c = ret; /* hop, line 29 */ } z->I[1] = z->c; /* setmark x, line 29 */ z->c = c_test; } if (out_grouping_U(z, g_v, 97, 246, 1) < 0) return 0; /* goto */ /* grouping v, line 30 */ { /* gopast */ /* non v, line 30 */ int ret = in_grouping_U(z, g_v, 97, 246, 1); if (ret < 0) return 0; z->c += ret; } z->I[0] = z->c; /* setmark p1, line 30 */ /* try, line 31 */ if (!(z->I[0] < z->I[1])) goto lab0; z->I[0] = z->I[1]; lab0: return 1; } static int r_main_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 37 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 37 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 37 */ if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1851442 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_0, 37); /* substring, line 37 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 37 */ z->lb = mlimit; } switch(among_var) { case 0: return 0; case 1: { int ret = slice_del(z); /* delete, line 44 */ if (ret < 0) return ret; } break; case 2: if (in_grouping_b_U(z, g_s_ending, 98, 121, 0)) return 0; { int ret = slice_del(z); /* delete, line 46 */ if (ret < 0) return ret; } break; } return 1; } static int r_consonant_pair(struct SN_env * z) { { int mlimit; /* setlimit, line 50 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 50 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; { int m2 = z->l - z->c; 
(void)m2; /* and, line 52 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1064976 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } if (!(find_among_b(z, a_1, 7))) { z->lb = mlimit; return 0; } /* among, line 51 */ z->c = z->l - m2; z->ket = z->c; /* [, line 52 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) { z->lb = mlimit; return 0; } z->c = ret; /* next, line 52 */ } z->bra = z->c; /* ], line 52 */ { int ret = slice_del(z); /* delete, line 52 */ if (ret < 0) return ret; } } z->lb = mlimit; } return 1; } static int r_other_suffix(struct SN_env * z) { int among_var; { int mlimit; /* setlimit, line 55 */ int m1 = z->l - z->c; (void)m1; if (z->c < z->I[0]) return 0; z->c = z->I[0]; /* tomark, line 55 */ mlimit = z->lb; z->lb = z->c; z->c = z->l - m1; z->ket = z->c; /* [, line 56 */ if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((1572992 >> (z->p[z->c - 1] & 0x1f)) & 1)) { z->lb = mlimit; return 0; } among_var = find_among_b(z, a_2, 5); /* substring, line 56 */ if (!(among_var)) { z->lb = mlimit; return 0; } z->bra = z->c; /* ], line 56 */ switch(among_var) { case 0: { z->lb = mlimit; return 0; } case 1: { int ret = slice_del(z); /* delete, line 57 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 4, s_0); /* <-, line 58 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 4, s_1); /* <-, line 59 */ if (ret < 0) return ret; } break; } z->lb = mlimit; } return 1; } extern int swedish_UTF_8_stem(struct SN_env * z) { { int c1 = z->c; /* do, line 66 */ { int ret = r_mark_regions(z); if (ret == 0) goto lab0; /* call mark_regions, line 66 */ if (ret < 0) return ret; } lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 67 */ { int m2 = z->l - z->c; (void)m2; /* do, line 68 */ { int ret = r_main_suffix(z); if (ret == 0) goto lab1; /* call main_suffix, line 68 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 69 */ { 
int ret = r_consonant_pair(z); if (ret == 0) goto lab2; /* call consonant_pair, line 69 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } { int m4 = z->l - z->c; (void)m4; /* do, line 70 */ { int ret = r_other_suffix(z); if (ret == 0) goto lab3; /* call other_suffix, line 70 */ if (ret < 0) return ret; } lab3: z->c = z->l - m4; } z->c = z->lb; return 1; } extern struct SN_env * swedish_UTF_8_create_env(void) { return SN_create_env(0, 2, 0); } extern void swedish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_swedish.h000066400000000000000000000004661456444476200307600ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* swedish_UTF_8_create_env(void); extern void swedish_UTF_8_close_env(struct SN_env* z); extern int swedish_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_turkish.c000066400000000000000000002360321456444476200307760ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #include "../runtime/header.h" #ifdef __cplusplus extern "C" { #endif extern int turkish_UTF_8_stem(struct SN_env * z); #ifdef __cplusplus } #endif static int r_stem_suffix_chain_before_ki(struct SN_env * z); static int r_stem_noun_suffixes(struct SN_env * z); static int r_stem_nominal_verb_suffixes(struct SN_env * z); static int r_postlude(struct SN_env * z); static int r_post_process_last_consonants(struct SN_env * z); static int r_more_than_one_syllable_word(struct SN_env * z); static int r_mark_suffix_with_optional_s_consonant(struct SN_env * z); static int r_mark_suffix_with_optional_n_consonant(struct SN_env * z); static int r_mark_suffix_with_optional_U_vowel(struct SN_env * z); static int r_mark_suffix_with_optional_y_consonant(struct 
SN_env * z); static int r_mark_ysA(struct SN_env * z); static int r_mark_ymUs_(struct SN_env * z); static int r_mark_yken(struct SN_env * z); static int r_mark_yDU(struct SN_env * z); static int r_mark_yUz(struct SN_env * z); static int r_mark_yUm(struct SN_env * z); static int r_mark_yU(struct SN_env * z); static int r_mark_ylA(struct SN_env * z); static int r_mark_yA(struct SN_env * z); static int r_mark_possessives(struct SN_env * z); static int r_mark_sUnUz(struct SN_env * z); static int r_mark_sUn(struct SN_env * z); static int r_mark_sU(struct SN_env * z); static int r_mark_nUz(struct SN_env * z); static int r_mark_nUn(struct SN_env * z); static int r_mark_nU(struct SN_env * z); static int r_mark_ndAn(struct SN_env * z); static int r_mark_ndA(struct SN_env * z); static int r_mark_ncA(struct SN_env * z); static int r_mark_nA(struct SN_env * z); static int r_mark_lArI(struct SN_env * z); static int r_mark_lAr(struct SN_env * z); static int r_mark_ki(struct SN_env * z); static int r_mark_DUr(struct SN_env * z); static int r_mark_DAn(struct SN_env * z); static int r_mark_DA(struct SN_env * z); static int r_mark_cAsInA(struct SN_env * z); static int r_is_reserved_word(struct SN_env * z); static int r_check_vowel_harmony(struct SN_env * z); static int r_append_U_to_stems_ending_with_d_or_g(struct SN_env * z); #ifdef __cplusplus extern "C" { #endif extern struct SN_env * turkish_UTF_8_create_env(void); extern void turkish_UTF_8_close_env(struct SN_env * z); #ifdef __cplusplus } #endif static const symbol s_0_0[1] = { 'm' }; static const symbol s_0_1[1] = { 'n' }; static const symbol s_0_2[3] = { 'm', 'i', 'z' }; static const symbol s_0_3[3] = { 'n', 'i', 'z' }; static const symbol s_0_4[3] = { 'm', 'u', 'z' }; static const symbol s_0_5[3] = { 'n', 'u', 'z' }; static const symbol s_0_6[4] = { 'm', 0xC4, 0xB1, 'z' }; static const symbol s_0_7[4] = { 'n', 0xC4, 0xB1, 'z' }; static const symbol s_0_8[4] = { 'm', 0xC3, 0xBC, 'z' }; static const symbol s_0_9[4] = { 'n', 
0xC3, 0xBC, 'z' }; static const struct among a_0[10] = { /* 0 */ { 1, s_0_0, -1, -1, 0}, /* 1 */ { 1, s_0_1, -1, -1, 0}, /* 2 */ { 3, s_0_2, -1, -1, 0}, /* 3 */ { 3, s_0_3, -1, -1, 0}, /* 4 */ { 3, s_0_4, -1, -1, 0}, /* 5 */ { 3, s_0_5, -1, -1, 0}, /* 6 */ { 4, s_0_6, -1, -1, 0}, /* 7 */ { 4, s_0_7, -1, -1, 0}, /* 8 */ { 4, s_0_8, -1, -1, 0}, /* 9 */ { 4, s_0_9, -1, -1, 0} }; static const symbol s_1_0[4] = { 'l', 'e', 'r', 'i' }; static const symbol s_1_1[5] = { 'l', 'a', 'r', 0xC4, 0xB1 }; static const struct among a_1[2] = { /* 0 */ { 4, s_1_0, -1, -1, 0}, /* 1 */ { 5, s_1_1, -1, -1, 0} }; static const symbol s_2_0[2] = { 'n', 'i' }; static const symbol s_2_1[2] = { 'n', 'u' }; static const symbol s_2_2[3] = { 'n', 0xC4, 0xB1 }; static const symbol s_2_3[3] = { 'n', 0xC3, 0xBC }; static const struct among a_2[4] = { /* 0 */ { 2, s_2_0, -1, -1, 0}, /* 1 */ { 2, s_2_1, -1, -1, 0}, /* 2 */ { 3, s_2_2, -1, -1, 0}, /* 3 */ { 3, s_2_3, -1, -1, 0} }; static const symbol s_3_0[2] = { 'i', 'n' }; static const symbol s_3_1[2] = { 'u', 'n' }; static const symbol s_3_2[3] = { 0xC4, 0xB1, 'n' }; static const symbol s_3_3[3] = { 0xC3, 0xBC, 'n' }; static const struct among a_3[4] = { /* 0 */ { 2, s_3_0, -1, -1, 0}, /* 1 */ { 2, s_3_1, -1, -1, 0}, /* 2 */ { 3, s_3_2, -1, -1, 0}, /* 3 */ { 3, s_3_3, -1, -1, 0} }; static const symbol s_4_0[1] = { 'a' }; static const symbol s_4_1[1] = { 'e' }; static const struct among a_4[2] = { /* 0 */ { 1, s_4_0, -1, -1, 0}, /* 1 */ { 1, s_4_1, -1, -1, 0} }; static const symbol s_5_0[2] = { 'n', 'a' }; static const symbol s_5_1[2] = { 'n', 'e' }; static const struct among a_5[2] = { /* 0 */ { 2, s_5_0, -1, -1, 0}, /* 1 */ { 2, s_5_1, -1, -1, 0} }; static const symbol s_6_0[2] = { 'd', 'a' }; static const symbol s_6_1[2] = { 't', 'a' }; static const symbol s_6_2[2] = { 'd', 'e' }; static const symbol s_6_3[2] = { 't', 'e' }; static const struct among a_6[4] = { /* 0 */ { 2, s_6_0, -1, -1, 0}, /* 1 */ { 2, s_6_1, -1, -1, 0}, /* 2 */ { 2, s_6_2, 
-1, -1, 0}, /* 3 */ { 2, s_6_3, -1, -1, 0} }; static const symbol s_7_0[3] = { 'n', 'd', 'a' }; static const symbol s_7_1[3] = { 'n', 'd', 'e' }; static const struct among a_7[2] = { /* 0 */ { 3, s_7_0, -1, -1, 0}, /* 1 */ { 3, s_7_1, -1, -1, 0} }; static const symbol s_8_0[3] = { 'd', 'a', 'n' }; static const symbol s_8_1[3] = { 't', 'a', 'n' }; static const symbol s_8_2[3] = { 'd', 'e', 'n' }; static const symbol s_8_3[3] = { 't', 'e', 'n' }; static const struct among a_8[4] = { /* 0 */ { 3, s_8_0, -1, -1, 0}, /* 1 */ { 3, s_8_1, -1, -1, 0}, /* 2 */ { 3, s_8_2, -1, -1, 0}, /* 3 */ { 3, s_8_3, -1, -1, 0} }; static const symbol s_9_0[4] = { 'n', 'd', 'a', 'n' }; static const symbol s_9_1[4] = { 'n', 'd', 'e', 'n' }; static const struct among a_9[2] = { /* 0 */ { 4, s_9_0, -1, -1, 0}, /* 1 */ { 4, s_9_1, -1, -1, 0} }; static const symbol s_10_0[2] = { 'l', 'a' }; static const symbol s_10_1[2] = { 'l', 'e' }; static const struct among a_10[2] = { /* 0 */ { 2, s_10_0, -1, -1, 0}, /* 1 */ { 2, s_10_1, -1, -1, 0} }; static const symbol s_11_0[2] = { 'c', 'a' }; static const symbol s_11_1[2] = { 'c', 'e' }; static const struct among a_11[2] = { /* 0 */ { 2, s_11_0, -1, -1, 0}, /* 1 */ { 2, s_11_1, -1, -1, 0} }; static const symbol s_12_0[2] = { 'i', 'm' }; static const symbol s_12_1[2] = { 'u', 'm' }; static const symbol s_12_2[3] = { 0xC4, 0xB1, 'm' }; static const symbol s_12_3[3] = { 0xC3, 0xBC, 'm' }; static const struct among a_12[4] = { /* 0 */ { 2, s_12_0, -1, -1, 0}, /* 1 */ { 2, s_12_1, -1, -1, 0}, /* 2 */ { 3, s_12_2, -1, -1, 0}, /* 3 */ { 3, s_12_3, -1, -1, 0} }; static const symbol s_13_0[3] = { 's', 'i', 'n' }; static const symbol s_13_1[3] = { 's', 'u', 'n' }; static const symbol s_13_2[4] = { 's', 0xC4, 0xB1, 'n' }; static const symbol s_13_3[4] = { 's', 0xC3, 0xBC, 'n' }; static const struct among a_13[4] = { /* 0 */ { 3, s_13_0, -1, -1, 0}, /* 1 */ { 3, s_13_1, -1, -1, 0}, /* 2 */ { 4, s_13_2, -1, -1, 0}, /* 3 */ { 4, s_13_3, -1, -1, 0} }; static const 
symbol s_14_0[2] = { 'i', 'z' }; static const symbol s_14_1[2] = { 'u', 'z' }; static const symbol s_14_2[3] = { 0xC4, 0xB1, 'z' }; static const symbol s_14_3[3] = { 0xC3, 0xBC, 'z' }; static const struct among a_14[4] = { /* 0 */ { 2, s_14_0, -1, -1, 0}, /* 1 */ { 2, s_14_1, -1, -1, 0}, /* 2 */ { 3, s_14_2, -1, -1, 0}, /* 3 */ { 3, s_14_3, -1, -1, 0} }; static const symbol s_15_0[5] = { 's', 'i', 'n', 'i', 'z' }; static const symbol s_15_1[5] = { 's', 'u', 'n', 'u', 'z' }; static const symbol s_15_2[7] = { 's', 0xC4, 0xB1, 'n', 0xC4, 0xB1, 'z' }; static const symbol s_15_3[7] = { 's', 0xC3, 0xBC, 'n', 0xC3, 0xBC, 'z' }; static const struct among a_15[4] = { /* 0 */ { 5, s_15_0, -1, -1, 0}, /* 1 */ { 5, s_15_1, -1, -1, 0}, /* 2 */ { 7, s_15_2, -1, -1, 0}, /* 3 */ { 7, s_15_3, -1, -1, 0} }; static const symbol s_16_0[3] = { 'l', 'a', 'r' }; static const symbol s_16_1[3] = { 'l', 'e', 'r' }; static const struct among a_16[2] = { /* 0 */ { 3, s_16_0, -1, -1, 0}, /* 1 */ { 3, s_16_1, -1, -1, 0} }; static const symbol s_17_0[3] = { 'n', 'i', 'z' }; static const symbol s_17_1[3] = { 'n', 'u', 'z' }; static const symbol s_17_2[4] = { 'n', 0xC4, 0xB1, 'z' }; static const symbol s_17_3[4] = { 'n', 0xC3, 0xBC, 'z' }; static const struct among a_17[4] = { /* 0 */ { 3, s_17_0, -1, -1, 0}, /* 1 */ { 3, s_17_1, -1, -1, 0}, /* 2 */ { 4, s_17_2, -1, -1, 0}, /* 3 */ { 4, s_17_3, -1, -1, 0} }; static const symbol s_18_0[3] = { 'd', 'i', 'r' }; static const symbol s_18_1[3] = { 't', 'i', 'r' }; static const symbol s_18_2[3] = { 'd', 'u', 'r' }; static const symbol s_18_3[3] = { 't', 'u', 'r' }; static const symbol s_18_4[4] = { 'd', 0xC4, 0xB1, 'r' }; static const symbol s_18_5[4] = { 't', 0xC4, 0xB1, 'r' }; static const symbol s_18_6[4] = { 'd', 0xC3, 0xBC, 'r' }; static const symbol s_18_7[4] = { 't', 0xC3, 0xBC, 'r' }; static const struct among a_18[8] = { /* 0 */ { 3, s_18_0, -1, -1, 0}, /* 1 */ { 3, s_18_1, -1, -1, 0}, /* 2 */ { 3, s_18_2, -1, -1, 0}, /* 3 */ { 3, s_18_3, -1, 
-1, 0}, /* 4 */ { 4, s_18_4, -1, -1, 0}, /* 5 */ { 4, s_18_5, -1, -1, 0}, /* 6 */ { 4, s_18_6, -1, -1, 0}, /* 7 */ { 4, s_18_7, -1, -1, 0} }; static const symbol s_19_0[7] = { 'c', 'a', 's', 0xC4, 0xB1, 'n', 'a' }; static const symbol s_19_1[6] = { 'c', 'e', 's', 'i', 'n', 'e' }; static const struct among a_19[2] = { /* 0 */ { 7, s_19_0, -1, -1, 0}, /* 1 */ { 6, s_19_1, -1, -1, 0} }; static const symbol s_20_0[2] = { 'd', 'i' }; static const symbol s_20_1[2] = { 't', 'i' }; static const symbol s_20_2[3] = { 'd', 'i', 'k' }; static const symbol s_20_3[3] = { 't', 'i', 'k' }; static const symbol s_20_4[3] = { 'd', 'u', 'k' }; static const symbol s_20_5[3] = { 't', 'u', 'k' }; static const symbol s_20_6[4] = { 'd', 0xC4, 0xB1, 'k' }; static const symbol s_20_7[4] = { 't', 0xC4, 0xB1, 'k' }; static const symbol s_20_8[4] = { 'd', 0xC3, 0xBC, 'k' }; static const symbol s_20_9[4] = { 't', 0xC3, 0xBC, 'k' }; static const symbol s_20_10[3] = { 'd', 'i', 'm' }; static const symbol s_20_11[3] = { 't', 'i', 'm' }; static const symbol s_20_12[3] = { 'd', 'u', 'm' }; static const symbol s_20_13[3] = { 't', 'u', 'm' }; static const symbol s_20_14[4] = { 'd', 0xC4, 0xB1, 'm' }; static const symbol s_20_15[4] = { 't', 0xC4, 0xB1, 'm' }; static const symbol s_20_16[4] = { 'd', 0xC3, 0xBC, 'm' }; static const symbol s_20_17[4] = { 't', 0xC3, 0xBC, 'm' }; static const symbol s_20_18[3] = { 'd', 'i', 'n' }; static const symbol s_20_19[3] = { 't', 'i', 'n' }; static const symbol s_20_20[3] = { 'd', 'u', 'n' }; static const symbol s_20_21[3] = { 't', 'u', 'n' }; static const symbol s_20_22[4] = { 'd', 0xC4, 0xB1, 'n' }; static const symbol s_20_23[4] = { 't', 0xC4, 0xB1, 'n' }; static const symbol s_20_24[4] = { 'd', 0xC3, 0xBC, 'n' }; static const symbol s_20_25[4] = { 't', 0xC3, 0xBC, 'n' }; static const symbol s_20_26[2] = { 'd', 'u' }; static const symbol s_20_27[2] = { 't', 'u' }; static const symbol s_20_28[3] = { 'd', 0xC4, 0xB1 }; static const symbol s_20_29[3] = { 't', 0xC4, 
0xB1 }; static const symbol s_20_30[3] = { 'd', 0xC3, 0xBC }; static const symbol s_20_31[3] = { 't', 0xC3, 0xBC }; static const struct among a_20[32] = { /* 0 */ { 2, s_20_0, -1, -1, 0}, /* 1 */ { 2, s_20_1, -1, -1, 0}, /* 2 */ { 3, s_20_2, -1, -1, 0}, /* 3 */ { 3, s_20_3, -1, -1, 0}, /* 4 */ { 3, s_20_4, -1, -1, 0}, /* 5 */ { 3, s_20_5, -1, -1, 0}, /* 6 */ { 4, s_20_6, -1, -1, 0}, /* 7 */ { 4, s_20_7, -1, -1, 0}, /* 8 */ { 4, s_20_8, -1, -1, 0}, /* 9 */ { 4, s_20_9, -1, -1, 0}, /* 10 */ { 3, s_20_10, -1, -1, 0}, /* 11 */ { 3, s_20_11, -1, -1, 0}, /* 12 */ { 3, s_20_12, -1, -1, 0}, /* 13 */ { 3, s_20_13, -1, -1, 0}, /* 14 */ { 4, s_20_14, -1, -1, 0}, /* 15 */ { 4, s_20_15, -1, -1, 0}, /* 16 */ { 4, s_20_16, -1, -1, 0}, /* 17 */ { 4, s_20_17, -1, -1, 0}, /* 18 */ { 3, s_20_18, -1, -1, 0}, /* 19 */ { 3, s_20_19, -1, -1, 0}, /* 20 */ { 3, s_20_20, -1, -1, 0}, /* 21 */ { 3, s_20_21, -1, -1, 0}, /* 22 */ { 4, s_20_22, -1, -1, 0}, /* 23 */ { 4, s_20_23, -1, -1, 0}, /* 24 */ { 4, s_20_24, -1, -1, 0}, /* 25 */ { 4, s_20_25, -1, -1, 0}, /* 26 */ { 2, s_20_26, -1, -1, 0}, /* 27 */ { 2, s_20_27, -1, -1, 0}, /* 28 */ { 3, s_20_28, -1, -1, 0}, /* 29 */ { 3, s_20_29, -1, -1, 0}, /* 30 */ { 3, s_20_30, -1, -1, 0}, /* 31 */ { 3, s_20_31, -1, -1, 0} }; static const symbol s_21_0[2] = { 's', 'a' }; static const symbol s_21_1[2] = { 's', 'e' }; static const symbol s_21_2[3] = { 's', 'a', 'k' }; static const symbol s_21_3[3] = { 's', 'e', 'k' }; static const symbol s_21_4[3] = { 's', 'a', 'm' }; static const symbol s_21_5[3] = { 's', 'e', 'm' }; static const symbol s_21_6[3] = { 's', 'a', 'n' }; static const symbol s_21_7[3] = { 's', 'e', 'n' }; static const struct among a_21[8] = { /* 0 */ { 2, s_21_0, -1, -1, 0}, /* 1 */ { 2, s_21_1, -1, -1, 0}, /* 2 */ { 3, s_21_2, -1, -1, 0}, /* 3 */ { 3, s_21_3, -1, -1, 0}, /* 4 */ { 3, s_21_4, -1, -1, 0}, /* 5 */ { 3, s_21_5, -1, -1, 0}, /* 6 */ { 3, s_21_6, -1, -1, 0}, /* 7 */ { 3, s_21_7, -1, -1, 0} }; static const symbol s_22_0[4] = { 'm', 
'i', 0xC5, 0x9F }; static const symbol s_22_1[4] = { 'm', 'u', 0xC5, 0x9F }; static const symbol s_22_2[5] = { 'm', 0xC4, 0xB1, 0xC5, 0x9F }; static const symbol s_22_3[5] = { 'm', 0xC3, 0xBC, 0xC5, 0x9F }; static const struct among a_22[4] = { /* 0 */ { 4, s_22_0, -1, -1, 0}, /* 1 */ { 4, s_22_1, -1, -1, 0}, /* 2 */ { 5, s_22_2, -1, -1, 0}, /* 3 */ { 5, s_22_3, -1, -1, 0} }; static const symbol s_23_0[1] = { 'b' }; static const symbol s_23_1[1] = { 'c' }; static const symbol s_23_2[1] = { 'd' }; static const symbol s_23_3[2] = { 0xC4, 0x9F }; static const struct among a_23[4] = { /* 0 */ { 1, s_23_0, -1, 1, 0}, /* 1 */ { 1, s_23_1, -1, 2, 0}, /* 2 */ { 1, s_23_2, -1, 3, 0}, /* 3 */ { 2, s_23_3, -1, 4, 0} }; static const unsigned char g_vowel[] = { 17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 8, 0, 0, 0, 0, 0, 0, 1 }; static const unsigned char g_U[] = { 1, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 1 }; static const unsigned char g_vowel1[] = { 1, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; static const unsigned char g_vowel2[] = { 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130 }; static const unsigned char g_vowel3[] = { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; static const unsigned char g_vowel4[] = { 17 }; static const unsigned char g_vowel5[] = { 65 }; static const unsigned char g_vowel6[] = { 65 }; static const symbol s_0[] = { 'a' }; static const symbol s_1[] = { 'e' }; static const symbol s_2[] = { 0xC4, 0xB1 }; static const symbol s_3[] = { 'i' }; static const symbol s_4[] = { 'o' }; static const symbol s_5[] = { 0xC3, 0xB6 }; static const symbol s_6[] = { 'u' }; static const symbol s_7[] = { 0xC3, 0xBC }; static const symbol s_8[] = { 'n' }; static const symbol s_9[] = { 'n' }; static const symbol s_10[] = { 's' }; static const symbol s_11[] = { 's' }; static const symbol s_12[] = { 'y' }; static const symbol s_13[] = { 
'y' }; static const symbol s_14[] = { 'k', 'i' }; static const symbol s_15[] = { 'k', 'e', 'n' }; static const symbol s_16[] = { 'p' }; static const symbol s_17[] = { 0xC3, 0xA7 }; static const symbol s_18[] = { 't' }; static const symbol s_19[] = { 'k' }; static const symbol s_20[] = { 'd' }; static const symbol s_21[] = { 'g' }; static const symbol s_22[] = { 'a' }; static const symbol s_23[] = { 0xC4, 0xB1 }; static const symbol s_24[] = { 0xC4, 0xB1 }; static const symbol s_25[] = { 'e' }; static const symbol s_26[] = { 'i' }; static const symbol s_27[] = { 'i' }; static const symbol s_28[] = { 'o' }; static const symbol s_29[] = { 'u' }; static const symbol s_30[] = { 'u' }; static const symbol s_31[] = { 0xC3, 0xB6 }; static const symbol s_32[] = { 0xC3, 0xBC }; static const symbol s_33[] = { 0xC3, 0xBC }; static const symbol s_34[] = { 'a', 'd' }; static const symbol s_35[] = { 's', 'o', 'y', 'a', 'd' }; static int r_check_vowel_harmony(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 112 */ if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) return 0; /* goto */ /* grouping vowel, line 114 */ { int m1 = z->l - z->c; (void)m1; /* or, line 116 */ if (!(eq_s_b(z, 1, s_0))) goto lab1; if (out_grouping_b_U(z, g_vowel1, 97, 305, 1) < 0) goto lab1; /* goto */ /* grouping vowel1, line 116 */ goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_1))) goto lab2; if (out_grouping_b_U(z, g_vowel2, 101, 252, 1) < 0) goto lab2; /* goto */ /* grouping vowel2, line 117 */ goto lab0; lab2: z->c = z->l - m1; if (!(eq_s_b(z, 2, s_2))) goto lab3; if (out_grouping_b_U(z, g_vowel3, 97, 305, 1) < 0) goto lab3; /* goto */ /* grouping vowel3, line 118 */ goto lab0; lab3: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_3))) goto lab4; if (out_grouping_b_U(z, g_vowel4, 101, 105, 1) < 0) goto lab4; /* goto */ /* grouping vowel4, line 119 */ goto lab0; lab4: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_4))) goto lab5; if (out_grouping_b_U(z, g_vowel5, 111, 117, 1) < 0) goto lab5; /* 
goto */ /* grouping vowel5, line 120 */ goto lab0; lab5: z->c = z->l - m1; if (!(eq_s_b(z, 2, s_5))) goto lab6; if (out_grouping_b_U(z, g_vowel6, 246, 252, 1) < 0) goto lab6; /* goto */ /* grouping vowel6, line 121 */ goto lab0; lab6: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_6))) goto lab7; if (out_grouping_b_U(z, g_vowel5, 111, 117, 1) < 0) goto lab7; /* goto */ /* grouping vowel5, line 122 */ goto lab0; lab7: z->c = z->l - m1; if (!(eq_s_b(z, 2, s_7))) return 0; if (out_grouping_b_U(z, g_vowel6, 246, 252, 1) < 0) return 0; /* goto */ /* grouping vowel6, line 123 */ } lab0: z->c = z->l - m_test; } return 1; } static int r_mark_suffix_with_optional_n_consonant(struct SN_env * z) { { int m1 = z->l - z->c; (void)m1; /* or, line 134 */ { int m_test = z->l - z->c; /* test, line 133 */ if (!(eq_s_b(z, 1, s_8))) goto lab1; z->c = z->l - m_test; } { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) goto lab1; z->c = ret; /* next, line 133 */ } { int m_test = z->l - z->c; /* test, line 133 */ if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) goto lab1; z->c = z->l - m_test; } goto lab0; lab1: z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* not, line 135 */ { int m_test = z->l - z->c; /* test, line 135 */ if (!(eq_s_b(z, 1, s_9))) goto lab2; z->c = z->l - m_test; } return 0; lab2: z->c = z->l - m2; } { int m_test = z->l - z->c; /* test, line 135 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 135 */ } { int m_test = z->l - z->c; /* test, line 135 */ if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) return 0; z->c = z->l - m_test; } z->c = z->l - m_test; } } lab0: return 1; } static int r_mark_suffix_with_optional_s_consonant(struct SN_env * z) { { int m1 = z->l - z->c; (void)m1; /* or, line 145 */ { int m_test = z->l - z->c; /* test, line 144 */ if (!(eq_s_b(z, 1, s_10))) goto lab1; z->c = z->l - m_test; } { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) goto lab1; z->c = ret; /* next, line 144 */ } { 
int m_test = z->l - z->c; /* test, line 144 */ if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) goto lab1; z->c = z->l - m_test; } goto lab0; lab1: z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* not, line 146 */ { int m_test = z->l - z->c; /* test, line 146 */ if (!(eq_s_b(z, 1, s_11))) goto lab2; z->c = z->l - m_test; } return 0; lab2: z->c = z->l - m2; } { int m_test = z->l - z->c; /* test, line 146 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 146 */ } { int m_test = z->l - z->c; /* test, line 146 */ if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) return 0; z->c = z->l - m_test; } z->c = z->l - m_test; } } lab0: return 1; } static int r_mark_suffix_with_optional_y_consonant(struct SN_env * z) { { int m1 = z->l - z->c; (void)m1; /* or, line 155 */ { int m_test = z->l - z->c; /* test, line 154 */ if (!(eq_s_b(z, 1, s_12))) goto lab1; z->c = z->l - m_test; } { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) goto lab1; z->c = ret; /* next, line 154 */ } { int m_test = z->l - z->c; /* test, line 154 */ if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) goto lab1; z->c = z->l - m_test; } goto lab0; lab1: z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* not, line 156 */ { int m_test = z->l - z->c; /* test, line 156 */ if (!(eq_s_b(z, 1, s_13))) goto lab2; z->c = z->l - m_test; } return 0; lab2: z->c = z->l - m2; } { int m_test = z->l - z->c; /* test, line 156 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 156 */ } { int m_test = z->l - z->c; /* test, line 156 */ if (in_grouping_b_U(z, g_vowel, 97, 305, 0)) return 0; z->c = z->l - m_test; } z->c = z->l - m_test; } } lab0: return 1; } static int r_mark_suffix_with_optional_U_vowel(struct SN_env * z) { { int m1 = z->l - z->c; (void)m1; /* or, line 161 */ { int m_test = z->l - z->c; /* test, line 160 */ if (in_grouping_b_U(z, g_U, 105, 305, 0)) goto lab1; z->c = z->l - m_test; } { int ret = 
skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) goto lab1; z->c = ret; /* next, line 160 */ } { int m_test = z->l - z->c; /* test, line 160 */ if (out_grouping_b_U(z, g_vowel, 97, 305, 0)) goto lab1; z->c = z->l - m_test; } goto lab0; lab1: z->c = z->l - m1; { int m2 = z->l - z->c; (void)m2; /* not, line 162 */ { int m_test = z->l - z->c; /* test, line 162 */ if (in_grouping_b_U(z, g_U, 105, 305, 0)) goto lab2; z->c = z->l - m_test; } return 0; lab2: z->c = z->l - m2; } { int m_test = z->l - z->c; /* test, line 162 */ { int ret = skip_utf8(z->p, z->c, z->lb, 0, -1); if (ret < 0) return 0; z->c = ret; /* next, line 162 */ } { int m_test = z->l - z->c; /* test, line 162 */ if (out_grouping_b_U(z, g_vowel, 97, 305, 0)) return 0; z->c = z->l - m_test; } z->c = z->l - m_test; } } lab0: return 1; } static int r_mark_possessives(struct SN_env * z) { if (z->c <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((67133440 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_0, 10))) return 0; /* among, line 167 */ { int ret = r_mark_suffix_with_optional_U_vowel(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_U_vowel, line 169 */ if (ret < 0) return ret; } return 1; } static int r_mark_sU(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 173 */ if (ret < 0) return ret; } if (in_grouping_b_U(z, g_U, 105, 305, 0)) return 0; { int ret = r_mark_suffix_with_optional_s_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_s_consonant, line 175 */ if (ret < 0) return ret; } return 1; } static int r_mark_lArI(struct SN_env * z) { if (z->c - 3 <= z->lb || (z->p[z->c - 1] != 105 && z->p[z->c - 1] != 177)) return 0; if (!(find_among_b(z, a_1, 2))) return 0; /* among, line 179 */ return 1; } static int r_mark_yU(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 183 */ if (ret < 0) return ret; } if (in_grouping_b_U(z, 
g_U, 105, 305, 0)) return 0; { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 185 */ if (ret < 0) return ret; } return 1; } static int r_mark_nU(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 189 */ if (ret < 0) return ret; } if (!(find_among_b(z, a_2, 4))) return 0; /* among, line 190 */ return 1; } static int r_mark_nUn(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 194 */ if (ret < 0) return ret; } if (z->c - 1 <= z->lb || z->p[z->c - 1] != 110) return 0; if (!(find_among_b(z, a_3, 4))) return 0; /* among, line 195 */ { int ret = r_mark_suffix_with_optional_n_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_n_consonant, line 196 */ if (ret < 0) return ret; } return 1; } static int r_mark_yA(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 200 */ if (ret < 0) return ret; } if (z->c <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; if (!(find_among_b(z, a_4, 2))) return 0; /* among, line 201 */ { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 202 */ if (ret < 0) return ret; } return 1; } static int r_mark_nA(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 206 */ if (ret < 0) return ret; } if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; if (!(find_among_b(z, a_5, 2))) return 0; /* among, line 207 */ return 1; } static int r_mark_DA(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 211 */ if (ret < 0) return ret; } if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) 
return 0; if (!(find_among_b(z, a_6, 4))) return 0; /* among, line 212 */ return 1; } static int r_mark_ndA(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 216 */ if (ret < 0) return ret; } if (z->c - 2 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; if (!(find_among_b(z, a_7, 2))) return 0; /* among, line 217 */ return 1; } static int r_mark_DAn(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 221 */ if (ret < 0) return ret; } if (z->c - 2 <= z->lb || z->p[z->c - 1] != 110) return 0; if (!(find_among_b(z, a_8, 4))) return 0; /* among, line 222 */ return 1; } static int r_mark_ndAn(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 226 */ if (ret < 0) return ret; } if (z->c - 3 <= z->lb || z->p[z->c - 1] != 110) return 0; if (!(find_among_b(z, a_9, 2))) return 0; /* among, line 227 */ return 1; } static int r_mark_ylA(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 231 */ if (ret < 0) return ret; } if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; if (!(find_among_b(z, a_10, 2))) return 0; /* among, line 232 */ { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 233 */ if (ret < 0) return ret; } return 1; } static int r_mark_ki(struct SN_env * z) { if (!(eq_s_b(z, 2, s_14))) return 0; return 1; } static int r_mark_ncA(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 241 */ if (ret < 0) return ret; } if (z->c - 1 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; if (!(find_among_b(z, a_11, 2))) return 0; /* among, line 242 */ { int ret = 
r_mark_suffix_with_optional_n_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_n_consonant, line 243 */ if (ret < 0) return ret; } return 1; } static int r_mark_yUm(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 247 */ if (ret < 0) return ret; } if (z->c - 1 <= z->lb || z->p[z->c - 1] != 109) return 0; if (!(find_among_b(z, a_12, 4))) return 0; /* among, line 248 */ { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 249 */ if (ret < 0) return ret; } return 1; } static int r_mark_sUn(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 253 */ if (ret < 0) return ret; } if (z->c - 2 <= z->lb || z->p[z->c - 1] != 110) return 0; if (!(find_among_b(z, a_13, 4))) return 0; /* among, line 254 */ return 1; } static int r_mark_yUz(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 258 */ if (ret < 0) return ret; } if (z->c - 1 <= z->lb || z->p[z->c - 1] != 122) return 0; if (!(find_among_b(z, a_14, 4))) return 0; /* among, line 259 */ { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 260 */ if (ret < 0) return ret; } return 1; } static int r_mark_sUnUz(struct SN_env * z) { if (z->c - 4 <= z->lb || z->p[z->c - 1] != 122) return 0; if (!(find_among_b(z, a_15, 4))) return 0; /* among, line 264 */ return 1; } static int r_mark_lAr(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 268 */ if (ret < 0) return ret; } if (z->c - 2 <= z->lb || z->p[z->c - 1] != 114) return 0; if (!(find_among_b(z, a_16, 2))) return 0; /* among, line 269 */ return 1; } static int r_mark_nUz(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret 
== 0) return 0; /* call check_vowel_harmony, line 273 */ if (ret < 0) return ret; } if (z->c - 2 <= z->lb || z->p[z->c - 1] != 122) return 0; if (!(find_among_b(z, a_17, 4))) return 0; /* among, line 274 */ return 1; } static int r_mark_DUr(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 278 */ if (ret < 0) return ret; } if (z->c - 2 <= z->lb || z->p[z->c - 1] != 114) return 0; if (!(find_among_b(z, a_18, 8))) return 0; /* among, line 279 */ return 1; } static int r_mark_cAsInA(struct SN_env * z) { if (z->c - 5 <= z->lb || (z->p[z->c - 1] != 97 && z->p[z->c - 1] != 101)) return 0; if (!(find_among_b(z, a_19, 2))) return 0; /* among, line 283 */ return 1; } static int r_mark_yDU(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 287 */ if (ret < 0) return ret; } if (!(find_among_b(z, a_20, 32))) return 0; /* among, line 288 */ { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 292 */ if (ret < 0) return ret; } return 1; } static int r_mark_ysA(struct SN_env * z) { if (z->c - 1 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((26658 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0; if (!(find_among_b(z, a_21, 8))) return 0; /* among, line 297 */ { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 298 */ if (ret < 0) return ret; } return 1; } static int r_mark_ymUs_(struct SN_env * z) { { int ret = r_check_vowel_harmony(z); if (ret == 0) return 0; /* call check_vowel_harmony, line 302 */ if (ret < 0) return ret; } if (z->c - 3 <= z->lb || z->p[z->c - 1] != 159) return 0; if (!(find_among_b(z, a_22, 4))) return 0; /* among, line 303 */ { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 304 */ if (ret < 0) return 
ret; } return 1; } static int r_mark_yken(struct SN_env * z) { if (!(eq_s_b(z, 3, s_15))) return 0; { int ret = r_mark_suffix_with_optional_y_consonant(z); if (ret == 0) return 0; /* call mark_suffix_with_optional_y_consonant, line 308 */ if (ret < 0) return ret; } return 1; } static int r_stem_nominal_verb_suffixes(struct SN_env * z) { z->ket = z->c; /* [, line 312 */ z->B[0] = 1; /* set continue_stemming_noun_suffixes, line 313 */ { int m1 = z->l - z->c; (void)m1; /* or, line 315 */ { int m2 = z->l - z->c; (void)m2; /* or, line 314 */ { int ret = r_mark_ymUs_(z); if (ret == 0) goto lab3; /* call mark_ymUs_, line 314 */ if (ret < 0) return ret; } goto lab2; lab3: z->c = z->l - m2; { int ret = r_mark_yDU(z); if (ret == 0) goto lab4; /* call mark_yDU, line 314 */ if (ret < 0) return ret; } goto lab2; lab4: z->c = z->l - m2; { int ret = r_mark_ysA(z); if (ret == 0) goto lab5; /* call mark_ysA, line 314 */ if (ret < 0) return ret; } goto lab2; lab5: z->c = z->l - m2; { int ret = r_mark_yken(z); if (ret == 0) goto lab1; /* call mark_yken, line 314 */ if (ret < 0) return ret; } } lab2: goto lab0; lab1: z->c = z->l - m1; { int ret = r_mark_cAsInA(z); if (ret == 0) goto lab6; /* call mark_cAsInA, line 316 */ if (ret < 0) return ret; } { int m3 = z->l - z->c; (void)m3; /* or, line 316 */ { int ret = r_mark_sUnUz(z); if (ret == 0) goto lab8; /* call mark_sUnUz, line 316 */ if (ret < 0) return ret; } goto lab7; lab8: z->c = z->l - m3; { int ret = r_mark_lAr(z); if (ret == 0) goto lab9; /* call mark_lAr, line 316 */ if (ret < 0) return ret; } goto lab7; lab9: z->c = z->l - m3; { int ret = r_mark_yUm(z); if (ret == 0) goto lab10; /* call mark_yUm, line 316 */ if (ret < 0) return ret; } goto lab7; lab10: z->c = z->l - m3; { int ret = r_mark_sUn(z); if (ret == 0) goto lab11; /* call mark_sUn, line 316 */ if (ret < 0) return ret; } goto lab7; lab11: z->c = z->l - m3; { int ret = r_mark_yUz(z); if (ret == 0) goto lab12; /* call mark_yUz, line 316 */ if (ret < 0) return ret; } goto 
lab7; lab12: z->c = z->l - m3; } lab7: { int ret = r_mark_ymUs_(z); if (ret == 0) goto lab6; /* call mark_ymUs_, line 316 */ if (ret < 0) return ret; } goto lab0; lab6: z->c = z->l - m1; { int ret = r_mark_lAr(z); if (ret == 0) goto lab13; /* call mark_lAr, line 319 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 319 */ { int ret = slice_del(z); /* delete, line 319 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 319 */ z->ket = z->c; /* [, line 319 */ { int m4 = z->l - z->c; (void)m4; /* or, line 319 */ { int ret = r_mark_DUr(z); if (ret == 0) goto lab16; /* call mark_DUr, line 319 */ if (ret < 0) return ret; } goto lab15; lab16: z->c = z->l - m4; { int ret = r_mark_yDU(z); if (ret == 0) goto lab17; /* call mark_yDU, line 319 */ if (ret < 0) return ret; } goto lab15; lab17: z->c = z->l - m4; { int ret = r_mark_ysA(z); if (ret == 0) goto lab18; /* call mark_ysA, line 319 */ if (ret < 0) return ret; } goto lab15; lab18: z->c = z->l - m4; { int ret = r_mark_ymUs_(z); if (ret == 0) { z->c = z->l - m_keep; goto lab14; } /* call mark_ymUs_, line 319 */ if (ret < 0) return ret; } } lab15: lab14: ; } z->B[0] = 0; /* unset continue_stemming_noun_suffixes, line 320 */ goto lab0; lab13: z->c = z->l - m1; { int ret = r_mark_nUz(z); if (ret == 0) goto lab19; /* call mark_nUz, line 323 */ if (ret < 0) return ret; } { int m5 = z->l - z->c; (void)m5; /* or, line 323 */ { int ret = r_mark_yDU(z); if (ret == 0) goto lab21; /* call mark_yDU, line 323 */ if (ret < 0) return ret; } goto lab20; lab21: z->c = z->l - m5; { int ret = r_mark_ysA(z); if (ret == 0) goto lab19; /* call mark_ysA, line 323 */ if (ret < 0) return ret; } } lab20: goto lab0; lab19: z->c = z->l - m1; { int m6 = z->l - z->c; (void)m6; /* or, line 325 */ { int ret = r_mark_sUnUz(z); if (ret == 0) goto lab24; /* call mark_sUnUz, line 325 */ if (ret < 0) return ret; } goto lab23; lab24: z->c = z->l - m6; { int ret = r_mark_yUz(z); if (ret == 0) goto lab25; /* call 
mark_yUz, line 325 */ if (ret < 0) return ret; } goto lab23; lab25: z->c = z->l - m6; { int ret = r_mark_sUn(z); if (ret == 0) goto lab26; /* call mark_sUn, line 325 */ if (ret < 0) return ret; } goto lab23; lab26: z->c = z->l - m6; { int ret = r_mark_yUm(z); if (ret == 0) goto lab22; /* call mark_yUm, line 325 */ if (ret < 0) return ret; } } lab23: z->bra = z->c; /* ], line 325 */ { int ret = slice_del(z); /* delete, line 325 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 325 */ z->ket = z->c; /* [, line 325 */ { int ret = r_mark_ymUs_(z); if (ret == 0) { z->c = z->l - m_keep; goto lab27; } /* call mark_ymUs_, line 325 */ if (ret < 0) return ret; } lab27: ; } goto lab0; lab22: z->c = z->l - m1; { int ret = r_mark_DUr(z); if (ret == 0) return 0; /* call mark_DUr, line 327 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 327 */ { int ret = slice_del(z); /* delete, line 327 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 327 */ z->ket = z->c; /* [, line 327 */ { int m7 = z->l - z->c; (void)m7; /* or, line 327 */ { int ret = r_mark_sUnUz(z); if (ret == 0) goto lab30; /* call mark_sUnUz, line 327 */ if (ret < 0) return ret; } goto lab29; lab30: z->c = z->l - m7; { int ret = r_mark_lAr(z); if (ret == 0) goto lab31; /* call mark_lAr, line 327 */ if (ret < 0) return ret; } goto lab29; lab31: z->c = z->l - m7; { int ret = r_mark_yUm(z); if (ret == 0) goto lab32; /* call mark_yUm, line 327 */ if (ret < 0) return ret; } goto lab29; lab32: z->c = z->l - m7; { int ret = r_mark_sUn(z); if (ret == 0) goto lab33; /* call mark_sUn, line 327 */ if (ret < 0) return ret; } goto lab29; lab33: z->c = z->l - m7; { int ret = r_mark_yUz(z); if (ret == 0) goto lab34; /* call mark_yUz, line 327 */ if (ret < 0) return ret; } goto lab29; lab34: z->c = z->l - m7; } lab29: { int ret = r_mark_ymUs_(z); if (ret == 0) { z->c = z->l - m_keep; goto lab28; } /* call mark_ymUs_, line 327 */ if (ret < 0) return 
ret; } lab28: ; } } lab0: z->bra = z->c; /* ], line 328 */ { int ret = slice_del(z); /* delete, line 328 */ if (ret < 0) return ret; } return 1; } static int r_stem_suffix_chain_before_ki(struct SN_env * z) { z->ket = z->c; /* [, line 333 */ { int ret = r_mark_ki(z); if (ret == 0) return 0; /* call mark_ki, line 334 */ if (ret < 0) return ret; } { int m1 = z->l - z->c; (void)m1; /* or, line 342 */ { int ret = r_mark_DA(z); if (ret == 0) goto lab1; /* call mark_DA, line 336 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 336 */ { int ret = slice_del(z); /* delete, line 336 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 336 */ z->ket = z->c; /* [, line 336 */ { int m2 = z->l - z->c; (void)m2; /* or, line 338 */ { int ret = r_mark_lAr(z); if (ret == 0) goto lab4; /* call mark_lAr, line 337 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 337 */ { int ret = slice_del(z); /* delete, line 337 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 337 */ { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab5; } /* call stem_suffix_chain_before_ki, line 337 */ if (ret < 0) return ret; } lab5: ; } goto lab3; lab4: z->c = z->l - m2; { int ret = r_mark_possessives(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call mark_possessives, line 339 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 339 */ { int ret = slice_del(z); /* delete, line 339 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 339 */ z->ket = z->c; /* [, line 339 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call mark_lAr, line 339 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 339 */ { int ret = slice_del(z); /* delete, line 339 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab6; } /* call 
stem_suffix_chain_before_ki, line 339 */ if (ret < 0) return ret; } lab6: ; } } lab3: lab2: ; } goto lab0; lab1: z->c = z->l - m1; { int ret = r_mark_nUn(z); if (ret == 0) goto lab7; /* call mark_nUn, line 343 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 343 */ { int ret = slice_del(z); /* delete, line 343 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 343 */ z->ket = z->c; /* [, line 343 */ { int m3 = z->l - z->c; (void)m3; /* or, line 345 */ { int ret = r_mark_lArI(z); if (ret == 0) goto lab10; /* call mark_lArI, line 344 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 344 */ { int ret = slice_del(z); /* delete, line 344 */ if (ret < 0) return ret; } goto lab9; lab10: z->c = z->l - m3; z->ket = z->c; /* [, line 346 */ { int m4 = z->l - z->c; (void)m4; /* or, line 346 */ { int ret = r_mark_possessives(z); if (ret == 0) goto lab13; /* call mark_possessives, line 346 */ if (ret < 0) return ret; } goto lab12; lab13: z->c = z->l - m4; { int ret = r_mark_sU(z); if (ret == 0) goto lab11; /* call mark_sU, line 346 */ if (ret < 0) return ret; } } lab12: z->bra = z->c; /* ], line 346 */ { int ret = slice_del(z); /* delete, line 346 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 346 */ z->ket = z->c; /* [, line 346 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab14; } /* call mark_lAr, line 346 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 346 */ { int ret = slice_del(z); /* delete, line 346 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab14; } /* call stem_suffix_chain_before_ki, line 346 */ if (ret < 0) return ret; } lab14: ; } goto lab9; lab11: z->c = z->l - m3; { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab8; } /* call stem_suffix_chain_before_ki, line 348 */ if (ret < 0) return ret; } } lab9: lab8: ; } goto 
lab0; lab7: z->c = z->l - m1; { int ret = r_mark_ndA(z); if (ret == 0) return 0; /* call mark_ndA, line 351 */ if (ret < 0) return ret; } { int m5 = z->l - z->c; (void)m5; /* or, line 353 */ { int ret = r_mark_lArI(z); if (ret == 0) goto lab16; /* call mark_lArI, line 352 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 352 */ { int ret = slice_del(z); /* delete, line 352 */ if (ret < 0) return ret; } goto lab15; lab16: z->c = z->l - m5; { int ret = r_mark_sU(z); if (ret == 0) goto lab17; /* call mark_sU, line 354 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 354 */ { int ret = slice_del(z); /* delete, line 354 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 354 */ z->ket = z->c; /* [, line 354 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab18; } /* call mark_lAr, line 354 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 354 */ { int ret = slice_del(z); /* delete, line 354 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab18; } /* call stem_suffix_chain_before_ki, line 354 */ if (ret < 0) return ret; } lab18: ; } goto lab15; lab17: z->c = z->l - m5; { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) return 0; /* call stem_suffix_chain_before_ki, line 356 */ if (ret < 0) return ret; } } lab15: ; } lab0: return 1; } static int r_stem_noun_suffixes(struct SN_env * z) { { int m1 = z->l - z->c; (void)m1; /* or, line 363 */ z->ket = z->c; /* [, line 362 */ { int ret = r_mark_lAr(z); if (ret == 0) goto lab1; /* call mark_lAr, line 362 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 362 */ { int ret = slice_del(z); /* delete, line 362 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 362 */ { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab2; } /* call stem_suffix_chain_before_ki, line 362 */ if (ret < 0) return 
ret; } lab2: ; } goto lab0; lab1: z->c = z->l - m1; z->ket = z->c; /* [, line 364 */ { int ret = r_mark_ncA(z); if (ret == 0) goto lab3; /* call mark_ncA, line 364 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 364 */ { int ret = slice_del(z); /* delete, line 364 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 365 */ { int m2 = z->l - z->c; (void)m2; /* or, line 367 */ z->ket = z->c; /* [, line 366 */ { int ret = r_mark_lArI(z); if (ret == 0) goto lab6; /* call mark_lArI, line 366 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 366 */ { int ret = slice_del(z); /* delete, line 366 */ if (ret < 0) return ret; } goto lab5; lab6: z->c = z->l - m2; z->ket = z->c; /* [, line 368 */ { int m3 = z->l - z->c; (void)m3; /* or, line 368 */ { int ret = r_mark_possessives(z); if (ret == 0) goto lab9; /* call mark_possessives, line 368 */ if (ret < 0) return ret; } goto lab8; lab9: z->c = z->l - m3; { int ret = r_mark_sU(z); if (ret == 0) goto lab7; /* call mark_sU, line 368 */ if (ret < 0) return ret; } } lab8: z->bra = z->c; /* ], line 368 */ { int ret = slice_del(z); /* delete, line 368 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 368 */ z->ket = z->c; /* [, line 368 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab10; } /* call mark_lAr, line 368 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 368 */ { int ret = slice_del(z); /* delete, line 368 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab10; } /* call stem_suffix_chain_before_ki, line 368 */ if (ret < 0) return ret; } lab10: ; } goto lab5; lab7: z->c = z->l - m2; z->ket = z->c; /* [, line 370 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call mark_lAr, line 370 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 370 */ { int ret = slice_del(z); /* delete, line 370 */ if 
(ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab4; } /* call stem_suffix_chain_before_ki, line 370 */ if (ret < 0) return ret; } } lab5: lab4: ; } goto lab0; lab3: z->c = z->l - m1; z->ket = z->c; /* [, line 374 */ { int m4 = z->l - z->c; (void)m4; /* or, line 374 */ { int ret = r_mark_ndA(z); if (ret == 0) goto lab13; /* call mark_ndA, line 374 */ if (ret < 0) return ret; } goto lab12; lab13: z->c = z->l - m4; { int ret = r_mark_nA(z); if (ret == 0) goto lab11; /* call mark_nA, line 374 */ if (ret < 0) return ret; } } lab12: { int m5 = z->l - z->c; (void)m5; /* or, line 377 */ { int ret = r_mark_lArI(z); if (ret == 0) goto lab15; /* call mark_lArI, line 376 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 376 */ { int ret = slice_del(z); /* delete, line 376 */ if (ret < 0) return ret; } goto lab14; lab15: z->c = z->l - m5; { int ret = r_mark_sU(z); if (ret == 0) goto lab16; /* call mark_sU, line 378 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 378 */ { int ret = slice_del(z); /* delete, line 378 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 378 */ z->ket = z->c; /* [, line 378 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab17; } /* call mark_lAr, line 378 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 378 */ { int ret = slice_del(z); /* delete, line 378 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab17; } /* call stem_suffix_chain_before_ki, line 378 */ if (ret < 0) return ret; } lab17: ; } goto lab14; lab16: z->c = z->l - m5; { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) goto lab11; /* call stem_suffix_chain_before_ki, line 380 */ if (ret < 0) return ret; } } lab14: goto lab0; lab11: z->c = z->l - m1; z->ket = z->c; /* [, line 384 */ { int m6 = z->l - z->c; (void)m6; /* or, line 384 */ { int ret = 
r_mark_ndAn(z); if (ret == 0) goto lab20; /* call mark_ndAn, line 384 */ if (ret < 0) return ret; } goto lab19; lab20: z->c = z->l - m6; { int ret = r_mark_nU(z); if (ret == 0) goto lab18; /* call mark_nU, line 384 */ if (ret < 0) return ret; } } lab19: { int m7 = z->l - z->c; (void)m7; /* or, line 384 */ { int ret = r_mark_sU(z); if (ret == 0) goto lab22; /* call mark_sU, line 384 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 384 */ { int ret = slice_del(z); /* delete, line 384 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 384 */ z->ket = z->c; /* [, line 384 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab23; } /* call mark_lAr, line 384 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 384 */ { int ret = slice_del(z); /* delete, line 384 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab23; } /* call stem_suffix_chain_before_ki, line 384 */ if (ret < 0) return ret; } lab23: ; } goto lab21; lab22: z->c = z->l - m7; { int ret = r_mark_lArI(z); if (ret == 0) goto lab18; /* call mark_lArI, line 384 */ if (ret < 0) return ret; } } lab21: goto lab0; lab18: z->c = z->l - m1; z->ket = z->c; /* [, line 386 */ { int ret = r_mark_DAn(z); if (ret == 0) goto lab24; /* call mark_DAn, line 386 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 386 */ { int ret = slice_del(z); /* delete, line 386 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 386 */ z->ket = z->c; /* [, line 386 */ { int m8 = z->l - z->c; (void)m8; /* or, line 389 */ { int ret = r_mark_possessives(z); if (ret == 0) goto lab27; /* call mark_possessives, line 388 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 388 */ { int ret = slice_del(z); /* delete, line 388 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 388 */ z->ket = z->c; /* [, line 388 */ { int ret = 
r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab28; } /* call mark_lAr, line 388 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 388 */ { int ret = slice_del(z); /* delete, line 388 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab28; } /* call stem_suffix_chain_before_ki, line 388 */ if (ret < 0) return ret; } lab28: ; } goto lab26; lab27: z->c = z->l - m8; { int ret = r_mark_lAr(z); if (ret == 0) goto lab29; /* call mark_lAr, line 390 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 390 */ { int ret = slice_del(z); /* delete, line 390 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 390 */ { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab30; } /* call stem_suffix_chain_before_ki, line 390 */ if (ret < 0) return ret; } lab30: ; } goto lab26; lab29: z->c = z->l - m8; { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab25; } /* call stem_suffix_chain_before_ki, line 392 */ if (ret < 0) return ret; } } lab26: lab25: ; } goto lab0; lab24: z->c = z->l - m1; z->ket = z->c; /* [, line 396 */ { int m9 = z->l - z->c; (void)m9; /* or, line 396 */ { int ret = r_mark_nUn(z); if (ret == 0) goto lab33; /* call mark_nUn, line 396 */ if (ret < 0) return ret; } goto lab32; lab33: z->c = z->l - m9; { int ret = r_mark_ylA(z); if (ret == 0) goto lab31; /* call mark_ylA, line 396 */ if (ret < 0) return ret; } } lab32: z->bra = z->c; /* ], line 396 */ { int ret = slice_del(z); /* delete, line 396 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 397 */ { int m10 = z->l - z->c; (void)m10; /* or, line 399 */ z->ket = z->c; /* [, line 398 */ { int ret = r_mark_lAr(z); if (ret == 0) goto lab36; /* call mark_lAr, line 398 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 398 */ { int ret = slice_del(z); /* delete, line 398 
*/ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) goto lab36; /* call stem_suffix_chain_before_ki, line 398 */ if (ret < 0) return ret; } goto lab35; lab36: z->c = z->l - m10; z->ket = z->c; /* [, line 400 */ { int m11 = z->l - z->c; (void)m11; /* or, line 400 */ { int ret = r_mark_possessives(z); if (ret == 0) goto lab39; /* call mark_possessives, line 400 */ if (ret < 0) return ret; } goto lab38; lab39: z->c = z->l - m11; { int ret = r_mark_sU(z); if (ret == 0) goto lab37; /* call mark_sU, line 400 */ if (ret < 0) return ret; } } lab38: z->bra = z->c; /* ], line 400 */ { int ret = slice_del(z); /* delete, line 400 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 400 */ z->ket = z->c; /* [, line 400 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab40; } /* call mark_lAr, line 400 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 400 */ { int ret = slice_del(z); /* delete, line 400 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab40; } /* call stem_suffix_chain_before_ki, line 400 */ if (ret < 0) return ret; } lab40: ; } goto lab35; lab37: z->c = z->l - m10; { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab34; } /* call stem_suffix_chain_before_ki, line 402 */ if (ret < 0) return ret; } } lab35: lab34: ; } goto lab0; lab31: z->c = z->l - m1; z->ket = z->c; /* [, line 406 */ { int ret = r_mark_lArI(z); if (ret == 0) goto lab41; /* call mark_lArI, line 406 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 406 */ { int ret = slice_del(z); /* delete, line 406 */ if (ret < 0) return ret; } goto lab0; lab41: z->c = z->l - m1; { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) goto lab42; /* call stem_suffix_chain_before_ki, line 408 */ if (ret < 0) return ret; } goto lab0; lab42: z->c = z->l - m1; z->ket = z->c; /* [, line 410 */ 
{ int m12 = z->l - z->c; (void)m12; /* or, line 410 */ { int ret = r_mark_DA(z); if (ret == 0) goto lab45; /* call mark_DA, line 410 */ if (ret < 0) return ret; } goto lab44; lab45: z->c = z->l - m12; { int ret = r_mark_yU(z); if (ret == 0) goto lab46; /* call mark_yU, line 410 */ if (ret < 0) return ret; } goto lab44; lab46: z->c = z->l - m12; { int ret = r_mark_yA(z); if (ret == 0) goto lab43; /* call mark_yA, line 410 */ if (ret < 0) return ret; } } lab44: z->bra = z->c; /* ], line 410 */ { int ret = slice_del(z); /* delete, line 410 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 410 */ z->ket = z->c; /* [, line 410 */ { int m13 = z->l - z->c; (void)m13; /* or, line 410 */ { int ret = r_mark_possessives(z); if (ret == 0) goto lab49; /* call mark_possessives, line 410 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 410 */ { int ret = slice_del(z); /* delete, line 410 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 410 */ z->ket = z->c; /* [, line 410 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab50; } /* call mark_lAr, line 410 */ if (ret < 0) return ret; } lab50: ; } goto lab48; lab49: z->c = z->l - m13; { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab47; } /* call mark_lAr, line 410 */ if (ret < 0) return ret; } } lab48: z->bra = z->c; /* ], line 410 */ { int ret = slice_del(z); /* delete, line 410 */ if (ret < 0) return ret; } z->ket = z->c; /* [, line 410 */ { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab47; } /* call stem_suffix_chain_before_ki, line 410 */ if (ret < 0) return ret; } lab47: ; } goto lab0; lab43: z->c = z->l - m1; z->ket = z->c; /* [, line 412 */ { int m14 = z->l - z->c; (void)m14; /* or, line 412 */ { int ret = r_mark_possessives(z); if (ret == 0) goto lab52; /* call mark_possessives, line 412 */ if (ret < 0) return ret; } goto lab51; lab52: z->c = 
z->l - m14; { int ret = r_mark_sU(z); if (ret == 0) return 0; /* call mark_sU, line 412 */ if (ret < 0) return ret; } } lab51: z->bra = z->c; /* ], line 412 */ { int ret = slice_del(z); /* delete, line 412 */ if (ret < 0) return ret; } { int m_keep = z->l - z->c;/* (void) m_keep;*/ /* try, line 412 */ z->ket = z->c; /* [, line 412 */ { int ret = r_mark_lAr(z); if (ret == 0) { z->c = z->l - m_keep; goto lab53; } /* call mark_lAr, line 412 */ if (ret < 0) return ret; } z->bra = z->c; /* ], line 412 */ { int ret = slice_del(z); /* delete, line 412 */ if (ret < 0) return ret; } { int ret = r_stem_suffix_chain_before_ki(z); if (ret == 0) { z->c = z->l - m_keep; goto lab53; } /* call stem_suffix_chain_before_ki, line 412 */ if (ret < 0) return ret; } lab53: ; } } lab0: return 1; } static int r_post_process_last_consonants(struct SN_env * z) { int among_var; z->ket = z->c; /* [, line 416 */ among_var = find_among_b(z, a_23, 4); /* substring, line 416 */ if (!(among_var)) return 0; z->bra = z->c; /* ], line 416 */ switch(among_var) { case 0: return 0; case 1: { int ret = slice_from_s(z, 1, s_16); /* <-, line 417 */ if (ret < 0) return ret; } break; case 2: { int ret = slice_from_s(z, 2, s_17); /* <-, line 418 */ if (ret < 0) return ret; } break; case 3: { int ret = slice_from_s(z, 1, s_18); /* <-, line 419 */ if (ret < 0) return ret; } break; case 4: { int ret = slice_from_s(z, 1, s_19); /* <-, line 420 */ if (ret < 0) return ret; } break; } return 1; } static int r_append_U_to_stems_ending_with_d_or_g(struct SN_env * z) { { int m_test = z->l - z->c; /* test, line 431 */ { int m1 = z->l - z->c; (void)m1; /* or, line 431 */ if (!(eq_s_b(z, 1, s_20))) goto lab1; goto lab0; lab1: z->c = z->l - m1; if (!(eq_s_b(z, 1, s_21))) return 0; } lab0: z->c = z->l - m_test; } { int m2 = z->l - z->c; (void)m2; /* or, line 433 */ { int m_test = z->l - z->c; /* test, line 432 */ if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) goto lab3; /* goto */ /* grouping vowel, line 432 */ { int m3 
= z->l - z->c; (void)m3; /* or, line 432 */ if (!(eq_s_b(z, 1, s_22))) goto lab5; goto lab4; lab5: z->c = z->l - m3; if (!(eq_s_b(z, 2, s_23))) goto lab3; } lab4: z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 2, s_24); /* <+, line 432 */ z->c = c_keep; if (ret < 0) return ret; } goto lab2; lab3: z->c = z->l - m2; { int m_test = z->l - z->c; /* test, line 434 */ if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) goto lab6; /* goto */ /* grouping vowel, line 434 */ { int m4 = z->l - z->c; (void)m4; /* or, line 434 */ if (!(eq_s_b(z, 1, s_25))) goto lab8; goto lab7; lab8: z->c = z->l - m4; if (!(eq_s_b(z, 1, s_26))) goto lab6; } lab7: z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_27); /* <+, line 434 */ z->c = c_keep; if (ret < 0) return ret; } goto lab2; lab6: z->c = z->l - m2; { int m_test = z->l - z->c; /* test, line 436 */ if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) goto lab9; /* goto */ /* grouping vowel, line 436 */ { int m5 = z->l - z->c; (void)m5; /* or, line 436 */ if (!(eq_s_b(z, 1, s_28))) goto lab11; goto lab10; lab11: z->c = z->l - m5; if (!(eq_s_b(z, 1, s_29))) goto lab9; } lab10: z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 1, s_30); /* <+, line 436 */ z->c = c_keep; if (ret < 0) return ret; } goto lab2; lab9: z->c = z->l - m2; { int m_test = z->l - z->c; /* test, line 438 */ if (out_grouping_b_U(z, g_vowel, 97, 305, 1) < 0) return 0; /* goto */ /* grouping vowel, line 438 */ { int m6 = z->l - z->c; (void)m6; /* or, line 438 */ if (!(eq_s_b(z, 2, s_31))) goto lab13; goto lab12; lab13: z->c = z->l - m6; if (!(eq_s_b(z, 2, s_32))) return 0; } lab12: z->c = z->l - m_test; } { int c_keep = z->c; int ret = insert_s(z, z->c, z->c, 2, s_33); /* <+, line 438 */ z->c = c_keep; if (ret < 0) return ret; } } lab2: return 1; } static int r_more_than_one_syllable_word(struct SN_env * z) { { int c_test = z->c; /* test, line 446 */ { int i = 2; while(1) { /* 
atleast, line 446 */ int c1 = z->c; { /* gopast */ /* grouping vowel, line 446 */ int ret = out_grouping_U(z, g_vowel, 97, 305, 1); if (ret < 0) goto lab0; z->c += ret; } i--; continue; lab0: z->c = c1; break; } if (i > 0) return 0; } z->c = c_test; } return 1; } static int r_is_reserved_word(struct SN_env * z) { { int c1 = z->c; /* or, line 451 */ { int c_test = z->c; /* test, line 450 */ while(1) { /* gopast, line 450 */ if (!(eq_s(z, 2, s_34))) goto lab2; break; lab2: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) goto lab1; z->c = ret; /* gopast, line 450 */ } } z->I[0] = 2; if (!(z->I[0] == z->l)) goto lab1; z->c = c_test; } goto lab0; lab1: z->c = c1; { int c_test = z->c; /* test, line 452 */ while(1) { /* gopast, line 452 */ if (!(eq_s(z, 5, s_35))) goto lab3; break; lab3: { int ret = skip_utf8(z->p, z->c, 0, z->l, 1); if (ret < 0) return 0; z->c = ret; /* gopast, line 452 */ } } z->I[0] = 5; if (!(z->I[0] == z->l)) return 0; z->c = c_test; } } lab0: return 1; } static int r_postlude(struct SN_env * z) { { int c1 = z->c; /* not, line 456 */ { int ret = r_is_reserved_word(z); if (ret == 0) goto lab0; /* call is_reserved_word, line 456 */ if (ret < 0) return ret; } return 0; lab0: z->c = c1; } z->lb = z->c; z->c = z->l; /* backwards, line 457 */ { int m2 = z->l - z->c; (void)m2; /* do, line 458 */ { int ret = r_append_U_to_stems_ending_with_d_or_g(z); if (ret == 0) goto lab1; /* call append_U_to_stems_ending_with_d_or_g, line 458 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } { int m3 = z->l - z->c; (void)m3; /* do, line 459 */ { int ret = r_post_process_last_consonants(z); if (ret == 0) goto lab2; /* call post_process_last_consonants, line 459 */ if (ret < 0) return ret; } lab2: z->c = z->l - m3; } z->c = z->lb; return 1; } extern int turkish_UTF_8_stem(struct SN_env * z) { { int ret = r_more_than_one_syllable_word(z); if (ret == 0) return 0; /* call more_than_one_syllable_word, line 465 */ if (ret < 0) return ret; } z->lb = z->c; z->c = 
z->l; /* backwards, line 467 */ { int m1 = z->l - z->c; (void)m1; /* do, line 468 */ { int ret = r_stem_nominal_verb_suffixes(z); if (ret == 0) goto lab0; /* call stem_nominal_verb_suffixes, line 468 */ if (ret < 0) return ret; } lab0: z->c = z->l - m1; } if (!(z->B[0])) return 0; /* Boolean test continue_stemming_noun_suffixes, line 469 */ { int m2 = z->l - z->c; (void)m2; /* do, line 470 */ { int ret = r_stem_noun_suffixes(z); if (ret == 0) goto lab1; /* call stem_noun_suffixes, line 470 */ if (ret < 0) return ret; } lab1: z->c = z->l - m2; } z->c = z->lb; { int ret = r_postlude(z); if (ret == 0) return 0; /* call postlude, line 473 */ if (ret < 0) return ret; } return 1; } extern struct SN_env * turkish_UTF_8_create_env(void) { return SN_create_env(0, 1, 1); } extern void turkish_UTF_8_close_env(struct SN_env * z) { SN_close_env(z, 0); } LucenePlusPlus-rel_3.0.9/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_turkish.h000066400000000000000000000004661456444476200310030ustar00rootroot00000000000000 /* This file was generated automatically by the Snowball to ANSI C compiler */ #ifdef __cplusplus extern "C" { #endif extern struct SN_env* turkish_UTF_8_create_env(void); extern void turkish_UTF_8_close_env(struct SN_env* z); extern int turkish_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } #endif LucenePlusPlus-rel_3.0.9/src/core/000077500000000000000000000000001456444476200170475ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/CMakeLists.txt000066400000000000000000000035011456444476200216060ustar00rootroot00000000000000project(core) #################################### # create library target #################################### if (LUCENE_BUILD_SHARED) add_library(lucene++ SHARED) else() add_library(lucene++ STATIC) endif() add_library(lucene++::lucene++ ALIAS lucene++) #################################### # src #################################### file(GLOB_RECURSE lucene_sources "search/*.cpp" "analysis/*.cpp" "document/*.cpp" "index/*.cpp" 
"queryparser/*.cpp" "store/*.cpp" "util/*.c*" ) file(GLOB_RECURSE lucene_internal_headers "${lucene++-lib_SOURCE_DIR}/include/*.h" ) target_sources(lucene++ PRIVATE ${lucene_sources} ${lucene_internal_headers}) #################################### # include directories #################################### target_include_directories(lucene++ PUBLIC $ $ $ ${Boost_INCLUDE_DIRS}) #################################### # dependencies #################################### target_link_libraries(lucene++ Boost::boost Boost::date_time Boost::filesystem Boost::iostreams Boost::regex Boost::system Boost::thread ZLIB::ZLIB ) if(WIN32) target_link_libraries(lucene++ ws2_32) endif() #################################### # link args #################################### target_compile_options(lucene++ PRIVATE -DLPP_BUILDING_LIB) set_target_properties(lucene++ PROPERTIES COTIRE_CXX_PREFIX_HEADER_INIT "include/LuceneInc.h" CXX_VISIBILITY_PRESET hidden VISIBILITY_INLINES_HIDDEN 1 VERSION ${lucene++_VERSION} SOVERSION ${lucene++_SOVERSION}) cotire(lucene++) install(TARGETS lucene++ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT runtime) LucenePlusPlus-rel_3.0.9/src/core/analysis/000077500000000000000000000000001456444476200206725ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/analysis/ASCIIFoldingFilter.cpp000066400000000000000000003172201456444476200247040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ASCIIFoldingFilter.h" #include "TermAttribute.h" #include "MiscUtils.h" namespace Lucene { ASCIIFoldingFilter::ASCIIFoldingFilter(const TokenStreamPtr& input) : TokenFilter(input) { output = CharArray::newInstance(512); outputPos = 0; termAtt = addAttribute(); } ASCIIFoldingFilter::~ASCIIFoldingFilter() { } bool ASCIIFoldingFilter::incrementToken() { if (input->incrementToken()) { wchar_t* buffer = termAtt->termBufferArray(); int32_t length = termAtt->termLength(); // If no characters actually require rewriting then we just return token as-is for (int32_t i = 0; i < length; ++i) { wchar_t c = buffer[i]; if (c >= 0x0080) { foldToASCII(buffer, length); termAtt->setTermBuffer(output.get(), 0, outputPos); break; } } return true; } else { return false; } } void ASCIIFoldingFilter::foldToASCII(const wchar_t* input, int32_t length) { // Worst-case length required int32_t maxSizeNeeded = 4 * length; if (output.size() < maxSizeNeeded) { output.resize(MiscUtils::getNextSize(maxSizeNeeded)); } outputPos = 0; wchar_t* output = this->output.get(); for (int32_t pos = 0; pos < length; ++pos) { wchar_t c = input[pos]; // Quick test: if it's not in range then just keep current character if (c < 0x0080) { output[outputPos++] = c; } else { switch (c) { case 0x00C0: // [LATIN CAPITAL LETTER A WITH GRAVE] case 0x00C1: // [LATIN CAPITAL LETTER A WITH ACUTE] case 0x00C2: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] case 0x00C3: // [LATIN CAPITAL LETTER A WITH TILDE] case 0x00C4: // [LATIN CAPITAL LETTER A WITH DIAERESIS] case 0x00C5: // [LATIN CAPITAL LETTER A WITH RING ABOVE] case 0x0100: // [LATIN CAPITAL LETTER A WITH MACRON] case 0x0102: // [LATIN CAPITAL LETTER A WITH BREVE] case 0x0104: // [LATIN CAPITAL LETTER A WITH OGONEK] case 0x018F: // [LATIN CAPITAL LETTER SCHWA] case 0x01CD: // [LATIN CAPITAL LETTER A WITH CARON] case 0x01DE: // [LATIN CAPITAL LETTER A WITH 
DIAERESIS AND MACRON] case 0x01E0: // [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] case 0x01FA: // [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] case 0x0200: // [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] case 0x0202: // [LATIN CAPITAL LETTER A WITH INVERTED BREVE] case 0x0226: // [LATIN CAPITAL LETTER A WITH DOT ABOVE] case 0x023A: // [LATIN CAPITAL LETTER A WITH STROKE] case 0x1D00: // [LATIN LETTER SMALL CAPITAL A] case 0x1E00: // [LATIN CAPITAL LETTER A WITH RING BELOW] case 0x1EA0: // [LATIN CAPITAL LETTER A WITH DOT BELOW] case 0x1EA2: // [LATIN CAPITAL LETTER A WITH HOOK ABOVE] case 0x1EA4: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] case 0x1EA6: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] case 0x1EA8: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] case 0x1EAA: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] case 0x1EAC: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] case 0x1EAE: // [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] case 0x1EB0: // [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] case 0x1EB2: // [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE] case 0x1EB4: // [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] case 0x1EB6: // [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] case 0x24B6: // [CIRCLED LATIN CAPITAL LETTER A] case 0xFF21: // [FULLWIDTH LATIN CAPITAL LETTER A] output[outputPos++] = L'A'; break; case 0x00E0: // [LATIN SMALL LETTER A WITH GRAVE] case 0x00E1: // [LATIN SMALL LETTER A WITH ACUTE] case 0x00E2: // [LATIN SMALL LETTER A WITH CIRCUMFLEX] case 0x00E3: // [LATIN SMALL LETTER A WITH TILDE] case 0x00E4: // [LATIN SMALL LETTER A WITH DIAERESIS] case 0x00E5: // [LATIN SMALL LETTER A WITH RING ABOVE] case 0x0101: // [LATIN SMALL LETTER A WITH MACRON] case 0x0103: // [LATIN SMALL LETTER A WITH BREVE] case 0x0105: // [LATIN SMALL LETTER A WITH OGONEK] case 0x01CE: // [LATIN SMALL LETTER A WITH CARON] case 0x01DF: // [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] case 0x01E1: // 
[LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] case 0x01FB: // [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] case 0x0201: // [LATIN SMALL LETTER A WITH DOUBLE GRAVE] case 0x0203: // [LATIN SMALL LETTER A WITH INVERTED BREVE] case 0x0227: // [LATIN SMALL LETTER A WITH DOT ABOVE] case 0x0250: // [LATIN SMALL LETTER TURNED A] case 0x0259: // [LATIN SMALL LETTER SCHWA] case 0x025A: // [LATIN SMALL LETTER SCHWA WITH HOOK] case 0x1D8F: // [LATIN SMALL LETTER A WITH RETROFLEX HOOK] case 0x1D95: // [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] case 0x1E01: // [LATIN SMALL LETTER A WITH RING BELOW] case 0x1E9A: // [LATIN SMALL LETTER A WITH RIGHT HALF RING] case 0x1EA1: // [LATIN SMALL LETTER A WITH DOT BELOW] case 0x1EA3: // [LATIN SMALL LETTER A WITH HOOK ABOVE] case 0x1EA5: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] case 0x1EA7: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] case 0x1EA9: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] case 0x1EAB: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] case 0x1EAD: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] case 0x1EAF: // [LATIN SMALL LETTER A WITH BREVE AND ACUTE] case 0x1EB1: // [LATIN SMALL LETTER A WITH BREVE AND GRAVE] case 0x1EB3: // [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] case 0x1EB5: // [LATIN SMALL LETTER A WITH BREVE AND TILDE] case 0x1EB7: // [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] case 0x2090: // [LATIN SUBSCRIPT SMALL LETTER A] case 0x2094: // [LATIN SUBSCRIPT SMALL LETTER SCHWA] case 0x24D0: // [CIRCLED LATIN SMALL LETTER A] case 0x2C65: // [LATIN SMALL LETTER A WITH STROKE] case 0x2C6F: // [LATIN CAPITAL LETTER TURNED A] case 0xFF41: // [FULLWIDTH LATIN SMALL LETTER A] output[outputPos++] = L'a'; break; case 0xA732: // [LATIN CAPITAL LETTER AA] output[outputPos++] = L'A'; output[outputPos++] = L'A'; break; case 0x00C6: // [LATIN CAPITAL LETTER AE] case 0x01E2: // [LATIN CAPITAL LETTER AE WITH MACRON] case 0x01FC: // [LATIN CAPITAL LETTER AE WITH 
ACUTE] case 0x1D01: // [LATIN LETTER SMALL CAPITAL AE] output[outputPos++] = L'A'; output[outputPos++] = L'E'; break; case 0xA734: // [LATIN CAPITAL LETTER AO] output[outputPos++] = L'A'; output[outputPos++] = L'O'; break; case 0xA736: // [LATIN CAPITAL LETTER AU] output[outputPos++] = L'A'; output[outputPos++] = L'U'; break; case 0xA738: // [LATIN CAPITAL LETTER AV] case 0xA73A: // [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] output[outputPos++] = L'A'; output[outputPos++] = L'V'; break; case 0xA73C: // [LATIN CAPITAL LETTER AY] output[outputPos++] = L'A'; output[outputPos++] = L'Y'; break; case 0x249C: // [PARENTHESIZED LATIN SMALL LETTER A] output[outputPos++] = L'('; output[outputPos++] = L'a'; output[outputPos++] = L')'; break; case 0xA733: // [LATIN SMALL LETTER AA] output[outputPos++] = L'a'; output[outputPos++] = L'a'; break; case 0x00E6: // [LATIN SMALL LETTER AE] case 0x01E3: // [LATIN SMALL LETTER AE WITH MACRON] case 0x01FD: // [LATIN SMALL LETTER AE WITH ACUTE] case 0x1D02: // [LATIN SMALL LETTER TURNED AE] output[outputPos++] = L'a'; output[outputPos++] = L'e'; break; case 0xA735: // [LATIN SMALL LETTER AO] output[outputPos++] = L'a'; output[outputPos++] = L'o'; break; case 0xA737: // [LATIN SMALL LETTER AU] output[outputPos++] = L'a'; output[outputPos++] = L'u'; break; case 0xA739: // [LATIN SMALL LETTER AV] case 0xA73B: // [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] output[outputPos++] = L'a'; output[outputPos++] = L'v'; break; case 0xA73D: // [LATIN SMALL LETTER AY] output[outputPos++] = L'a'; output[outputPos++] = L'y'; break; case 0x0181: // [LATIN CAPITAL LETTER B WITH HOOK] case 0x0182: // [LATIN CAPITAL LETTER B WITH TOPBAR] case 0x0243: // [LATIN CAPITAL LETTER B WITH STROKE] case 0x0299: // [LATIN LETTER SMALL CAPITAL B] case 0x1D03: // [LATIN LETTER SMALL CAPITAL BARRED B] case 0x1E02: // [LATIN CAPITAL LETTER B WITH DOT ABOVE] case 0x1E04: // [LATIN CAPITAL LETTER B WITH DOT BELOW] case 0x1E06: // [LATIN CAPITAL LETTER B WITH LINE BELOW] 
case 0x24B7: // [CIRCLED LATIN CAPITAL LETTER B] case 0xFF22: // [FULLWIDTH LATIN CAPITAL LETTER B] output[outputPos++] = L'B'; break; case 0x0180: // [LATIN SMALL LETTER B WITH STROKE] case 0x0183: // [LATIN SMALL LETTER B WITH TOPBAR] case 0x0253: // [LATIN SMALL LETTER B WITH HOOK] case 0x1D6C: // [LATIN SMALL LETTER B WITH MIDDLE TILDE] case 0x1D80: // [LATIN SMALL LETTER B WITH PALATAL HOOK] case 0x1E03: // [LATIN SMALL LETTER B WITH DOT ABOVE] case 0x1E05: // [LATIN SMALL LETTER B WITH DOT BELOW] case 0x1E07: // [LATIN SMALL LETTER B WITH LINE BELOW] case 0x24D1: // [CIRCLED LATIN SMALL LETTER B] case 0xFF42: // [FULLWIDTH LATIN SMALL LETTER B] output[outputPos++] = L'b'; break; case 0x249D: // [PARENTHESIZED LATIN SMALL LETTER B] output[outputPos++] = L'('; output[outputPos++] = L'b'; output[outputPos++] = L')'; break; case 0x00C7: // [LATIN CAPITAL LETTER C WITH CEDILLA] case 0x0106: // [LATIN CAPITAL LETTER C WITH ACUTE] case 0x0108: // [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] case 0x010A: // [LATIN CAPITAL LETTER C WITH DOT ABOVE] case 0x010C: // [LATIN CAPITAL LETTER C WITH CARON] case 0x0187: // [LATIN CAPITAL LETTER C WITH HOOK] case 0x023B: // [LATIN CAPITAL LETTER C WITH STROKE] case 0x0297: // [LATIN LETTER STRETCHED C] case 0x1D04: // [LATIN LETTER SMALL CAPITAL C] case 0x1E08: // [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] case 0x24B8: // [CIRCLED LATIN CAPITAL LETTER C] case 0xFF23: // [FULLWIDTH LATIN CAPITAL LETTER C] output[outputPos++] = L'C'; break; case 0x00E7: // [LATIN SMALL LETTER C WITH CEDILLA] case 0x0107: // [LATIN SMALL LETTER C WITH ACUTE] case 0x0109: // [LATIN SMALL LETTER C WITH CIRCUMFLEX] case 0x010B: // [LATIN SMALL LETTER C WITH DOT ABOVE] case 0x010D: // [LATIN SMALL LETTER C WITH CARON] case 0x0188: // [LATIN SMALL LETTER C WITH HOOK] case 0x023C: // [LATIN SMALL LETTER C WITH STROKE] case 0x0255: // [LATIN SMALL LETTER C WITH CURL] case 0x1E09: // [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] case 0x2184: // [LATIN 
SMALL LETTER REVERSED C] case 0x24D2: // [CIRCLED LATIN SMALL LETTER C] case 0xA73E: // [LATIN CAPITAL LETTER REVERSED C WITH DOT] case 0xA73F: // [LATIN SMALL LETTER REVERSED C WITH DOT] case 0xFF43: // [FULLWIDTH LATIN SMALL LETTER C] output[outputPos++] = L'c'; break; case 0x249E: // [PARENTHESIZED LATIN SMALL LETTER C] output[outputPos++] = L'('; output[outputPos++] = L'c'; output[outputPos++] = L')'; break; case 0x00D0: // [LATIN CAPITAL LETTER ETH] case 0x010E: // [LATIN CAPITAL LETTER D WITH CARON] case 0x0110: // [LATIN CAPITAL LETTER D WITH STROKE] case 0x0189: // [LATIN CAPITAL LETTER AFRICAN D] case 0x018A: // [LATIN CAPITAL LETTER D WITH HOOK] case 0x018B: // [LATIN CAPITAL LETTER D WITH TOPBAR] case 0x1D05: // [LATIN LETTER SMALL CAPITAL D] case 0x1D06: // [LATIN LETTER SMALL CAPITAL ETH] case 0x1E0A: // [LATIN CAPITAL LETTER D WITH DOT ABOVE] case 0x1E0C: // [LATIN CAPITAL LETTER D WITH DOT BELOW] case 0x1E0E: // [LATIN CAPITAL LETTER D WITH LINE BELOW] case 0x1E10: // [LATIN CAPITAL LETTER D WITH CEDILLA] case 0x1E12: // [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] case 0x24B9: // [CIRCLED LATIN CAPITAL LETTER D] case 0xA779: // [LATIN CAPITAL LETTER INSULAR D] case 0xFF24: // [FULLWIDTH LATIN CAPITAL LETTER D] output[outputPos++] = L'D'; break; case 0x00F0: // [LATIN SMALL LETTER ETH] case 0x010F: // [LATIN SMALL LETTER D WITH CARON] case 0x0111: // [LATIN SMALL LETTER D WITH STROKE] case 0x018C: // [LATIN SMALL LETTER D WITH TOPBAR] case 0x0221: // [LATIN SMALL LETTER D WITH CURL] case 0x0256: // [LATIN SMALL LETTER D WITH TAIL] case 0x0257: // [LATIN SMALL LETTER D WITH HOOK] case 0x1D6D: // [LATIN SMALL LETTER D WITH MIDDLE TILDE] case 0x1D81: // [LATIN SMALL LETTER D WITH PALATAL HOOK] case 0x1D91: // [LATIN SMALL LETTER D WITH HOOK AND TAIL] case 0x1E0B: // [LATIN SMALL LETTER D WITH DOT ABOVE] case 0x1E0D: // [LATIN SMALL LETTER D WITH DOT BELOW] case 0x1E0F: // [LATIN SMALL LETTER D WITH LINE BELOW] case 0x1E11: // [LATIN SMALL LETTER D 
WITH CEDILLA] case 0x1E13: // [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] case 0x24D3: // [CIRCLED LATIN SMALL LETTER D] case 0xA77A: // [LATIN SMALL LETTER INSULAR D] case 0xFF44: // [FULLWIDTH LATIN SMALL LETTER D] output[outputPos++] = L'd'; break; case 0x01C4: // [LATIN CAPITAL LETTER DZ WITH CARON] case 0x01F1: // [LATIN CAPITAL LETTER DZ] output[outputPos++] = L'D'; output[outputPos++] = L'Z'; break; case 0x01C5: // [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] case 0x01F2: // [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] output[outputPos++] = L'D'; output[outputPos++] = L'z'; break; case 0x249F: // [PARENTHESIZED LATIN SMALL LETTER D] output[outputPos++] = L'('; output[outputPos++] = L'd'; output[outputPos++] = L')'; break; case 0x0238: // [LATIN SMALL LETTER DB DIGRAPH] output[outputPos++] = L'd'; output[outputPos++] = L'b'; break; case 0x01C6: // [LATIN SMALL LETTER DZ WITH CARON] case 0x01F3: // [LATIN SMALL LETTER DZ] case 0x02A3: // [LATIN SMALL LETTER DZ DIGRAPH] case 0x02A5: // [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] output[outputPos++] = L'd'; output[outputPos++] = L'z'; break; case 0x00C8: // [LATIN CAPITAL LETTER E WITH GRAVE] case 0x00C9: // [LATIN CAPITAL LETTER E WITH ACUTE] case 0x00CA: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] case 0x00CB: // [LATIN CAPITAL LETTER E WITH DIAERESIS] case 0x0112: // [LATIN CAPITAL LETTER E WITH MACRON] case 0x0114: // [LATIN CAPITAL LETTER E WITH BREVE] case 0x0116: // [LATIN CAPITAL LETTER E WITH DOT ABOVE] case 0x0118: // [LATIN CAPITAL LETTER E WITH OGONEK] case 0x011A: // [LATIN CAPITAL LETTER E WITH CARON] case 0x018E: // [LATIN CAPITAL LETTER REVERSED E] case 0x0190: // [LATIN CAPITAL LETTER OPEN E] case 0x0204: // [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] case 0x0206: // [LATIN CAPITAL LETTER E WITH INVERTED BREVE] case 0x0228: // [LATIN CAPITAL LETTER E WITH CEDILLA] case 0x0246: // [LATIN CAPITAL LETTER E WITH STROKE] case 0x1D07: // [LATIN LETTER SMALL CAPITAL E] case 0x1E14: // [LATIN 
CAPITAL LETTER E WITH MACRON AND GRAVE] case 0x1E16: // [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] case 0x1E18: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] case 0x1E1A: // [LATIN CAPITAL LETTER E WITH TILDE BELOW] case 0x1E1C: // [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] case 0x1EB8: // [LATIN CAPITAL LETTER E WITH DOT BELOW] case 0x1EBA: // [LATIN CAPITAL LETTER E WITH HOOK ABOVE] case 0x1EBC: // [LATIN CAPITAL LETTER E WITH TILDE] case 0x1EBE: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] case 0x1EC0: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] case 0x1EC2: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] case 0x1EC4: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] case 0x1EC6: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] case 0x24BA: // [CIRCLED LATIN CAPITAL LETTER E] case 0x2C7B: // [LATIN LETTER SMALL CAPITAL TURNED E] case 0xFF25: // [FULLWIDTH LATIN CAPITAL LETTER E] output[outputPos++] = L'E'; break; case 0x00E8: // [LATIN SMALL LETTER E WITH GRAVE] case 0x00E9: // [LATIN SMALL LETTER E WITH ACUTE] case 0x00EA: // [LATIN SMALL LETTER E WITH CIRCUMFLEX] case 0x00EB: // [LATIN SMALL LETTER E WITH DIAERESIS] case 0x0113: // [LATIN SMALL LETTER E WITH MACRON] case 0x0115: // [LATIN SMALL LETTER E WITH BREVE] case 0x0117: // [LATIN SMALL LETTER E WITH DOT ABOVE] case 0x0119: // [LATIN SMALL LETTER E WITH OGONEK] case 0x011B: // [LATIN SMALL LETTER E WITH CARON] case 0x01DD: // [LATIN SMALL LETTER TURNED E] case 0x0205: // [LATIN SMALL LETTER E WITH DOUBLE GRAVE] case 0x0207: // [LATIN SMALL LETTER E WITH INVERTED BREVE] case 0x0229: // [LATIN SMALL LETTER E WITH CEDILLA] case 0x0247: // [LATIN SMALL LETTER E WITH STROKE] case 0x0258: // [LATIN SMALL LETTER REVERSED E] case 0x025B: // [LATIN SMALL LETTER OPEN E] case 0x025C: // [LATIN SMALL LETTER REVERSED OPEN E] case 0x025D: // [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] case 0x025E: // [LATIN SMALL LETTER CLOSED REVERSED OPEN E] case 0x029A: // 
[LATIN SMALL LETTER CLOSED OPEN E] case 0x1D08: // [LATIN SMALL LETTER TURNED OPEN E] case 0x1D92: // [LATIN SMALL LETTER E WITH RETROFLEX HOOK] case 0x1D93: // [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] case 0x1D94: // [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] case 0x1E15: // [LATIN SMALL LETTER E WITH MACRON AND GRAVE] case 0x1E17: // [LATIN SMALL LETTER E WITH MACRON AND ACUTE] case 0x1E19: // [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] case 0x1E1B: // [LATIN SMALL LETTER E WITH TILDE BELOW] case 0x1E1D: // [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] case 0x1EB9: // [LATIN SMALL LETTER E WITH DOT BELOW] case 0x1EBB: // [LATIN SMALL LETTER E WITH HOOK ABOVE] case 0x1EBD: // [LATIN SMALL LETTER E WITH TILDE] case 0x1EBF: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] case 0x1EC1: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] case 0x1EC3: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] case 0x1EC5: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] case 0x1EC7: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] case 0x2091: // [LATIN SUBSCRIPT SMALL LETTER E] case 0x24D4: // [CIRCLED LATIN SMALL LETTER E] case 0x2C78: // [LATIN SMALL LETTER E WITH NOTCH] case 0xFF45: // [FULLWIDTH LATIN SMALL LETTER E] output[outputPos++] = L'e'; break; case 0x24A0: // [PARENTHESIZED LATIN SMALL LETTER E] output[outputPos++] = L'('; output[outputPos++] = L'e'; output[outputPos++] = L')'; break; case 0x0191: // [LATIN CAPITAL LETTER F WITH HOOK] case 0x1E1E: // [LATIN CAPITAL LETTER F WITH DOT ABOVE] case 0x24BB: // [CIRCLED LATIN CAPITAL LETTER F] case 0xA730: // [LATIN LETTER SMALL CAPITAL F] case 0xA77B: // [LATIN CAPITAL LETTER INSULAR F] case 0xA7FB: // [LATIN EPIGRAPHIC LETTER REVERSED F] case 0xFF26: // [FULLWIDTH LATIN CAPITAL LETTER F] output[outputPos++] = L'F'; break; case 0x0192: // [LATIN SMALL LETTER F WITH HOOK] case 0x1D6E: // [LATIN SMALL LETTER F WITH MIDDLE TILDE] case 0x1D82: // [LATIN SMALL LETTER F WITH PALATAL 
HOOK] case 0x1E1F: // [LATIN SMALL LETTER F WITH DOT ABOVE] case 0x1E9B: // [LATIN SMALL LETTER LONG S WITH DOT ABOVE] case 0x24D5: // [CIRCLED LATIN SMALL LETTER F] case 0xA77C: // [LATIN SMALL LETTER INSULAR F] case 0xFF46: // [FULLWIDTH LATIN SMALL LETTER F] output[outputPos++] = L'f'; break; case 0x24A1: // [PARENTHESIZED LATIN SMALL LETTER F] output[outputPos++] = L'('; output[outputPos++] = L'f'; output[outputPos++] = L')'; break; case 0xFB00: // [LATIN SMALL LIGATURE FF] output[outputPos++] = L'f'; output[outputPos++] = L'f'; break; case 0xFB03: // [LATIN SMALL LIGATURE FFI] output[outputPos++] = L'f'; output[outputPos++] = L'f'; output[outputPos++] = L'i'; break; case 0xFB04: // [LATIN SMALL LIGATURE FFL] output[outputPos++] = L'f'; output[outputPos++] = L'f'; output[outputPos++] = L'l'; break; case 0xFB01: // [LATIN SMALL LIGATURE FI] output[outputPos++] = L'f'; output[outputPos++] = L'i'; break; case 0xFB02: // [LATIN SMALL LIGATURE FL] output[outputPos++] = L'f'; output[outputPos++] = L'l'; break; case 0x011C: // [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] case 0x011E: // [LATIN CAPITAL LETTER G WITH BREVE] case 0x0120: // [LATIN CAPITAL LETTER G WITH DOT ABOVE] case 0x0122: // [LATIN CAPITAL LETTER G WITH CEDILLA] case 0x0193: // [LATIN CAPITAL LETTER G WITH HOOK] case 0x01E4: // [LATIN CAPITAL LETTER G WITH STROKE] case 0x01E5: // [LATIN SMALL LETTER G WITH STROKE] case 0x01E6: // [LATIN CAPITAL LETTER G WITH CARON] case 0x01E7: // [LATIN SMALL LETTER G WITH CARON] case 0x01F4: // [LATIN CAPITAL LETTER G WITH ACUTE] case 0x0262: // [LATIN LETTER SMALL CAPITAL G] case 0x029B: // [LATIN LETTER SMALL CAPITAL G WITH HOOK] case 0x1E20: // [LATIN CAPITAL LETTER G WITH MACRON] case 0x24BC: // [CIRCLED LATIN CAPITAL LETTER G] case 0xA77D: // [LATIN CAPITAL LETTER INSULAR G] case 0xA77E: // [LATIN CAPITAL LETTER TURNED INSULAR G] case 0xFF27: // [FULLWIDTH LATIN CAPITAL LETTER G] output[outputPos++] = L'G'; break; case 0x011D: // [LATIN SMALL LETTER G WITH 
CIRCUMFLEX] case 0x011F: // [LATIN SMALL LETTER G WITH BREVE] case 0x0121: // [LATIN SMALL LETTER G WITH DOT ABOVE] case 0x0123: // [LATIN SMALL LETTER G WITH CEDILLA] case 0x01F5: // [LATIN SMALL LETTER G WITH ACUTE] case 0x0260: // [LATIN SMALL LETTER G WITH HOOK] case 0x0261: // [LATIN SMALL LETTER SCRIPT G] case 0x1D77: // [LATIN SMALL LETTER TURNED G] case 0x1D79: // [LATIN SMALL LETTER INSULAR G] case 0x1D83: // [LATIN SMALL LETTER G WITH PALATAL HOOK] case 0x1E21: // [LATIN SMALL LETTER G WITH MACRON] case 0x24D6: // [CIRCLED LATIN SMALL LETTER G] case 0xA77F: // [LATIN SMALL LETTER TURNED INSULAR G] case 0xFF47: // [FULLWIDTH LATIN SMALL LETTER G] output[outputPos++] = L'g'; break; case 0x24A2: // [PARENTHESIZED LATIN SMALL LETTER G] output[outputPos++] = L'('; output[outputPos++] = L'g'; output[outputPos++] = L')'; break; case 0x0124: // [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] case 0x0126: // [LATIN CAPITAL LETTER H WITH STROKE] case 0x021E: // [LATIN CAPITAL LETTER H WITH CARON] case 0x029C: // [LATIN LETTER SMALL CAPITAL H] case 0x1E22: // [LATIN CAPITAL LETTER H WITH DOT ABOVE] case 0x1E24: // [LATIN CAPITAL LETTER H WITH DOT BELOW] case 0x1E26: // [LATIN CAPITAL LETTER H WITH DIAERESIS] case 0x1E28: // [LATIN CAPITAL LETTER H WITH CEDILLA] case 0x1E2A: // [LATIN CAPITAL LETTER H WITH BREVE BELOW] case 0x24BD: // [CIRCLED LATIN CAPITAL LETTER H] case 0x2C67: // [LATIN CAPITAL LETTER H WITH DESCENDER] case 0x2C75: // [LATIN CAPITAL LETTER HALF H] case 0xFF28: // [FULLWIDTH LATIN CAPITAL LETTER H] output[outputPos++] = L'H'; break; case 0x0125: // [LATIN SMALL LETTER H WITH CIRCUMFLEX] case 0x0127: // [LATIN SMALL LETTER H WITH STROKE] case 0x021F: // [LATIN SMALL LETTER H WITH CARON] case 0x0265: // [LATIN SMALL LETTER TURNED H] case 0x0266: // [LATIN SMALL LETTER H WITH HOOK] case 0x02AE: // [LATIN SMALL LETTER TURNED H WITH FISHHOOK] case 0x02AF: // [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] case 0x1E23: // [LATIN SMALL LETTER H WITH DOT 
ABOVE] case 0x1E25: // [LATIN SMALL LETTER H WITH DOT BELOW] case 0x1E27: // [LATIN SMALL LETTER H WITH DIAERESIS] case 0x1E29: // [LATIN SMALL LETTER H WITH CEDILLA] case 0x1E2B: // [LATIN SMALL LETTER H WITH BREVE BELOW] case 0x1E96: // [LATIN SMALL LETTER H WITH LINE BELOW] case 0x24D7: // [CIRCLED LATIN SMALL LETTER H] case 0x2C68: // [LATIN SMALL LETTER H WITH DESCENDER] case 0x2C76: // [LATIN SMALL LETTER HALF H] case 0xFF48: // [FULLWIDTH LATIN SMALL LETTER H] output[outputPos++] = L'h'; break; case 0x01F6: // [LATIN CAPITAL LETTER HWAIR] output[outputPos++] = L'H'; output[outputPos++] = L'V'; break; case 0x24A3: // [PARENTHESIZED LATIN SMALL LETTER H] output[outputPos++] = L'('; output[outputPos++] = L'h'; output[outputPos++] = L')'; break; case 0x0195: // [LATIN SMALL LETTER HV] output[outputPos++] = L'h'; output[outputPos++] = L'v'; break; case 0x00CC: // [LATIN CAPITAL LETTER I WITH GRAVE] case 0x00CD: // [LATIN CAPITAL LETTER I WITH ACUTE] case 0x00CE: // [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] case 0x00CF: // [LATIN CAPITAL LETTER I WITH DIAERESIS] case 0x0128: // [LATIN CAPITAL LETTER I WITH TILDE] case 0x012A: // [LATIN CAPITAL LETTER I WITH MACRON] case 0x012C: // [LATIN CAPITAL LETTER I WITH BREVE] case 0x012E: // [LATIN CAPITAL LETTER I WITH OGONEK] case 0x0130: // [LATIN CAPITAL LETTER I WITH DOT ABOVE] case 0x0196: // [LATIN CAPITAL LETTER IOTA] case 0x0197: // [LATIN CAPITAL LETTER I WITH STROKE] case 0x01CF: // [LATIN CAPITAL LETTER I WITH CARON] case 0x0208: // [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] case 0x020A: // [LATIN CAPITAL LETTER I WITH INVERTED BREVE] case 0x026A: // [LATIN LETTER SMALL CAPITAL I] case 0x1D7B: // [LATIN SMALL CAPITAL LETTER I WITH STROKE] case 0x1E2C: // [LATIN CAPITAL LETTER I WITH TILDE BELOW] case 0x1E2E: // [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] case 0x1EC8: // [LATIN CAPITAL LETTER I WITH HOOK ABOVE] case 0x1ECA: // [LATIN CAPITAL LETTER I WITH DOT BELOW] case 0x24BE: // [CIRCLED LATIN CAPITAL 
LETTER I] case 0xA7FE: // [LATIN EPIGRAPHIC LETTER I LONGA] case 0xFF29: // [FULLWIDTH LATIN CAPITAL LETTER I] output[outputPos++] = L'I'; break; case 0x00EC: // [LATIN SMALL LETTER I WITH GRAVE] case 0x00ED: // [LATIN SMALL LETTER I WITH ACUTE] case 0x00EE: // [LATIN SMALL LETTER I WITH CIRCUMFLEX] case 0x00EF: // [LATIN SMALL LETTER I WITH DIAERESIS] case 0x0129: // [LATIN SMALL LETTER I WITH TILDE] case 0x012B: // [LATIN SMALL LETTER I WITH MACRON] case 0x012D: // [LATIN SMALL LETTER I WITH BREVE] case 0x012F: // [LATIN SMALL LETTER I WITH OGONEK] case 0x0131: // [LATIN SMALL LETTER DOTLESS I] case 0x01D0: // [LATIN SMALL LETTER I WITH CARON] case 0x0209: // [LATIN SMALL LETTER I WITH DOUBLE GRAVE] case 0x020B: // [LATIN SMALL LETTER I WITH INVERTED BREVE] case 0x0268: // [LATIN SMALL LETTER I WITH STROKE] case 0x1D09: // [LATIN SMALL LETTER TURNED I] case 0x1D62: // [LATIN SUBSCRIPT SMALL LETTER I] case 0x1D7C: // [LATIN SMALL LETTER IOTA WITH STROKE] case 0x1D96: // [LATIN SMALL LETTER I WITH RETROFLEX HOOK] case 0x1E2D: // [LATIN SMALL LETTER I WITH TILDE BELOW] case 0x1E2F: // [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] case 0x1EC9: // [LATIN SMALL LETTER I WITH HOOK ABOVE] case 0x1ECB: // [LATIN SMALL LETTER I WITH DOT BELOW] case 0x2071: // [SUPERSCRIPT LATIN SMALL LETTER I] case 0x24D8: // [CIRCLED LATIN SMALL LETTER I] case 0xFF49: // [FULLWIDTH LATIN SMALL LETTER I] output[outputPos++] = L'i'; break; case 0x0132: // [LATIN CAPITAL LIGATURE IJ] output[outputPos++] = L'I'; output[outputPos++] = L'J'; break; case 0x24A4: // [PARENTHESIZED LATIN SMALL LETTER I] output[outputPos++] = L'('; output[outputPos++] = L'i'; output[outputPos++] = L')'; break; case 0x0133: // [LATIN SMALL LIGATURE IJ] output[outputPos++] = L'i'; output[outputPos++] = L'j'; break; case 0x0134: // [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] case 0x0248: // [LATIN CAPITAL LETTER J WITH STROKE] case 0x1D0A: // [LATIN LETTER SMALL CAPITAL J] case 0x24BF: // [CIRCLED LATIN CAPITAL LETTER 
J] case 0xFF2A: // [FULLWIDTH LATIN CAPITAL LETTER J] output[outputPos++] = L'J'; break; case 0x0135: // [LATIN SMALL LETTER J WITH CIRCUMFLEX] case 0x01F0: // [LATIN SMALL LETTER J WITH CARON] case 0x0237: // [LATIN SMALL LETTER DOTLESS J] case 0x0249: // [LATIN SMALL LETTER J WITH STROKE] case 0x025F: // [LATIN SMALL LETTER DOTLESS J WITH STROKE] case 0x0284: // [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] case 0x029D: // [LATIN SMALL LETTER J WITH CROSSED-TAIL] case 0x24D9: // [CIRCLED LATIN SMALL LETTER J] case 0x2C7C: // [LATIN SUBSCRIPT SMALL LETTER J] case 0xFF4A: // [FULLWIDTH LATIN SMALL LETTER J] output[outputPos++] = L'j'; break; case 0x24A5: // [PARENTHESIZED LATIN SMALL LETTER J] output[outputPos++] = L'('; output[outputPos++] = L'j'; output[outputPos++] = L')'; break; case 0x0136: // [LATIN CAPITAL LETTER K WITH CEDILLA] case 0x0198: // [LATIN CAPITAL LETTER K WITH HOOK] case 0x01E8: // [LATIN CAPITAL LETTER K WITH CARON] case 0x1D0B: // [LATIN LETTER SMALL CAPITAL K] case 0x1E30: // [LATIN CAPITAL LETTER K WITH ACUTE] case 0x1E32: // [LATIN CAPITAL LETTER K WITH DOT BELOW] case 0x1E34: // [LATIN CAPITAL LETTER K WITH LINE BELOW] case 0x24C0: // [CIRCLED LATIN CAPITAL LETTER K] case 0x2C69: // [LATIN CAPITAL LETTER K WITH DESCENDER] case 0xA740: // [LATIN CAPITAL LETTER K WITH STROKE] case 0xA742: // [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] case 0xA744: // [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] case 0xFF2B: // [FULLWIDTH LATIN CAPITAL LETTER K] output[outputPos++] = L'K'; break; case 0x0137: // [LATIN SMALL LETTER K WITH CEDILLA] case 0x0199: // [LATIN SMALL LETTER K WITH HOOK] case 0x01E9: // [LATIN SMALL LETTER K WITH CARON] case 0x029E: // [LATIN SMALL LETTER TURNED K] case 0x1D84: // [LATIN SMALL LETTER K WITH PALATAL HOOK] case 0x1E31: // [LATIN SMALL LETTER K WITH ACUTE] case 0x1E33: // [LATIN SMALL LETTER K WITH DOT BELOW] case 0x1E35: // [LATIN SMALL LETTER K WITH LINE BELOW] case 0x24DA: // [CIRCLED LATIN SMALL 
LETTER K] case 0x2C6A: // [LATIN SMALL LETTER K WITH DESCENDER] case 0xA741: // [LATIN SMALL LETTER K WITH STROKE] case 0xA743: // [LATIN SMALL LETTER K WITH DIAGONAL STROKE] case 0xA745: // [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] case 0xFF4B: // [FULLWIDTH LATIN SMALL LETTER K] output[outputPos++] = L'k'; break; case 0x24A6: // [PARENTHESIZED LATIN SMALL LETTER K] output[outputPos++] = L'('; output[outputPos++] = L'k'; output[outputPos++] = L')'; break; case 0x0139: // [LATIN CAPITAL LETTER L WITH ACUTE] case 0x013B: // [LATIN CAPITAL LETTER L WITH CEDILLA] case 0x013D: // [LATIN CAPITAL LETTER L WITH CARON] case 0x013F: // [LATIN CAPITAL LETTER L WITH MIDDLE DOT] case 0x0141: // [LATIN CAPITAL LETTER L WITH STROKE] case 0x023D: // [LATIN CAPITAL LETTER L WITH BAR] case 0x029F: // [LATIN LETTER SMALL CAPITAL L] case 0x1D0C: // [LATIN LETTER SMALL CAPITAL L WITH STROKE] case 0x1E36: // [LATIN CAPITAL LETTER L WITH DOT BELOW] case 0x1E38: // [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] case 0x1E3A: // [LATIN CAPITAL LETTER L WITH LINE BELOW] case 0x1E3C: // [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] case 0x24C1: // [CIRCLED LATIN CAPITAL LETTER L] case 0x2C60: // [LATIN CAPITAL LETTER L WITH DOUBLE BAR] case 0x2C62: // [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] case 0xA746: // [LATIN CAPITAL LETTER BROKEN L] case 0xA748: // [LATIN CAPITAL LETTER L WITH HIGH STROKE] case 0xA780: // [LATIN CAPITAL LETTER TURNED L] case 0xFF2C: // [FULLWIDTH LATIN CAPITAL LETTER L] output[outputPos++] = L'L'; break; case 0x013A: // [LATIN SMALL LETTER L WITH ACUTE] case 0x013C: // [LATIN SMALL LETTER L WITH CEDILLA] case 0x013E: // [LATIN SMALL LETTER L WITH CARON] case 0x0140: // [LATIN SMALL LETTER L WITH MIDDLE DOT] case 0x0142: // [LATIN SMALL LETTER L WITH STROKE] case 0x019A: // [LATIN SMALL LETTER L WITH BAR] case 0x0234: // [LATIN SMALL LETTER L WITH CURL] case 0x026B: // [LATIN SMALL LETTER L WITH MIDDLE TILDE] case 0x026C: // [LATIN SMALL LETTER L 
WITH BELT] case 0x026D: // [LATIN SMALL LETTER L WITH RETROFLEX HOOK] case 0x1D85: // [LATIN SMALL LETTER L WITH PALATAL HOOK] case 0x1E37: // [LATIN SMALL LETTER L WITH DOT BELOW] case 0x1E39: // [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] case 0x1E3B: // [LATIN SMALL LETTER L WITH LINE BELOW] case 0x1E3D: // [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] case 0x24DB: // [CIRCLED LATIN SMALL LETTER L] case 0x2C61: // [LATIN SMALL LETTER L WITH DOUBLE BAR] case 0xA747: // [LATIN SMALL LETTER BROKEN L] case 0xA749: // [LATIN SMALL LETTER L WITH HIGH STROKE] case 0xA781: // [LATIN SMALL LETTER TURNED L] case 0xFF4C: // [FULLWIDTH LATIN SMALL LETTER L] output[outputPos++] = L'l'; break; case 0x01C7: // [LATIN CAPITAL LETTER LJ] output[outputPos++] = L'L'; output[outputPos++] = L'J'; break; case 0x1EFA: // [LATIN CAPITAL LETTER MIDDLE-WELSH LL] output[outputPos++] = L'L'; output[outputPos++] = L'L'; break; case 0x01C8: // [LATIN CAPITAL LETTER L WITH SMALL LETTER J] output[outputPos++] = L'L'; output[outputPos++] = L'j'; break; case 0x24A7: // [PARENTHESIZED LATIN SMALL LETTER L] output[outputPos++] = L'('; output[outputPos++] = L'l'; output[outputPos++] = L')'; break; case 0x01C9: // [LATIN SMALL LETTER LJ] output[outputPos++] = L'l'; output[outputPos++] = L'j'; break; case 0x1EFB: // [LATIN SMALL LETTER MIDDLE-WELSH LL] output[outputPos++] = L'l'; output[outputPos++] = L'l'; break; case 0x02AA: // [LATIN SMALL LETTER LS DIGRAPH] output[outputPos++] = L'l'; output[outputPos++] = L's'; break; case 0x02AB: // [LATIN SMALL LETTER LZ DIGRAPH] output[outputPos++] = L'l'; output[outputPos++] = L'z'; break; case 0x019C: // [LATIN CAPITAL LETTER TURNED M] case 0x1D0D: // [LATIN LETTER SMALL CAPITAL M] case 0x1E3E: // [LATIN CAPITAL LETTER M WITH ACUTE] case 0x1E40: // [LATIN CAPITAL LETTER M WITH DOT ABOVE] case 0x1E42: // [LATIN CAPITAL LETTER M WITH DOT BELOW] case 0x24C2: // [CIRCLED LATIN CAPITAL LETTER M] case 0x2C6E: // [LATIN CAPITAL LETTER M WITH HOOK] case 0xA7FD: 
// [LATIN EPIGRAPHIC LETTER INVERTED M] case 0xA7FF: // [LATIN EPIGRAPHIC LETTER ARCHAIC M] case 0xFF2D: // [FULLWIDTH LATIN CAPITAL LETTER M] output[outputPos++] = L'M'; break; case 0x026F: // [LATIN SMALL LETTER TURNED M] case 0x0270: // [LATIN SMALL LETTER TURNED M WITH LONG LEG] case 0x0271: // [LATIN SMALL LETTER M WITH HOOK] case 0x1D6F: // [LATIN SMALL LETTER M WITH MIDDLE TILDE] case 0x1D86: // [LATIN SMALL LETTER M WITH PALATAL HOOK] case 0x1E3F: // [LATIN SMALL LETTER M WITH ACUTE] case 0x1E41: // [LATIN SMALL LETTER M WITH DOT ABOVE] case 0x1E43: // [LATIN SMALL LETTER M WITH DOT BELOW] case 0x24DC: // [CIRCLED LATIN SMALL LETTER M] case 0xFF4D: // [FULLWIDTH LATIN SMALL LETTER M] output[outputPos++] = L'm'; break; case 0x24A8: // [PARENTHESIZED LATIN SMALL LETTER M] output[outputPos++] = L'('; output[outputPos++] = L'm'; output[outputPos++] = L')'; break; case 0x00D1: // [LATIN CAPITAL LETTER N WITH TILDE] case 0x0143: // [LATIN CAPITAL LETTER N WITH ACUTE] case 0x0145: // [LATIN CAPITAL LETTER N WITH CEDILLA] case 0x0147: // [LATIN CAPITAL LETTER N WITH CARON] case 0x014A: // [LATIN CAPITAL LETTER ENG] case 0x019D: // [LATIN CAPITAL LETTER N WITH LEFT HOOK] case 0x01F8: // [LATIN CAPITAL LETTER N WITH GRAVE] case 0x0220: // [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] case 0x0274: // [LATIN LETTER SMALL CAPITAL N] case 0x1D0E: // [LATIN LETTER SMALL CAPITAL REVERSED N] case 0x1E44: // [LATIN CAPITAL LETTER N WITH DOT ABOVE] case 0x1E46: // [LATIN CAPITAL LETTER N WITH DOT BELOW] case 0x1E48: // [LATIN CAPITAL LETTER N WITH LINE BELOW] case 0x1E4A: // [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] case 0x24C3: // [CIRCLED LATIN CAPITAL LETTER N] case 0xFF2E: // [FULLWIDTH LATIN CAPITAL LETTER N] output[outputPos++] = L'N'; break; case 0x00F1: // [LATIN SMALL LETTER N WITH TILDE] case 0x0144: // [LATIN SMALL LETTER N WITH ACUTE] case 0x0146: // [LATIN SMALL LETTER N WITH CEDILLA] case 0x0148: // [LATIN SMALL LETTER N WITH CARON] case 0x0149: // [LATIN 
SMALL LETTER N PRECEDED BY APOSTROPHE] case 0x014B: // [LATIN SMALL LETTER ENG] case 0x019E: // [LATIN SMALL LETTER N WITH LONG RIGHT LEG] case 0x01F9: // [LATIN SMALL LETTER N WITH GRAVE] case 0x0235: // [LATIN SMALL LETTER N WITH CURL] case 0x0272: // [LATIN SMALL LETTER N WITH LEFT HOOK] case 0x0273: // [LATIN SMALL LETTER N WITH RETROFLEX HOOK] case 0x1D70: // [LATIN SMALL LETTER N WITH MIDDLE TILDE] case 0x1D87: // [LATIN SMALL LETTER N WITH PALATAL HOOK] case 0x1E45: // [LATIN SMALL LETTER N WITH DOT ABOVE] case 0x1E47: // [LATIN SMALL LETTER N WITH DOT BELOW] case 0x1E49: // [LATIN SMALL LETTER N WITH LINE BELOW] case 0x1E4B: // [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] case 0x207F: // [SUPERSCRIPT LATIN SMALL LETTER N] case 0x24DD: // [CIRCLED LATIN SMALL LETTER N] case 0xFF4E: // [FULLWIDTH LATIN SMALL LETTER N] output[outputPos++] = L'n'; break; case 0x01CA: // [LATIN CAPITAL LETTER NJ] output[outputPos++] = L'N'; output[outputPos++] = L'J'; break; case 0x01CB: // [LATIN CAPITAL LETTER N WITH SMALL LETTER J] output[outputPos++] = L'N'; output[outputPos++] = L'j'; break; case 0x24A9: // [PARENTHESIZED LATIN SMALL LETTER N] output[outputPos++] = L'('; output[outputPos++] = L'n'; output[outputPos++] = L')'; break; case 0x01CC: // [LATIN SMALL LETTER NJ] output[outputPos++] = L'n'; output[outputPos++] = L'j'; break; case 0x00D2: // [LATIN CAPITAL LETTER O WITH GRAVE] case 0x00D3: // [LATIN CAPITAL LETTER O WITH ACUTE] case 0x00D4: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] case 0x00D5: // [LATIN CAPITAL LETTER O WITH TILDE] case 0x00D6: // [LATIN CAPITAL LETTER O WITH DIAERESIS] case 0x00D8: // [LATIN CAPITAL LETTER O WITH STROKE] case 0x014C: // [LATIN CAPITAL LETTER O WITH MACRON] case 0x014E: // [LATIN CAPITAL LETTER O WITH BREVE] case 0x0150: // [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] case 0x0186: // [LATIN CAPITAL LETTER OPEN O] case 0x019F: // [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] case 0x01A0: // [LATIN CAPITAL LETTER O WITH HORN] case 
0x01D1: // [LATIN CAPITAL LETTER O WITH CARON] case 0x01EA: // [LATIN CAPITAL LETTER O WITH OGONEK] case 0x01EC: // [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] case 0x01FE: // [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] case 0x020C: // [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] case 0x020E: // [LATIN CAPITAL LETTER O WITH INVERTED BREVE] case 0x022A: // [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] case 0x022C: // [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] case 0x022E: // [LATIN CAPITAL LETTER O WITH DOT ABOVE] case 0x0230: // [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON] case 0x1D0F: // [LATIN LETTER SMALL CAPITAL O] case 0x1D10: // [LATIN LETTER SMALL CAPITAL OPEN O] case 0x1E4C: // [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] case 0x1E4E: // [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] case 0x1E50: // [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] case 0x1E52: // [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] case 0x1ECC: // [LATIN CAPITAL LETTER O WITH DOT BELOW] case 0x1ECE: // [LATIN CAPITAL LETTER O WITH HOOK ABOVE] case 0x1ED0: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] case 0x1ED2: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] case 0x1ED4: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] case 0x1ED6: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] case 0x1ED8: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] case 0x1EDA: // [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] case 0x1EDC: // [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] case 0x1EDE: // [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] case 0x1EE0: // [LATIN CAPITAL LETTER O WITH HORN AND TILDE] case 0x1EE2: // [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] case 0x24C4: // [CIRCLED LATIN CAPITAL LETTER O] case 0xA74A: // [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] case 0xA74C: // [LATIN CAPITAL LETTER O WITH LOOP] case 0xFF2F: // [FULLWIDTH LATIN CAPITAL LETTER O] output[outputPos++] = L'O'; break; case 0x00F2: // [LATIN 
SMALL LETTER O WITH GRAVE] case 0x00F3: // [LATIN SMALL LETTER O WITH ACUTE] case 0x00F4: // [LATIN SMALL LETTER O WITH CIRCUMFLEX] case 0x00F5: // [LATIN SMALL LETTER O WITH TILDE] case 0x00F6: // [LATIN SMALL LETTER O WITH DIAERESIS] case 0x00F8: // [LATIN SMALL LETTER O WITH STROKE] case 0x014D: // [LATIN SMALL LETTER O WITH MACRON] case 0x014F: // [LATIN SMALL LETTER O WITH BREVE] case 0x0151: // [LATIN SMALL LETTER O WITH DOUBLE ACUTE] case 0x01A1: // [LATIN SMALL LETTER O WITH HORN] case 0x01D2: // [LATIN SMALL LETTER O WITH CARON] case 0x01EB: // [LATIN SMALL LETTER O WITH OGONEK] case 0x01ED: // [LATIN SMALL LETTER O WITH OGONEK AND MACRON] case 0x01FF: // [LATIN SMALL LETTER O WITH STROKE AND ACUTE] case 0x020D: // [LATIN SMALL LETTER O WITH DOUBLE GRAVE] case 0x020F: // [LATIN SMALL LETTER O WITH INVERTED BREVE] case 0x022B: // [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] case 0x022D: // [LATIN SMALL LETTER O WITH TILDE AND MACRON] case 0x022F: // [LATIN SMALL LETTER O WITH DOT ABOVE] case 0x0231: // [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] case 0x0254: // [LATIN SMALL LETTER OPEN O] case 0x0275: // [LATIN SMALL LETTER BARRED O] case 0x1D16: // [LATIN SMALL LETTER TOP HALF O] case 0x1D17: // [LATIN SMALL LETTER BOTTOM HALF O] case 0x1D97: // [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] case 0x1E4D: // [LATIN SMALL LETTER O WITH TILDE AND ACUTE] case 0x1E4F: // [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] case 0x1E51: // [LATIN SMALL LETTER O WITH MACRON AND GRAVE] case 0x1E53: // [LATIN SMALL LETTER O WITH MACRON AND ACUTE] case 0x1ECD: // [LATIN SMALL LETTER O WITH DOT BELOW] case 0x1ECF: // [LATIN SMALL LETTER O WITH HOOK ABOVE] case 0x1ED1: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] case 0x1ED3: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] case 0x1ED5: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] case 0x1ED7: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] case 0x1ED9: // [LATIN SMALL LETTER O WITH 
CIRCUMFLEX AND DOT BELOW] case 0x1EDB: // [LATIN SMALL LETTER O WITH HORN AND ACUTE] case 0x1EDD: // [LATIN SMALL LETTER O WITH HORN AND GRAVE] case 0x1EDF: // [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] case 0x1EE1: // [LATIN SMALL LETTER O WITH HORN AND TILDE] case 0x1EE3: // [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] case 0x2092: // [LATIN SUBSCRIPT SMALL LETTER O] case 0x24DE: // [CIRCLED LATIN SMALL LETTER O] case 0x2C7A: // [LATIN SMALL LETTER O WITH LOW RING INSIDE] case 0xA74B: // [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] case 0xA74D: // [LATIN SMALL LETTER O WITH LOOP] case 0xFF4F: // [FULLWIDTH LATIN SMALL LETTER O] output[outputPos++] = L'o'; break; case 0x0152: // [LATIN CAPITAL LIGATURE OE] case 0x0276: // [LATIN LETTER SMALL CAPITAL OE] output[outputPos++] = L'O'; output[outputPos++] = L'E'; break; case 0xA74E: // [LATIN CAPITAL LETTER OO] output[outputPos++] = L'O'; output[outputPos++] = L'O'; break; case 0x0222: // [LATIN CAPITAL LETTER OU] case 0x1D15: // [LATIN LETTER SMALL CAPITAL OU] output[outputPos++] = L'O'; output[outputPos++] = L'U'; break; case 0x24AA: // [PARENTHESIZED LATIN SMALL LETTER O] output[outputPos++] = L'('; output[outputPos++] = L'o'; output[outputPos++] = L')'; break; case 0x0153: // [LATIN SMALL LIGATURE OE] case 0x1D14: // [LATIN SMALL LETTER TURNED OE] output[outputPos++] = L'o'; output[outputPos++] = L'e'; break; case 0xA74F: // [LATIN SMALL LETTER OO] output[outputPos++] = L'o'; output[outputPos++] = L'o'; break; case 0x0223: // [LATIN SMALL LETTER OU] output[outputPos++] = L'o'; output[outputPos++] = L'u'; break; case 0x01A4: // [LATIN CAPITAL LETTER P WITH HOOK] case 0x1D18: // [LATIN LETTER SMALL CAPITAL P] case 0x1E54: // [LATIN CAPITAL LETTER P WITH ACUTE] case 0x1E56: // [LATIN CAPITAL LETTER P WITH DOT ABOVE] case 0x24C5: // [CIRCLED LATIN CAPITAL LETTER P] case 0x2C63: // [LATIN CAPITAL LETTER P WITH STROKE] case 0xA750: // [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] case 0xA752: // [LATIN 
CAPITAL LETTER P WITH FLOURISH] case 0xA754: // [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] case 0xFF30: // [FULLWIDTH LATIN CAPITAL LETTER P] output[outputPos++] = L'P'; break; case 0x01A5: // [LATIN SMALL LETTER P WITH HOOK] case 0x1D71: // [LATIN SMALL LETTER P WITH MIDDLE TILDE] case 0x1D7D: // [LATIN SMALL LETTER P WITH STROKE] case 0x1D88: // [LATIN SMALL LETTER P WITH PALATAL HOOK] case 0x1E55: // [LATIN SMALL LETTER P WITH ACUTE] case 0x1E57: // [LATIN SMALL LETTER P WITH DOT ABOVE] case 0x24DF: // [CIRCLED LATIN SMALL LETTER P] case 0xA751: // [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] case 0xA753: // [LATIN SMALL LETTER P WITH FLOURISH] case 0xA755: // [LATIN SMALL LETTER P WITH SQUIRREL TAIL] case 0xA7FC: // [LATIN EPIGRAPHIC LETTER REVERSED P] case 0xFF50: // [FULLWIDTH LATIN SMALL LETTER P] output[outputPos++] = L'p'; break; case 0x24AB: // [PARENTHESIZED LATIN SMALL LETTER P] output[outputPos++] = L'('; output[outputPos++] = L'p'; output[outputPos++] = L')'; break; case 0x024A: // [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] case 0x24C6: // [CIRCLED LATIN CAPITAL LETTER Q] case 0xA756: // [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] case 0xA758: // [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] case 0xFF31: // [FULLWIDTH LATIN CAPITAL LETTER Q] output[outputPos++] = L'Q'; break; case 0x0138: // [LATIN SMALL LETTER KRA] case 0x024B: // [LATIN SMALL LETTER Q WITH HOOK TAIL] case 0x02A0: // [LATIN SMALL LETTER Q WITH HOOK] case 0x24E0: // [CIRCLED LATIN SMALL LETTER Q] case 0xA757: // [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] case 0xA759: // [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] case 0xFF51: // [FULLWIDTH LATIN SMALL LETTER Q] output[outputPos++] = L'q'; break; case 0x24AC: // [PARENTHESIZED LATIN SMALL LETTER Q] output[outputPos++] = L'('; output[outputPos++] = L'q'; output[outputPos++] = L')'; break; case 0x0239: // [LATIN SMALL LETTER QP DIGRAPH] output[outputPos++] = L'q'; output[outputPos++] = L'p'; break; case 
0x0154: // [LATIN CAPITAL LETTER R WITH ACUTE] case 0x0156: // [LATIN CAPITAL LETTER R WITH CEDILLA] case 0x0158: // [LATIN CAPITAL LETTER R WITH CARON] case 0x0210: // [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] case 0x0212: // [LATIN CAPITAL LETTER R WITH INVERTED BREVE] case 0x024C: // [LATIN CAPITAL LETTER R WITH STROKE] case 0x0280: // [LATIN LETTER SMALL CAPITAL R] case 0x0281: // [LATIN LETTER SMALL CAPITAL INVERTED R] case 0x1D19: // [LATIN LETTER SMALL CAPITAL REVERSED R] case 0x1D1A: // [LATIN LETTER SMALL CAPITAL TURNED R] case 0x1E58: // [LATIN CAPITAL LETTER R WITH DOT ABOVE] case 0x1E5A: // [LATIN CAPITAL LETTER R WITH DOT BELOW] case 0x1E5C: // [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] case 0x1E5E: // [LATIN CAPITAL LETTER R WITH LINE BELOW] case 0x24C7: // [CIRCLED LATIN CAPITAL LETTER R] case 0x2C64: // [LATIN CAPITAL LETTER R WITH TAIL] case 0xA75A: // [LATIN CAPITAL LETTER R ROTUNDA] case 0xA782: // [LATIN CAPITAL LETTER INSULAR R] case 0xFF32: // [FULLWIDTH LATIN CAPITAL LETTER R] output[outputPos++] = L'R'; break; case 0x0155: // [LATIN SMALL LETTER R WITH ACUTE] case 0x0157: // [LATIN SMALL LETTER R WITH CEDILLA] case 0x0159: // [LATIN SMALL LETTER R WITH CARON] case 0x0211: // [LATIN SMALL LETTER R WITH DOUBLE GRAVE] case 0x0213: // [LATIN SMALL LETTER R WITH INVERTED BREVE] case 0x024D: // [LATIN SMALL LETTER R WITH STROKE] case 0x027C: // [LATIN SMALL LETTER R WITH LONG LEG] case 0x027D: // [LATIN SMALL LETTER R WITH TAIL] case 0x027E: // [LATIN SMALL LETTER R WITH FISHHOOK] case 0x027F: // [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] case 0x1D63: // [LATIN SUBSCRIPT SMALL LETTER R] case 0x1D72: // [LATIN SMALL LETTER R WITH MIDDLE TILDE] case 0x1D73: // [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] case 0x1D89: // [LATIN SMALL LETTER R WITH PALATAL HOOK] case 0x1E59: // [LATIN SMALL LETTER R WITH DOT ABOVE] case 0x1E5B: // [LATIN SMALL LETTER R WITH DOT BELOW] case 0x1E5D: // [LATIN SMALL LETTER R WITH DOT BELOW AND 
MACRON] case 0x1E5F: // [LATIN SMALL LETTER R WITH LINE BELOW] case 0x24E1: // [CIRCLED LATIN SMALL LETTER R] case 0xA75B: // [LATIN SMALL LETTER R ROTUNDA] case 0xA783: // [LATIN SMALL LETTER INSULAR R] case 0xFF52: // [FULLWIDTH LATIN SMALL LETTER R] output[outputPos++] = L'r'; break; case 0x24AD: // [PARENTHESIZED LATIN SMALL LETTER R] output[outputPos++] = L'('; output[outputPos++] = L'r'; output[outputPos++] = L')'; break; case 0x015A: // [LATIN CAPITAL LETTER S WITH ACUTE] case 0x015C: // [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] case 0x015E: // [LATIN CAPITAL LETTER S WITH CEDILLA] case 0x0160: // [LATIN CAPITAL LETTER S WITH CARON] case 0x0218: // [LATIN CAPITAL LETTER S WITH COMMA BELOW] case 0x1E60: // [LATIN CAPITAL LETTER S WITH DOT ABOVE] case 0x1E62: // [LATIN CAPITAL LETTER S WITH DOT BELOW] case 0x1E64: // [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] case 0x1E66: // [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] case 0x1E68: // [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] case 0x24C8: // [CIRCLED LATIN CAPITAL LETTER S] case 0xA731: // [LATIN LETTER SMALL CAPITAL S] case 0xA785: // [LATIN SMALL LETTER INSULAR S] case 0xFF33: // [FULLWIDTH LATIN CAPITAL LETTER S] output[outputPos++] = L'S'; break; case 0x015B: // [LATIN SMALL LETTER S WITH ACUTE] case 0x015D: // [LATIN SMALL LETTER S WITH CIRCUMFLEX] case 0x015F: // [LATIN SMALL LETTER S WITH CEDILLA] case 0x0161: // [LATIN SMALL LETTER S WITH CARON] case 0x017F: // [LATIN SMALL LETTER LONG S] case 0x0219: // [LATIN SMALL LETTER S WITH COMMA BELOW] case 0x023F: // [LATIN SMALL LETTER S WITH SWASH TAIL] case 0x0282: // [LATIN SMALL LETTER S WITH HOOK] case 0x1D74: // [LATIN SMALL LETTER S WITH MIDDLE TILDE] case 0x1D8A: // [LATIN SMALL LETTER S WITH PALATAL HOOK] case 0x1E61: // [LATIN SMALL LETTER S WITH DOT ABOVE] case 0x1E63: // [LATIN SMALL LETTER S WITH DOT BELOW] case 0x1E65: // [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] case 0x1E67: // [LATIN SMALL LETTER S WITH CARON AND 
DOT ABOVE] case 0x1E69: // [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] case 0x1E9C: // [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] case 0x1E9D: // [LATIN SMALL LETTER LONG S WITH HIGH STROKE] case 0x24E2: // [CIRCLED LATIN SMALL LETTER S] case 0xA784: // [LATIN CAPITAL LETTER INSULAR S] case 0xFF53: // [FULLWIDTH LATIN SMALL LETTER S] output[outputPos++] = L's'; break; case 0x1E9E: // [LATIN CAPITAL LETTER SHARP S] output[outputPos++] = L'S'; output[outputPos++] = L'S'; break; case 0x24AE: // [PARENTHESIZED LATIN SMALL LETTER S] output[outputPos++] = L'('; output[outputPos++] = L's'; output[outputPos++] = L')'; break; case 0x00DF: // [LATIN SMALL LETTER SHARP S] output[outputPos++] = L's'; output[outputPos++] = L's'; break; case 0xFB06: // [LATIN SMALL LIGATURE ST] output[outputPos++] = L's'; output[outputPos++] = L't'; break; case 0x0162: // [LATIN CAPITAL LETTER T WITH CEDILLA] case 0x0164: // [LATIN CAPITAL LETTER T WITH CARON] case 0x0166: // [LATIN CAPITAL LETTER T WITH STROKE] case 0x01AC: // [LATIN CAPITAL LETTER T WITH HOOK] case 0x01AE: // [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] case 0x021A: // [LATIN CAPITAL LETTER T WITH COMMA BELOW] case 0x023E: // [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] case 0x1D1B: // [LATIN LETTER SMALL CAPITAL T] case 0x1E6A: // [LATIN CAPITAL LETTER T WITH DOT ABOVE] case 0x1E6C: // [LATIN CAPITAL LETTER T WITH DOT BELOW] case 0x1E6E: // [LATIN CAPITAL LETTER T WITH LINE BELOW] case 0x1E70: // [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] case 0x24C9: // [CIRCLED LATIN CAPITAL LETTER T] case 0xA786: // [LATIN CAPITAL LETTER INSULAR T] case 0xFF34: // [FULLWIDTH LATIN CAPITAL LETTER T] output[outputPos++] = L'T'; break; case 0x0163: // [LATIN SMALL LETTER T WITH CEDILLA] case 0x0165: // [LATIN SMALL LETTER T WITH CARON] case 0x0167: // [LATIN SMALL LETTER T WITH STROKE] case 0x01AB: // [LATIN SMALL LETTER T WITH PALATAL HOOK] case 0x01AD: // [LATIN SMALL LETTER T WITH HOOK] case 0x021B: // [LATIN SMALL 
LETTER T WITH COMMA BELOW] case 0x0236: // [LATIN SMALL LETTER T WITH CURL] case 0x0287: // [LATIN SMALL LETTER TURNED T] case 0x0288: // [LATIN SMALL LETTER T WITH RETROFLEX HOOK] case 0x1D75: // [LATIN SMALL LETTER T WITH MIDDLE TILDE] case 0x1E6B: // [LATIN SMALL LETTER T WITH DOT ABOVE] case 0x1E6D: // [LATIN SMALL LETTER T WITH DOT BELOW] case 0x1E6F: // [LATIN SMALL LETTER T WITH LINE BELOW] case 0x1E71: // [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW] case 0x1E97: // [LATIN SMALL LETTER T WITH DIAERESIS] case 0x24E3: // [CIRCLED LATIN SMALL LETTER T] case 0x2C66: // [LATIN SMALL LETTER T WITH DIAGONAL STROKE] case 0xFF54: // [FULLWIDTH LATIN SMALL LETTER T] output[outputPos++] = L't'; break; case 0x00DE: // [LATIN CAPITAL LETTER THORN] case 0xA766: // [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] output[outputPos++] = L'T'; output[outputPos++] = L'H'; break; case 0xA728: // [LATIN CAPITAL LETTER TZ] output[outputPos++] = L'T'; output[outputPos++] = L'Z'; break; case 0x24AF: // [PARENTHESIZED LATIN SMALL LETTER T] output[outputPos++] = L'('; output[outputPos++] = L't'; output[outputPos++] = L')'; break; case 0x02A8: // [LATIN SMALL LETTER TC DIGRAPH WITH CURL] output[outputPos++] = L't'; output[outputPos++] = L'c'; break; case 0x00FE: // [LATIN SMALL LETTER THORN] case 0x1D7A: // [LATIN SMALL LETTER TH WITH STRIKETHROUGH] case 0xA767: // [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] output[outputPos++] = L't'; output[outputPos++] = L'h'; break; case 0x02A6: // [LATIN SMALL LETTER TS DIGRAPH] output[outputPos++] = L't'; output[outputPos++] = L's'; break; case 0xA729: // [LATIN SMALL LETTER TZ] output[outputPos++] = L't'; output[outputPos++] = L'z'; break; case 0x00D9: // [LATIN CAPITAL LETTER U WITH GRAVE] case 0x00DA: // [LATIN CAPITAL LETTER U WITH ACUTE] case 0x00DB: // [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] case 0x00DC: // [LATIN CAPITAL LETTER U WITH DIAERESIS] case 0x0168: // [LATIN CAPITAL LETTER U WITH TILDE] case 0x016A: // 
[LATIN CAPITAL LETTER U WITH MACRON] case 0x016C: // [LATIN CAPITAL LETTER U WITH BREVE] case 0x016E: // [LATIN CAPITAL LETTER U WITH RING ABOVE] case 0x0170: // [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] case 0x0172: // [LATIN CAPITAL LETTER U WITH OGONEK] case 0x01AF: // [LATIN CAPITAL LETTER U WITH HORN] case 0x01D3: // [LATIN CAPITAL LETTER U WITH CARON] case 0x01D5: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON] case 0x01D7: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] case 0x01D9: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] case 0x01DB: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] case 0x0214: // [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] case 0x0216: // [LATIN CAPITAL LETTER U WITH INVERTED BREVE] case 0x0244: // [LATIN CAPITAL LETTER U BAR] case 0x1D1C: // [LATIN LETTER SMALL CAPITAL U] case 0x1D7E: // [LATIN SMALL CAPITAL LETTER U WITH STROKE] case 0x1E72: // [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] case 0x1E74: // [LATIN CAPITAL LETTER U WITH TILDE BELOW] case 0x1E76: // [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] case 0x1E78: // [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] case 0x1E7A: // [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] case 0x1EE4: // [LATIN CAPITAL LETTER U WITH DOT BELOW] case 0x1EE6: // [LATIN CAPITAL LETTER U WITH HOOK ABOVE] case 0x1EE8: // [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] case 0x1EEA: // [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] case 0x1EEC: // [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] case 0x1EEE: // [LATIN CAPITAL LETTER U WITH HORN AND TILDE] case 0x1EF0: // [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] case 0x24CA: // [CIRCLED LATIN CAPITAL LETTER U] case 0xFF35: // [FULLWIDTH LATIN CAPITAL LETTER U] output[outputPos++] = L'U'; break; case 0x00F9: // [LATIN SMALL LETTER U WITH GRAVE] case 0x00FA: // [LATIN SMALL LETTER U WITH ACUTE] case 0x00FB: // [LATIN SMALL LETTER U WITH CIRCUMFLEX] case 0x00FC: // [LATIN SMALL LETTER U WITH DIAERESIS] case 0x0169: // 
[LATIN SMALL LETTER U WITH TILDE] case 0x016B: // [LATIN SMALL LETTER U WITH MACRON] case 0x016D: // [LATIN SMALL LETTER U WITH BREVE] case 0x016F: // [LATIN SMALL LETTER U WITH RING ABOVE] case 0x0171: // [LATIN SMALL LETTER U WITH DOUBLE ACUTE] case 0x0173: // [LATIN SMALL LETTER U WITH OGONEK] case 0x01B0: // [LATIN SMALL LETTER U WITH HORN] case 0x01D4: // [LATIN SMALL LETTER U WITH CARON] case 0x01D6: // [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON] case 0x01D8: // [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] case 0x01DA: // [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] case 0x01DC: // [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] case 0x0215: // [LATIN SMALL LETTER U WITH DOUBLE GRAVE] case 0x0217: // [LATIN SMALL LETTER U WITH INVERTED BREVE] case 0x0289: // [LATIN SMALL LETTER U BAR] case 0x1D64: // [LATIN SUBSCRIPT SMALL LETTER U] case 0x1D99: // [LATIN SMALL LETTER U WITH RETROFLEX HOOK] case 0x1E73: // [LATIN SMALL LETTER U WITH DIAERESIS BELOW] case 0x1E75: // [LATIN SMALL LETTER U WITH TILDE BELOW] case 0x1E77: // [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] case 0x1E79: // [LATIN SMALL LETTER U WITH TILDE AND ACUTE] case 0x1E7B: // [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] case 0x1EE5: // [LATIN SMALL LETTER U WITH DOT BELOW] case 0x1EE7: // [LATIN SMALL LETTER U WITH HOOK ABOVE] case 0x1EE9: // [LATIN SMALL LETTER U WITH HORN AND ACUTE] case 0x1EEB: // [LATIN SMALL LETTER U WITH HORN AND GRAVE] case 0x1EED: // [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] case 0x1EEF: // [LATIN SMALL LETTER U WITH HORN AND TILDE] case 0x1EF1: // [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] case 0x24E4: // [CIRCLED LATIN SMALL LETTER U] case 0xFF55: // [FULLWIDTH LATIN SMALL LETTER U] output[outputPos++] = L'u'; break; case 0x24B0: // [PARENTHESIZED LATIN SMALL LETTER U] output[outputPos++] = L'('; output[outputPos++] = L'u'; output[outputPos++] = L')'; break; case 0x1D6B: // [LATIN SMALL LETTER UE] output[outputPos++] = L'u'; output[outputPos++] 
= L'e'; break; case 0x01B2: // [LATIN CAPITAL LETTER V WITH HOOK] case 0x0245: // [LATIN CAPITAL LETTER TURNED V] case 0x1D20: // [LATIN LETTER SMALL CAPITAL V] case 0x1E7C: // [LATIN CAPITAL LETTER V WITH TILDE] case 0x1E7E: // [LATIN CAPITAL LETTER V WITH DOT BELOW] case 0x1EFC: // [LATIN CAPITAL LETTER MIDDLE-WELSH V] case 0x24CB: // [CIRCLED LATIN CAPITAL LETTER V] case 0xA75E: // [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] case 0xA768: // [LATIN CAPITAL LETTER VEND] case 0xFF36: // [FULLWIDTH LATIN CAPITAL LETTER V] output[outputPos++] = L'V'; break; case 0x028B: // [LATIN SMALL LETTER V WITH HOOK] case 0x028C: // [LATIN SMALL LETTER TURNED V] case 0x1D65: // [LATIN SUBSCRIPT SMALL LETTER V] case 0x1D8C: // [LATIN SMALL LETTER V WITH PALATAL HOOK] case 0x1E7D: // [LATIN SMALL LETTER V WITH TILDE] case 0x1E7F: // [LATIN SMALL LETTER V WITH DOT BELOW] case 0x24E5: // [CIRCLED LATIN SMALL LETTER V] case 0x2C71: // [LATIN SMALL LETTER V WITH RIGHT HOOK] case 0x2C74: // [LATIN SMALL LETTER V WITH CURL] case 0xA75F: // [LATIN SMALL LETTER V WITH DIAGONAL STROKE] case 0xFF56: // [FULLWIDTH LATIN SMALL LETTER V] output[outputPos++] = L'v'; break; case 0xA760: // [LATIN CAPITAL LETTER VY] output[outputPos++] = L'V'; output[outputPos++] = L'Y'; break; case 0x24B1: // [PARENTHESIZED LATIN SMALL LETTER V] output[outputPos++] = L'('; output[outputPos++] = L'v'; output[outputPos++] = L')'; break; case 0xA761: // [LATIN SMALL LETTER VY] output[outputPos++] = L'v'; output[outputPos++] = L'y'; break; case 0x0174: // [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] case 0x01F7: // [LATIN CAPITAL LETTER WYNN] case 0x1D21: // [LATIN LETTER SMALL CAPITAL W] case 0x1E80: // [LATIN CAPITAL LETTER W WITH GRAVE] case 0x1E82: // [LATIN CAPITAL LETTER W WITH ACUTE] case 0x1E84: // [LATIN CAPITAL LETTER W WITH DIAERESIS] case 0x1E86: // [LATIN CAPITAL LETTER W WITH DOT ABOVE] case 0x1E88: // [LATIN CAPITAL LETTER W WITH DOT BELOW] case 0x24CC: // [CIRCLED LATIN CAPITAL LETTER W] case 0x2C72: 
// [LATIN CAPITAL LETTER W WITH HOOK] case 0xFF37: // [FULLWIDTH LATIN CAPITAL LETTER W] output[outputPos++] = L'W'; break; case 0x0175: // [LATIN SMALL LETTER W WITH CIRCUMFLEX] case 0x01BF: // [LATIN LETTER WYNN] case 0x028D: // [LATIN SMALL LETTER TURNED W] case 0x1E81: // [LATIN SMALL LETTER W WITH GRAVE] case 0x1E83: // [LATIN SMALL LETTER W WITH ACUTE] case 0x1E85: // [LATIN SMALL LETTER W WITH DIAERESIS] case 0x1E87: // [LATIN SMALL LETTER W WITH DOT ABOVE] case 0x1E89: // [LATIN SMALL LETTER W WITH DOT BELOW] case 0x1E98: // [LATIN SMALL LETTER W WITH RING ABOVE] case 0x24E6: // [CIRCLED LATIN SMALL LETTER W] case 0x2C73: // [LATIN SMALL LETTER W WITH HOOK] case 0xFF57: // [FULLWIDTH LATIN SMALL LETTER W] output[outputPos++] = L'w'; break; case 0x24B2: // [PARENTHESIZED LATIN SMALL LETTER W] output[outputPos++] = L'('; output[outputPos++] = L'w'; output[outputPos++] = L')'; break; case 0x1E8A: // [LATIN CAPITAL LETTER X WITH DOT ABOVE] case 0x1E8C: // [LATIN CAPITAL LETTER X WITH DIAERESIS] case 0x24CD: // [CIRCLED LATIN CAPITAL LETTER X] case 0xFF38: // [FULLWIDTH LATIN CAPITAL LETTER X] output[outputPos++] = L'X'; break; case 0x1D8D: // [LATIN SMALL LETTER X WITH PALATAL HOOK] case 0x1E8B: // [LATIN SMALL LETTER X WITH DOT ABOVE] case 0x1E8D: // [LATIN SMALL LETTER X WITH DIAERESIS] case 0x2093: // [LATIN SUBSCRIPT SMALL LETTER X] case 0x24E7: // [CIRCLED LATIN SMALL LETTER X] case 0xFF58: // [FULLWIDTH LATIN SMALL LETTER X] output[outputPos++] = L'x'; break; case 0x24B3: // [PARENTHESIZED LATIN SMALL LETTER X] output[outputPos++] = L'('; output[outputPos++] = L'x'; output[outputPos++] = L')'; break; case 0x00DD: // [LATIN CAPITAL LETTER Y WITH ACUTE] case 0x0176: // [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] case 0x0178: // [LATIN CAPITAL LETTER Y WITH DIAERESIS] case 0x01B3: // [LATIN CAPITAL LETTER Y WITH HOOK] case 0x0232: // [LATIN CAPITAL LETTER Y WITH MACRON] case 0x024E: // [LATIN CAPITAL LETTER Y WITH STROKE] case 0x028F: // [LATIN LETTER SMALL 
CAPITAL Y] case 0x1E8E: // [LATIN CAPITAL LETTER Y WITH DOT ABOVE] case 0x1EF2: // [LATIN CAPITAL LETTER Y WITH GRAVE] case 0x1EF4: // [LATIN CAPITAL LETTER Y WITH DOT BELOW] case 0x1EF6: // [LATIN CAPITAL LETTER Y WITH HOOK ABOVE] case 0x1EF8: // [LATIN CAPITAL LETTER Y WITH TILDE] case 0x1EFE: // [LATIN CAPITAL LETTER Y WITH LOOP] case 0x24CE: // [CIRCLED LATIN CAPITAL LETTER Y] case 0xFF39: // [FULLWIDTH LATIN CAPITAL LETTER Y] output[outputPos++] = L'Y'; break; case 0x00FD: // [LATIN SMALL LETTER Y WITH ACUTE] case 0x00FF: // [LATIN SMALL LETTER Y WITH DIAERESIS] case 0x0177: // [LATIN SMALL LETTER Y WITH CIRCUMFLEX] case 0x01B4: // [LATIN SMALL LETTER Y WITH HOOK] case 0x0233: // [LATIN SMALL LETTER Y WITH MACRON] case 0x024F: // [LATIN SMALL LETTER Y WITH STROKE] case 0x028E: // [LATIN SMALL LETTER TURNED Y] case 0x1E8F: // [LATIN SMALL LETTER Y WITH DOT ABOVE] case 0x1E99: // [LATIN SMALL LETTER Y WITH RING ABOVE] case 0x1EF3: // [LATIN SMALL LETTER Y WITH GRAVE] case 0x1EF5: // [LATIN SMALL LETTER Y WITH DOT BELOW] case 0x1EF7: // [LATIN SMALL LETTER Y WITH HOOK ABOVE] case 0x1EF9: // [LATIN SMALL LETTER Y WITH TILDE] case 0x1EFF: // [LATIN SMALL LETTER Y WITH LOOP] case 0x24E8: // [CIRCLED LATIN SMALL LETTER Y] case 0xFF59: // [FULLWIDTH LATIN SMALL LETTER Y] output[outputPos++] = L'y'; break; case 0x24B4: // [PARENTHESIZED LATIN SMALL LETTER Y] output[outputPos++] = L'('; output[outputPos++] = L'y'; output[outputPos++] = L')'; break; case 0x0179: // [LATIN CAPITAL LETTER Z WITH ACUTE] case 0x017B: // [LATIN CAPITAL LETTER Z WITH DOT ABOVE] case 0x017D: // [LATIN CAPITAL LETTER Z WITH CARON] case 0x01B5: // [LATIN CAPITAL LETTER Z WITH STROKE] case 0x021C: // [LATIN CAPITAL LETTER YOGH] case 0x0224: // [LATIN CAPITAL LETTER Z WITH HOOK] case 0x1D22: // [LATIN LETTER SMALL CAPITAL Z] case 0x1E90: // [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] case 0x1E92: // [LATIN CAPITAL LETTER Z WITH DOT BELOW] case 0x1E94: // [LATIN CAPITAL LETTER Z WITH LINE BELOW] case 
0x24CF: // [CIRCLED LATIN CAPITAL LETTER Z] case 0x2C6B: // [LATIN CAPITAL LETTER Z WITH DESCENDER] case 0xA762: // [LATIN CAPITAL LETTER VISIGOTHIC Z] case 0xFF3A: // [FULLWIDTH LATIN CAPITAL LETTER Z] output[outputPos++] = L'Z'; break; case 0x017A: // [LATIN SMALL LETTER Z WITH ACUTE] case 0x017C: // [LATIN SMALL LETTER Z WITH DOT ABOVE] case 0x017E: // [LATIN SMALL LETTER Z WITH CARON] case 0x01B6: // [LATIN SMALL LETTER Z WITH STROKE] case 0x021D: // [LATIN SMALL LETTER YOGH] case 0x0225: // [LATIN SMALL LETTER Z WITH HOOK] case 0x0240: // [LATIN SMALL LETTER Z WITH SWASH TAIL] case 0x0290: // [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] case 0x0291: // [LATIN SMALL LETTER Z WITH CURL] case 0x1D76: // [LATIN SMALL LETTER Z WITH MIDDLE TILDE] case 0x1D8E: // [LATIN SMALL LETTER Z WITH PALATAL HOOK] case 0x1E91: // [LATIN SMALL LETTER Z WITH CIRCUMFLEX] case 0x1E93: // [LATIN SMALL LETTER Z WITH DOT BELOW] case 0x1E95: // [LATIN SMALL LETTER Z WITH LINE BELOW] case 0x24E9: // [CIRCLED LATIN SMALL LETTER Z] case 0x2C6C: // [LATIN SMALL LETTER Z WITH DESCENDER] case 0xA763: // [LATIN SMALL LETTER VISIGOTHIC Z] case 0xFF5A: // [FULLWIDTH LATIN SMALL LETTER Z] output[outputPos++] = L'z'; break; case 0x24B5: // [PARENTHESIZED LATIN SMALL LETTER Z] output[outputPos++] = L'('; output[outputPos++] = L'z'; output[outputPos++] = L')'; break; case 0x2070: // [SUPERSCRIPT ZERO] case 0x2080: // [SUBSCRIPT ZERO] case 0x24EA: // [CIRCLED DIGIT ZERO] case 0x24FF: // [NEGATIVE CIRCLED DIGIT ZERO] case 0xFF10: // [FULLWIDTH DIGIT ZERO] output[outputPos++] = L'0'; break; case 0x00B9: // [SUPERSCRIPT ONE] case 0x2081: // [SUBSCRIPT ONE] case 0x2460: // [CIRCLED DIGIT ONE] case 0x24F5: // [DOUBLE CIRCLED DIGIT ONE] case 0x2776: // [DINGBAT NEGATIVE CIRCLED DIGIT ONE] case 0x2780: // [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] case 0x278A: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] case 0xFF11: // [FULLWIDTH DIGIT ONE] output[outputPos++] = L'1'; break; case 0x2488: // [DIGIT ONE 
FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'.'; break; case 0x2474: // [PARENTHESIZED DIGIT ONE] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L')'; break; case 0x00B2: // [SUPERSCRIPT TWO] case 0x2082: // [SUBSCRIPT TWO] case 0x2461: // [CIRCLED DIGIT TWO] case 0x24F6: // [DOUBLE CIRCLED DIGIT TWO] case 0x2777: // [DINGBAT NEGATIVE CIRCLED DIGIT TWO] case 0x2781: // [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] case 0x278B: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] case 0xFF12: // [FULLWIDTH DIGIT TWO] output[outputPos++] = L'2'; break; case 0x2489: // [DIGIT TWO FULL STOP] output[outputPos++] = L'2'; output[outputPos++] = L'.'; break; case 0x2475: // [PARENTHESIZED DIGIT TWO] output[outputPos++] = L'('; output[outputPos++] = L'2'; output[outputPos++] = L')'; break; case 0x00B3: // [SUPERSCRIPT THREE] case 0x2083: // [SUBSCRIPT THREE] case 0x2462: // [CIRCLED DIGIT THREE] case 0x24F7: // [DOUBLE CIRCLED DIGIT THREE] case 0x2778: // [DINGBAT NEGATIVE CIRCLED DIGIT THREE] case 0x2782: // [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] case 0x278C: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] case 0xFF13: // [FULLWIDTH DIGIT THREE] output[outputPos++] = L'3'; break; case 0x248A: // [DIGIT THREE FULL STOP] output[outputPos++] = L'3'; output[outputPos++] = L'.'; break; case 0x2476: // [PARENTHESIZED DIGIT THREE] output[outputPos++] = L'('; output[outputPos++] = L'3'; output[outputPos++] = L')'; break; case 0x2074: // [SUPERSCRIPT FOUR] case 0x2084: // [SUBSCRIPT FOUR] case 0x2463: // [CIRCLED DIGIT FOUR] case 0x24F8: // [DOUBLE CIRCLED DIGIT FOUR] case 0x2779: // [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] case 0x2783: // [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] case 0x278D: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] case 0xFF14: // [FULLWIDTH DIGIT FOUR] output[outputPos++] = L'4'; break; case 0x248B: // [DIGIT FOUR FULL STOP] output[outputPos++] = L'4'; output[outputPos++] = L'.'; break; case 0x2477: // 
[PARENTHESIZED DIGIT FOUR] output[outputPos++] = L'('; output[outputPos++] = L'4'; output[outputPos++] = L')'; break; case 0x2075: // [SUPERSCRIPT FIVE] case 0x2085: // [SUBSCRIPT FIVE] case 0x2464: // [CIRCLED DIGIT FIVE] case 0x24F9: // [DOUBLE CIRCLED DIGIT FIVE] case 0x277A: // [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] case 0x2784: // [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] case 0x278E: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] case 0xFF15: // [FULLWIDTH DIGIT FIVE] output[outputPos++] = L'5'; break; case 0x248C: // [DIGIT FIVE FULL STOP] output[outputPos++] = L'5'; output[outputPos++] = L'.'; break; case 0x2478: // [PARENTHESIZED DIGIT FIVE] output[outputPos++] = L'('; output[outputPos++] = L'5'; output[outputPos++] = L')'; break; case 0x2076: // [SUPERSCRIPT SIX] case 0x2086: // [SUBSCRIPT SIX] case 0x2465: // [CIRCLED DIGIT SIX] case 0x24FA: // [DOUBLE CIRCLED DIGIT SIX] case 0x277B: // [DINGBAT NEGATIVE CIRCLED DIGIT SIX] case 0x2785: // [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] case 0x278F: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] case 0xFF16: // [FULLWIDTH DIGIT SIX] output[outputPos++] = L'6'; break; case 0x248D: // [DIGIT SIX FULL STOP] output[outputPos++] = L'6'; output[outputPos++] = L'.'; break; case 0x2479: // [PARENTHESIZED DIGIT SIX] output[outputPos++] = L'('; output[outputPos++] = L'6'; output[outputPos++] = L')'; break; case 0x2077: // [SUPERSCRIPT SEVEN] case 0x2087: // [SUBSCRIPT SEVEN] case 0x2466: // [CIRCLED DIGIT SEVEN] case 0x24FB: // [DOUBLE CIRCLED DIGIT SEVEN] case 0x277C: // [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] case 0x2786: // [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] case 0x2790: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] case 0xFF17: // [FULLWIDTH DIGIT SEVEN] output[outputPos++] = L'7'; break; case 0x248E: // [DIGIT SEVEN FULL STOP] output[outputPos++] = L'7'; output[outputPos++] = L'.'; break; case 0x247A: // [PARENTHESIZED DIGIT SEVEN] output[outputPos++] = L'('; output[outputPos++] = L'7'; 
output[outputPos++] = L')'; break; case 0x2078: // [SUPERSCRIPT EIGHT] case 0x2088: // [SUBSCRIPT EIGHT] case 0x2467: // [CIRCLED DIGIT EIGHT] case 0x24FC: // [DOUBLE CIRCLED DIGIT EIGHT] case 0x277D: // [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] case 0x2787: // [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] case 0x2791: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] case 0xFF18: // [FULLWIDTH DIGIT EIGHT] output[outputPos++] = L'8'; break; case 0x248F: // [DIGIT EIGHT FULL STOP] output[outputPos++] = L'8'; output[outputPos++] = L'.'; break; case 0x247B: // [PARENTHESIZED DIGIT EIGHT] output[outputPos++] = L'('; output[outputPos++] = L'8'; output[outputPos++] = L')'; break; case 0x2079: // [SUPERSCRIPT NINE] case 0x2089: // [SUBSCRIPT NINE] case 0x2468: // [CIRCLED DIGIT NINE] case 0x24FD: // [DOUBLE CIRCLED DIGIT NINE] case 0x277E: // [DINGBAT NEGATIVE CIRCLED DIGIT NINE] case 0x2788: // [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] case 0x2792: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] case 0xFF19: // [FULLWIDTH DIGIT NINE] output[outputPos++] = L'9'; break; case 0x2490: // [DIGIT NINE FULL STOP] output[outputPos++] = L'9'; output[outputPos++] = L'.'; break; case 0x247C: // [PARENTHESIZED DIGIT NINE] output[outputPos++] = L'('; output[outputPos++] = L'9'; output[outputPos++] = L')'; break; case 0x2469: // [CIRCLED NUMBER TEN] case 0x24FE: // [DOUBLE CIRCLED NUMBER TEN] case 0x277F: // [DINGBAT NEGATIVE CIRCLED NUMBER TEN] case 0x2789: // [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] case 0x2793: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] output[outputPos++] = L'1'; output[outputPos++] = L'0'; break; case 0x2491: // [NUMBER TEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'0'; output[outputPos++] = L'.'; break; case 0x247D: // [PARENTHESIZED NUMBER TEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'0'; output[outputPos++] = L')'; break; case 0x246A: // [CIRCLED NUMBER ELEVEN] case 0x24EB: // [NEGATIVE CIRCLED 
NUMBER ELEVEN] output[outputPos++] = L'1'; output[outputPos++] = L'1'; break; case 0x2492: // [NUMBER ELEVEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'1'; output[outputPos++] = L'.'; break; case 0x247E: // [PARENTHESIZED NUMBER ELEVEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'1'; output[outputPos++] = L')'; break; case 0x246B: // [CIRCLED NUMBER TWELVE] case 0x24EC: // [NEGATIVE CIRCLED NUMBER TWELVE] output[outputPos++] = L'1'; output[outputPos++] = L'2'; break; case 0x2493: // [NUMBER TWELVE FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'2'; output[outputPos++] = L'.'; break; case 0x247F: // [PARENTHESIZED NUMBER TWELVE] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'2'; output[outputPos++] = L')'; break; case 0x246C: // [CIRCLED NUMBER THIRTEEN] case 0x24ED: // [NEGATIVE CIRCLED NUMBER THIRTEEN] output[outputPos++] = L'1'; output[outputPos++] = L'3'; break; case 0x2494: // [NUMBER THIRTEEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'3'; output[outputPos++] = L'.'; break; case 0x2480: // [PARENTHESIZED NUMBER THIRTEEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'3'; output[outputPos++] = L')'; break; case 0x246D: // [CIRCLED NUMBER FOURTEEN] case 0x24EE: // [NEGATIVE CIRCLED NUMBER FOURTEEN] output[outputPos++] = L'1'; output[outputPos++] = L'4'; break; case 0x2495: // [NUMBER FOURTEEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'4'; output[outputPos++] = L'.'; break; case 0x2481: // [PARENTHESIZED NUMBER FOURTEEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'4'; output[outputPos++] = L')'; break; case 0x246E: // [CIRCLED NUMBER FIFTEEN] case 0x24EF: // [NEGATIVE CIRCLED NUMBER FIFTEEN] output[outputPos++] = L'1'; output[outputPos++] = L'5'; break; case 0x2496: // [NUMBER FIFTEEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'5'; 
output[outputPos++] = L'.'; break; case 0x2482: // [PARENTHESIZED NUMBER FIFTEEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'5'; output[outputPos++] = L')'; break; case 0x246F: // [CIRCLED NUMBER SIXTEEN] case 0x24F0: // [NEGATIVE CIRCLED NUMBER SIXTEEN] output[outputPos++] = L'1'; output[outputPos++] = L'6'; break; case 0x2497: // [NUMBER SIXTEEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'6'; output[outputPos++] = L'.'; break; case 0x2483: // [PARENTHESIZED NUMBER SIXTEEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'6'; output[outputPos++] = L')'; break; case 0x2470: // [CIRCLED NUMBER SEVENTEEN] case 0x24F1: // [NEGATIVE CIRCLED NUMBER SEVENTEEN] output[outputPos++] = L'1'; output[outputPos++] = L'7'; break; case 0x2498: // [NUMBER SEVENTEEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'7'; output[outputPos++] = L'.'; break; case 0x2484: // [PARENTHESIZED NUMBER SEVENTEEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'7'; output[outputPos++] = L')'; break; case 0x2471: // [CIRCLED NUMBER EIGHTEEN] case 0x24F2: // [NEGATIVE CIRCLED NUMBER EIGHTEEN] output[outputPos++] = L'1'; output[outputPos++] = L'8'; break; case 0x2499: // [NUMBER EIGHTEEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'8'; output[outputPos++] = L'.'; break; case 0x2485: // [PARENTHESIZED NUMBER EIGHTEEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'8'; output[outputPos++] = L')'; break; case 0x2472: // [CIRCLED NUMBER NINETEEN] case 0x24F3: // [NEGATIVE CIRCLED NUMBER NINETEEN] output[outputPos++] = L'1'; output[outputPos++] = L'9'; break; case 0x249A: // [NUMBER NINETEEN FULL STOP] output[outputPos++] = L'1'; output[outputPos++] = L'9'; output[outputPos++] = L'.'; break; case 0x2486: // [PARENTHESIZED NUMBER NINETEEN] output[outputPos++] = L'('; output[outputPos++] = L'1'; output[outputPos++] = L'9'; 
output[outputPos++] = L')'; break; case 0x2473: // [CIRCLED NUMBER TWENTY] case 0x24F4: // [NEGATIVE CIRCLED NUMBER TWENTY] output[outputPos++] = L'2'; output[outputPos++] = L'0'; break; case 0x249B: // [NUMBER TWENTY FULL STOP] output[outputPos++] = L'2'; output[outputPos++] = L'0'; output[outputPos++] = L'.'; break; case 0x2487: // [PARENTHESIZED NUMBER TWENTY] output[outputPos++] = L'('; output[outputPos++] = L'2'; output[outputPos++] = L'0'; output[outputPos++] = L')'; break; case 0x00AB: // [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] case 0x00BB: // [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] case 0x201C: // [LEFT DOUBLE QUOTATION MARK] case 0x201D: // [RIGHT DOUBLE QUOTATION MARK] case 0x201E: // [DOUBLE LOW-9 QUOTATION MARK] case 0x2033: // [DOUBLE PRIME] case 0x2036: // [REVERSED DOUBLE PRIME] case 0x275D: // [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] case 0x275E: // [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] case 0x276E: // [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] case 0x276F: // [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] case 0xFF02: // [FULLWIDTH QUOTATION MARK] output[outputPos++] = L'"'; break; case 0x2018: // [LEFT SINGLE QUOTATION MARK] case 0x2019: // [RIGHT SINGLE QUOTATION MARK] case 0x201A: // [SINGLE LOW-9 QUOTATION MARK] case 0x201B: // [SINGLE HIGH-REVERSED-9 QUOTATION MARK] case 0x2032: // [PRIME] case 0x2035: // [REVERSED PRIME] case 0x2039: // [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] case 0x203A: // [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] case 0x275B: // [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] case 0x275C: // [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] case 0xFF07: // [FULLWIDTH APOSTROPHE] output[outputPos++] = L'\''; break; case 0x2010: // [HYPHEN] case 0x2011: // [NON-BREAKING HYPHEN] case 0x2012: // [FIGURE DASH] case 0x2013: // [EN DASH] case 0x2014: // [EM DASH] case 0x207B: // [SUPERSCRIPT MINUS] case 0x208B: // [SUBSCRIPT MINUS] case 0xFF0D: // [FULLWIDTH HYPHEN-MINUS] 
output[outputPos++] = L'-'; break; case 0x2045: // [LEFT SQUARE BRACKET WITH QUILL] case 0x2772: // [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] case 0xFF3B: // [FULLWIDTH LEFT SQUARE BRACKET] output[outputPos++] = L'['; break; case 0x2046: // [RIGHT SQUARE BRACKET WITH QUILL] case 0x2773: // [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] case 0xFF3D: // [FULLWIDTH RIGHT SQUARE BRACKET] output[outputPos++] = L']'; break; case 0x207D: // [SUPERSCRIPT LEFT PARENTHESIS] case 0x208D: // [SUBSCRIPT LEFT PARENTHESIS] case 0x2768: // [MEDIUM LEFT PARENTHESIS ORNAMENT] case 0x276A: // [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] case 0xFF08: // [FULLWIDTH LEFT PARENTHESIS] output[outputPos++] = L'('; break; case 0x2E28: // [LEFT DOUBLE PARENTHESIS] output[outputPos++] = L'('; output[outputPos++] = L'('; break; case 0x207E: // [SUPERSCRIPT RIGHT PARENTHESIS] case 0x208E: // [SUBSCRIPT RIGHT PARENTHESIS] case 0x2769: // [MEDIUM RIGHT PARENTHESIS ORNAMENT] case 0x276B: // [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] case 0xFF09: // [FULLWIDTH RIGHT PARENTHESIS] output[outputPos++] = L')'; break; case 0x2E29: // [RIGHT DOUBLE PARENTHESIS] output[outputPos++] = L')'; output[outputPos++] = L')'; break; case 0x276C: // [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] case 0x2770: // [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] case 0xFF1C: // [FULLWIDTH LESS-THAN SIGN] output[outputPos++] = L'<'; break; case 0x276D: // [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] case 0x2771: // [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] case 0xFF1E: // [FULLWIDTH GREATER-THAN SIGN] output[outputPos++] = L'>'; break; case 0x2774: // [MEDIUM LEFT CURLY BRACKET ORNAMENT] case 0xFF5B: // [FULLWIDTH LEFT CURLY BRACKET] output[outputPos++] = L'{'; break; case 0x2775: // [MEDIUM RIGHT CURLY BRACKET ORNAMENT] case 0xFF5D: // [FULLWIDTH RIGHT CURLY BRACKET] output[outputPos++] = L'}'; break; case 0x207A: // [SUPERSCRIPT PLUS SIGN] case 0x208A: // [SUBSCRIPT PLUS SIGN] case 0xFF0B: // [FULLWIDTH PLUS 
SIGN] output[outputPos++] = L'+'; break; case 0x207C: // [SUPERSCRIPT EQUALS SIGN] case 0x208C: // [SUBSCRIPT EQUALS SIGN] case 0xFF1D: // [FULLWIDTH EQUALS SIGN] output[outputPos++] = L'='; break; case 0xFF01: // [FULLWIDTH EXCLAMATION MARK] output[outputPos++] = L'!'; break; case 0x203C: // [DOUBLE EXCLAMATION MARK] output[outputPos++] = L'!'; output[outputPos++] = L'!'; break; case 0x2049: // [EXCLAMATION QUESTION MARK] output[outputPos++] = L'!'; output[outputPos++] = L'?'; break; case 0xFF03: // [FULLWIDTH NUMBER SIGN] output[outputPos++] = L'#'; break; case 0xFF04: // [FULLWIDTH DOLLAR SIGN] output[outputPos++] = L'$'; break; case 0x2052: // [COMMERCIAL MINUS SIGN] case 0xFF05: // [FULLWIDTH PERCENT SIGN] output[outputPos++] = L'%'; break; case 0xFF06: // [FULLWIDTH AMPERSAND] output[outputPos++] = L'&'; break; case 0x204E: // [LOW ASTERISK] case 0xFF0A: // [FULLWIDTH ASTERISK] output[outputPos++] = L'*'; break; case 0xFF0C: // [FULLWIDTH COMMA] output[outputPos++] = L','; break; case 0xFF0E: // [FULLWIDTH FULL STOP] output[outputPos++] = L'.'; break; case 0x2044: // [FRACTION SLASH] case 0xFF0F: // [FULLWIDTH SOLIDUS] output[outputPos++] = L'/'; break; case 0xFF1A: // [FULLWIDTH COLON] output[outputPos++] = L':'; break; case 0x204F: // [REVERSED SEMICOLON] case 0xFF1B: // [FULLWIDTH SEMICOLON] output[outputPos++] = L';'; break; case 0xFF1F: // [FULLWIDTH QUESTION MARK] output[outputPos++] = L'?'; break; case 0x2047: // [DOUBLE QUESTION MARK] output[outputPos++] = L'?'; output[outputPos++] = L'?'; break; case 0x2048: // [QUESTION EXCLAMATION MARK] output[outputPos++] = L'?'; output[outputPos++] = L'!'; break; case 0xFF20: // [FULLWIDTH COMMERCIAL AT] output[outputPos++] = L'@'; break; case 0xFF3C: // [FULLWIDTH REVERSE SOLIDUS] output[outputPos++] = L'\\'; break; case 0x2038: // [CARET] case 0xFF3E: // [FULLWIDTH CIRCUMFLEX ACCENT] output[outputPos++] = L'^'; break; case 0xFF3F: // [FULLWIDTH LOW LINE] output[outputPos++] = L'_'; break; case 0x2053: // [SWUNG 
DASH] case 0xFF5E: // [FULLWIDTH TILDE] output[outputPos++] = L'~'; break; default: output[outputPos++] = c; break; } } } } } LucenePlusPlus-rel_3.0.9/src/core/analysis/Analyzer.cpp000066400000000000000000000020131456444476200231570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Analyzer.h" #include "Fieldable.h" namespace Lucene { Analyzer::~Analyzer() { } TokenStreamPtr Analyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { return tokenStream(fieldName, reader); } LuceneObjectPtr Analyzer::getPreviousTokenStream() { return tokenStreams.get(); } void Analyzer::setPreviousTokenStream(const LuceneObjectPtr& stream) { tokenStreams.set(stream); } int32_t Analyzer::getPositionIncrementGap(const String& fieldName) { return 0; } int32_t Analyzer::getOffsetGap(const FieldablePtr& field) { return field->isTokenized() ? 1 : 0; } void Analyzer::close() { tokenStreams.close(); } } LucenePlusPlus-rel_3.0.9/src/core/analysis/BaseCharFilter.cpp000066400000000000000000000034511456444476200242170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BaseCharFilter.h" #include "MiscUtils.h" namespace Lucene { BaseCharFilter::BaseCharFilter(const CharStreamPtr& in) : CharFilter(in) { size = 0; } BaseCharFilter::~BaseCharFilter() { } int32_t BaseCharFilter::correct(int32_t currentOff) { if (!offsets || currentOff < offsets[0]) { return currentOff; } int32_t hi = size - 1; if (currentOff >= offsets[hi]) { return currentOff + diffs[hi]; } int32_t lo = 0; int32_t mid = -1; while (hi >= lo) { mid = MiscUtils::unsignedShift(lo + hi, 1); if (currentOff < offsets[mid]) { hi = mid - 1; } else if (currentOff > offsets[mid]) { lo = mid + 1; } else { return currentOff + diffs[mid]; } } if (currentOff < offsets[mid]) { return mid == 0 ? currentOff : currentOff + diffs[mid - 1]; } else { return currentOff + diffs[mid]; } } int32_t BaseCharFilter::getLastCumulativeDiff() { return !offsets ? 0 : diffs[size - 1]; } void BaseCharFilter::addOffCorrectMap(int32_t off, int32_t cumulativeDiff) { if (!offsets) { offsets = IntArray::newInstance(64); diffs = IntArray::newInstance(64); } else if (size == offsets.size()) { offsets.resize(MiscUtils::getNextSize(offsets.size())); diffs.resize(MiscUtils::getNextSize(diffs.size())); } offsets[size] = off; diffs[size++] = cumulativeDiff; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/CachingTokenFilter.cpp000066400000000000000000000026171456444476200251070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CachingTokenFilter.h" namespace Lucene { CachingTokenFilter::CachingTokenFilter(const TokenStreamPtr& input) : TokenFilter(input) { } CachingTokenFilter::~CachingTokenFilter() { } bool CachingTokenFilter::incrementToken() { if (!cache) { // fill cache lazily cache = Collection::newInstance(); fillCache(); iterator = cache.begin(); } if (iterator == cache.end()) { // the cache is exhausted, return false return false; } // Since the TokenFilter can be reset, the tokens need to be preserved as immutable. restoreState(*iterator++); return true; } void CachingTokenFilter::end() { if (finalState) { restoreState(finalState); } } void CachingTokenFilter::reset() { if (cache) { iterator = cache.begin(); } } void CachingTokenFilter::fillCache() { while (input->incrementToken()) { cache.add(captureState()); } // capture final state input->end(); finalState = captureState(); } } LucenePlusPlus-rel_3.0.9/src/core/analysis/CharArraySet.cpp000066400000000000000000000037371456444476200237400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CharArraySet.h" #include "StringUtils.h" namespace Lucene { CharArraySet::CharArraySet(bool ignoreCase) { this->ignoreCase = ignoreCase; this->entries = HashSet::newInstance(); } CharArraySet::CharArraySet(HashSet entries, bool ignoreCase) { this->ignoreCase = ignoreCase; this->entries = HashSet::newInstance(); if (entries) { for (HashSet::iterator entry = entries.begin(); entry != entries.end(); ++entry) { add(*entry); } } } CharArraySet::CharArraySet(Collection entries, bool ignoreCase) { this->ignoreCase = ignoreCase; this->entries = HashSet::newInstance(); if (entries) { for (Collection::iterator entry = entries.begin(); entry != entries.end(); ++entry) { add(*entry); } } } CharArraySet::~CharArraySet() { } bool CharArraySet::contains(const String& text) { return entries.contains(ignoreCase ? StringUtils::toLower(text) : text); } bool CharArraySet::contains(const wchar_t* text, int32_t offset, int32_t length) { return contains(String(text + offset, length)); } bool CharArraySet::add(const String& text) { return entries.add(ignoreCase ? StringUtils::toLower(text) : text); } bool CharArraySet::add(CharArray text) { return add(String(text.get(), text.size())); } int32_t CharArraySet::size() { return entries.size(); } bool CharArraySet::isEmpty() { return entries.empty(); } HashSet::iterator CharArraySet::begin() { return entries.begin(); } HashSet::iterator CharArraySet::end() { return entries.end(); } } LucenePlusPlus-rel_3.0.9/src/core/analysis/CharFilter.cpp000066400000000000000000000020721456444476200234220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CharFilter.h" namespace Lucene { CharFilter::CharFilter(const CharStreamPtr& in) { input = in; } CharFilter::~CharFilter() { } int32_t CharFilter::correct(int32_t currentOff) { return currentOff; } int32_t CharFilter::correctOffset(int32_t currentOff) { return input->correctOffset(correct(currentOff)); } void CharFilter::close() { input->close(); } int32_t CharFilter::read(wchar_t* buffer, int32_t offset, int32_t length) { return input->read(buffer, offset, length); } bool CharFilter::markSupported() { return input->markSupported(); } void CharFilter::mark(int32_t readAheadLimit) { input->mark(readAheadLimit); } void CharFilter::reset() { input->reset(); } } LucenePlusPlus-rel_3.0.9/src/core/analysis/CharReader.cpp000066400000000000000000000022621456444476200234000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CharReader.h" namespace Lucene { CharReader::CharReader(const ReaderPtr& in) { input = in; } CharReader::~CharReader() { } CharStreamPtr CharReader::get(const ReaderPtr& input) { CharStreamPtr charStream(boost::dynamic_pointer_cast(input)); return charStream ? 
charStream : newLucene(input); } int32_t CharReader::correctOffset(int32_t currentOff) { return currentOff; } void CharReader::close() { if (input) { input->close(); } } int32_t CharReader::read(wchar_t* buffer, int32_t offset, int32_t length) { return input->read(buffer, offset, length); } bool CharReader::markSupported() { return input->markSupported(); } void CharReader::mark(int32_t readAheadLimit) { input->mark(readAheadLimit); } void CharReader::reset() { input->reset(); } } LucenePlusPlus-rel_3.0.9/src/core/analysis/CharStream.cpp000066400000000000000000000006701456444476200234320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CharStream.h" namespace Lucene { CharStream::~CharStream() { } } LucenePlusPlus-rel_3.0.9/src/core/analysis/CharTokenizer.cpp000066400000000000000000000063121456444476200241500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CharTokenizer.h" #include "OffsetAttribute.h" #include "TermAttribute.h" #include "Reader.h" namespace Lucene { const int32_t CharTokenizer::MAX_WORD_LEN = 255; const int32_t CharTokenizer::IO_BUFFER_SIZE = 4096; CharTokenizer::CharTokenizer(const ReaderPtr& input) : Tokenizer(input) { offset = 0; bufferIndex = 0; dataLen = 0; ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); offsetAtt = addAttribute(); termAtt = addAttribute(); } CharTokenizer::CharTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : Tokenizer(source, input) { offset = 0; bufferIndex = 0; dataLen = 0; ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); offsetAtt = addAttribute(); termAtt = addAttribute(); } CharTokenizer::CharTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : Tokenizer(factory, input) { offset = 0; bufferIndex = 0; dataLen = 0; ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); offsetAtt = addAttribute(); termAtt = addAttribute(); } CharTokenizer::~CharTokenizer() { } wchar_t CharTokenizer::normalize(wchar_t c) { return c; } bool CharTokenizer::incrementToken() { clearAttributes(); int32_t length = 0; int32_t start = bufferIndex; CharArray buffer(termAtt->termBuffer()); while (true) { if (bufferIndex >= dataLen) { offset += dataLen; dataLen = input->read(ioBuffer.get(), 0, ioBuffer.size()); if (dataLen == -1) { dataLen = 0; // so next offset += dataLen won't decrement offset if (length > 0) { break; } else { return false; } } bufferIndex = 0; } wchar_t c = ioBuffer[bufferIndex++]; if (isTokenChar(c)) { // if it's a token char if (length == 0) { start = offset + bufferIndex - 1; } else if (length == buffer.size()) { buffer = termAtt->resizeTermBuffer(1 + length); } buffer[length++] = normalize(c); // buffer it, normalized if (length == MAX_WORD_LEN) { // buffer overflow! 
break; } } else if (length > 0) { // at non-Letter with chars break; // return them } } termAtt->setTermLength(length); offsetAtt->setOffset(correctOffset(start), correctOffset(start + length)); return true; } void CharTokenizer::end() { // set final offset int32_t finalOffset = correctOffset(offset); offsetAtt->setOffset(finalOffset, finalOffset); } void CharTokenizer::reset(const ReaderPtr& input) { Tokenizer::reset(input); bufferIndex = 0; offset = 0; dataLen = 0; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/ISOLatin1AccentFilter.cpp000066400000000000000000000150501456444476200253660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ISOLatin1AccentFilter.h" #include "TermAttribute.h" namespace Lucene { ISOLatin1AccentFilter::ISOLatin1AccentFilter(const TokenStreamPtr& input) : TokenFilter(input) { output = CharArray::newInstance(256); outputPos = 0; termAtt = addAttribute(); } ISOLatin1AccentFilter::~ISOLatin1AccentFilter() { } bool ISOLatin1AccentFilter::incrementToken() { if (input->incrementToken()) { wchar_t* buffer = termAtt->termBufferArray(); int32_t length = termAtt->termLength(); // If no characters actually require rewriting then we just return token as-is for (int32_t i = 0; i < length; ++i) { wchar_t c = buffer[i]; if (c >= 0x00c0 && c <= 0xfb06) { removeAccents(buffer, length); termAtt->setTermBuffer(output.get(), 0, outputPos); break; } } return true; } else { return false; } } void ISOLatin1AccentFilter::removeAccents(const wchar_t* input, int32_t length) { // Worst-case length required int32_t maxSizeNeeded = 2 * length; int32_t size = output.size(); while (size < maxSizeNeeded) { size *= 2; } if (size 
!= output.size()) { output.resize(size); } outputPos = 0; int32_t pos = 0; wchar_t* output = this->output.get(); for (int32_t i = 0; i < length; ++i, ++pos) { wchar_t c = input[pos]; // Quick test: if it's not in range then just keep current character if (c < 0x00C0 || c > 0xFB06) { output[outputPos++] = c; } else { switch (c) { case 0x00C0: case 0x00C1: case 0x00C2: case 0x00C3: case 0x00C4: case 0x00C5: output[outputPos++] = L'A'; break; case 0x00C6: output[outputPos++] = L'A'; output[outputPos++] = L'E'; break; case 0x00C7: output[outputPos++] = L'C'; break; case 0x00C8: case 0x00C9: case 0x00CA: case 0x00CB: output[outputPos++] = L'E'; break; case 0x00CC: case 0x00CD: case 0x00CE: case 0x00CF: output[outputPos++] = L'I'; break; case 0x0132: output[outputPos++] = L'I'; output[outputPos++] = L'J'; break; case 0x00D0: output[outputPos++] = L'D'; break; case 0x00D1: output[outputPos++] = L'N'; break; case 0x00D2: case 0x00D3: case 0x00D4: case 0x00D5: case 0x00D6: case 0x00D8: output[outputPos++] = L'O'; break; case 0x0152: output[outputPos++] = L'O'; output[outputPos++] = L'E'; break; case 0x00DE: output[outputPos++] = L'T'; output[outputPos++] = L'H'; break; case 0x00D9: case 0x00DA: case 0x00DB: case 0x00DC: output[outputPos++] = L'U'; break; case 0x00DD: case 0x0178: output[outputPos++] = L'Y'; break; case 0x00E0: case 0x00E1: case 0x00E2: case 0x00E3: case 0x00E4: case 0x00E5: output[outputPos++] = L'a'; break; case 0x00E6: output[outputPos++] = L'a'; output[outputPos++] = L'e'; break; case 0x00E7: output[outputPos++] = L'c'; break; case 0x00E8: case 0x00E9: case 0x00EA: case 0x00EB: output[outputPos++] = L'e'; break; case 0x00EC: case 0x00ED: case 0x00EE: case 0x00EF: output[outputPos++] = L'i'; break; case 0x0133: output[outputPos++] = L'i'; output[outputPos++] = L'j'; break; case 0x00F0: output[outputPos++] = L'd'; break; case 0x00F1: output[outputPos++] = L'n'; break; case 0x00F2: case 0x00F3: case 0x00F4: case 0x00F5: case 0x00F6: case 0x00F8: 
output[outputPos++] = L'o'; break; case 0x0153: output[outputPos++] = L'o'; output[outputPos++] = L'e'; break; case 0x00DF: output[outputPos++] = L's'; output[outputPos++] = L's'; break; case 0x00FE: output[outputPos++] = L't'; output[outputPos++] = L'h'; break; case 0x00F9: case 0x00FA: case 0x00FB: case 0x00FC: output[outputPos++] = L'u'; break; case 0x00FD: case 0x00FF: output[outputPos++] = L'y'; break; case 0xFB00: output[outputPos++] = L'f'; output[outputPos++] = L'f'; break; case 0xFB01: output[outputPos++] = L'f'; output[outputPos++] = L'i'; break; case 0xFB02: output[outputPos++] = L'f'; output[outputPos++] = L'l'; break; case 0xFB05: output[outputPos++] = L'f'; output[outputPos++] = L't'; break; case 0xFB06: output[outputPos++] = L's'; output[outputPos++] = L't'; break; default : output[outputPos++] = c; break; } } } } } LucenePlusPlus-rel_3.0.9/src/core/analysis/KeywordAnalyzer.cpp000066400000000000000000000020061456444476200245260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "KeywordAnalyzer.h" #include "KeywordTokenizer.h" namespace Lucene { KeywordAnalyzer::~KeywordAnalyzer() { } TokenStreamPtr KeywordAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { return newLucene(reader); } TokenStreamPtr KeywordAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { TokenizerPtr tokenizer(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!tokenizer) { tokenizer = newLucene(reader); setPreviousTokenStream(tokenizer); } else { tokenizer->reset(reader); } return tokenizer; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/KeywordTokenizer.cpp000066400000000000000000000044361456444476200247240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "KeywordTokenizer.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "Reader.h" namespace Lucene { const int32_t KeywordTokenizer::DEFAULT_BUFFER_SIZE = 256; KeywordTokenizer::KeywordTokenizer(const ReaderPtr& input) : Tokenizer(input) { init(DEFAULT_BUFFER_SIZE); } KeywordTokenizer::KeywordTokenizer(const ReaderPtr& input, int32_t bufferSize) : Tokenizer(input) { init(bufferSize); } KeywordTokenizer::KeywordTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input, int32_t bufferSize) : Tokenizer(source, input) { init(bufferSize); } KeywordTokenizer::KeywordTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input, int32_t bufferSize) : Tokenizer(factory, input) { init(bufferSize); } KeywordTokenizer::~KeywordTokenizer() { } void KeywordTokenizer::init(int32_t bufferSize) { this->done = false; this->finalOffset = 0; this->termAtt = addAttribute(); this->offsetAtt = addAttribute(); this->termAtt->resizeTermBuffer(bufferSize); } bool KeywordTokenizer::incrementToken() { if (!done) { clearAttributes(); done = true; int32_t upto = 0; CharArray buffer(termAtt->termBuffer()); while (true) { int32_t length = input->read(buffer.get(), upto, buffer.size() - upto); if (length == -1) { break; } upto += length; if (upto == buffer.size()) { buffer = termAtt->resizeTermBuffer(buffer.size() + 1); } } termAtt->setTermLength(upto); finalOffset = correctOffset(upto); offsetAtt->setOffset(correctOffset(0), finalOffset); return true; } return false; } void KeywordTokenizer::end() { // set final offset offsetAtt->setOffset(finalOffset, finalOffset); } void KeywordTokenizer::reset() { Tokenizer::reset(input); done = false; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/LengthFilter.cpp000066400000000000000000000020301456444476200237600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// 
// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LengthFilter.h" #include "TermAttribute.h" namespace Lucene { LengthFilter::LengthFilter(const TokenStreamPtr& input, int32_t min, int32_t max) : TokenFilter(input) { this->min = min; this->max = max; this->termAtt = addAttribute(); } LengthFilter::~LengthFilter() { } bool LengthFilter::incrementToken() { // return the first non-stop word found while (input->incrementToken()) { int32_t len = termAtt->termLength(); if (len >= min && len <= max) { return true; } // note: else we ignore it but should we index each part of it? } // reached EOS -- return false return false; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/LetterTokenizer.cpp000066400000000000000000000016441456444476200245350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LetterTokenizer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { LetterTokenizer::LetterTokenizer(const ReaderPtr& input) : CharTokenizer(input) { } LetterTokenizer::LetterTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : CharTokenizer(source, input) { } LetterTokenizer::LetterTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : CharTokenizer(factory, input) { } LetterTokenizer::~LetterTokenizer() { } bool LetterTokenizer::isTokenChar(wchar_t c) { return UnicodeUtil::isAlpha(c); } } LucenePlusPlus-rel_3.0.9/src/core/analysis/LowerCaseFilter.cpp000066400000000000000000000015661456444476200244400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LowerCaseFilter.h" #include "TermAttribute.h" #include "CharFolder.h" namespace Lucene { LowerCaseFilter::LowerCaseFilter(const TokenStreamPtr& input) : TokenFilter(input) { termAtt = addAttribute(); } LowerCaseFilter::~LowerCaseFilter() { } bool LowerCaseFilter::incrementToken() { if (input->incrementToken()) { wchar_t* buffer = termAtt->termBufferArray(); CharFolder::toLower(buffer, buffer + termAtt->termLength()); return true; } return false; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/LowerCaseTokenizer.cpp000066400000000000000000000016571456444476200251660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LowerCaseTokenizer.h" #include "CharFolder.h" namespace Lucene { LowerCaseTokenizer::LowerCaseTokenizer(const ReaderPtr& input) : LetterTokenizer(input) { } LowerCaseTokenizer::LowerCaseTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : LetterTokenizer(source, input) { } LowerCaseTokenizer::LowerCaseTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : LetterTokenizer(factory, input) { } LowerCaseTokenizer::~LowerCaseTokenizer() { } wchar_t LowerCaseTokenizer::normalize(wchar_t c) { return CharFolder::toLower(c); } } LucenePlusPlus-rel_3.0.9/src/core/analysis/MappingCharFilter.cpp000066400000000000000000000070461456444476200247440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MappingCharFilter.h" #include "NormalizeCharMap.h" #include "CharReader.h" namespace Lucene { MappingCharFilter::MappingCharFilter(const NormalizeCharMapPtr& normMap, const CharStreamPtr& in) : BaseCharFilter(in) { this->normMap = normMap; this->charPointer = 0; this->nextCharCounter = 0; } MappingCharFilter::MappingCharFilter(const NormalizeCharMapPtr& normMap, const ReaderPtr& in) : BaseCharFilter(CharReader::get(in)) { this->normMap = normMap; this->charPointer = 0; this->nextCharCounter = 0; } MappingCharFilter::~MappingCharFilter() { } int32_t MappingCharFilter::read() { while (true) { if (charPointer < (int32_t)replacement.length()) { return (int32_t)replacement[charPointer++]; } int32_t firstChar = nextChar(); if (firstChar == -1) { return -1; } NormalizeCharMapPtr nm(normMap->submap ? normMap->submap.get((wchar_t)firstChar) : NormalizeCharMapPtr()); if (!nm) { return firstChar; } NormalizeCharMapPtr result(match(nm)); if (!result) { return firstChar; } replacement = result->normStr; charPointer = 0; if (result->diff != 0) { int32_t prevCumulativeDiff = getLastCumulativeDiff(); if (result->diff < 0) { for (int32_t i = 0; i < -result->diff; ++i) { addOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i); } } else { addOffCorrectMap(nextCharCounter - result->diff - prevCumulativeDiff, prevCumulativeDiff + result->diff); } } } } int32_t MappingCharFilter::nextChar() { ++nextCharCounter; if (buffer && !buffer.empty()) { return buffer.removeFirst(); } return input->read(); } void MappingCharFilter::pushChar(int32_t c) { --nextCharCounter; if (!buffer) { buffer = Collection::newInstance(); } buffer.add(0, (wchar_t)c); } void MappingCharFilter::pushLastChar(int32_t c) { if (!buffer) { buffer = Collection::newInstance(); } buffer.add((wchar_t)c); } NormalizeCharMapPtr MappingCharFilter::match(const NormalizeCharMapPtr& map) { 
NormalizeCharMapPtr result; if (map->submap) { int32_t chr = nextChar(); if (chr != -1) { NormalizeCharMapPtr subMap(map->submap.get((wchar_t)chr)); if (subMap) { result = match(subMap); } if (!result) { pushChar(chr); } } } if (!result) { result = map; } return result; } int32_t MappingCharFilter::read(wchar_t* buffer, int32_t offset, int32_t length) { CharArray tmp(CharArray::newInstance(length)); int32_t l = input->read(tmp.get(), 0, length); if (l != -1) { for (int32_t i = 0; i < l; ++i) { pushLastChar(tmp[i]); } } l = 0; for (int32_t i = offset; i < offset + length; ++i) { int32_t c = read(); if (c == -1) { break; } buffer[i] = (wchar_t)c; ++l; } return l == 0 ? -1 : l; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/NormalizeCharMap.cpp000066400000000000000000000024671456444476200246030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NormalizeCharMap.h" namespace Lucene { NormalizeCharMap::NormalizeCharMap() { diff = 0; } NormalizeCharMap::~NormalizeCharMap() { } void NormalizeCharMap::add(const String& singleMatch, const String& replacement) { NormalizeCharMapPtr currMap(shared_from_this()); for (String::const_iterator c = singleMatch.begin(); c != singleMatch.end(); ++c) { if (!currMap->submap) { currMap->submap = MapCharNormalizeCharMap::newInstance(); } NormalizeCharMapPtr map(currMap->submap.get(*c)); if (!map) { map = newLucene(); currMap->submap.put(*c, map); } currMap = map; } if (!currMap->normStr.empty()) { boost::throw_exception(RuntimeException(L"MappingCharFilter: there is already a mapping for " + singleMatch)); } currMap->normStr = replacement; currMap->diff = (int32_t)(singleMatch.length() - replacement.length()); } } LucenePlusPlus-rel_3.0.9/src/core/analysis/NumericTokenStream.cpp000066400000000000000000000107611456444476200251620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NumericTokenStream.h" #include "NumericUtils.h" #include "AttributeSource.h" #include "TermAttribute.h" #include "TypeAttribute.h" #include "PositionIncrementAttribute.h" namespace Lucene { NumericTokenStream::NumericTokenStream() { this->shift = 0; this->valSize = 0; this->termAtt = addAttribute(); this->typeAtt = addAttribute(); this->posIncrAtt = addAttribute(); this->precisionStep = NumericUtils::PRECISION_STEP_DEFAULT; } NumericTokenStream::NumericTokenStream(int32_t precisionStep) { this->shift = 0; this->valSize = 0; this->termAtt = addAttribute(); this->typeAtt = addAttribute(); this->posIncrAtt = addAttribute(); this->precisionStep = precisionStep; if (precisionStep < 1) { boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); } } NumericTokenStream::NumericTokenStream(const AttributeSourcePtr& source, int32_t precisionStep) : TokenStream(source) { this->shift = 0; this->valSize = 0; this->termAtt = addAttribute(); this->typeAtt = addAttribute(); this->posIncrAtt = addAttribute(); this->precisionStep = precisionStep; if (precisionStep < 1) { boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); } } NumericTokenStream::NumericTokenStream(const AttributeFactoryPtr& factory, int32_t precisionStep) : TokenStream(factory) { this->shift = 0; this->valSize = 0; this->termAtt = addAttribute(); this->typeAtt = addAttribute(); this->posIncrAtt = addAttribute(); this->precisionStep = precisionStep; if (precisionStep < 1) { boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); } } NumericTokenStream::~NumericTokenStream() { } const String& NumericTokenStream::TOKEN_TYPE_FULL_PREC() { static String _TOKEN_TYPE_FULL_PREC(L"fullPrecNumeric"); return _TOKEN_TYPE_FULL_PREC; } const String& NumericTokenStream::TOKEN_TYPE_LOWER_PREC() { static String 
_TOKEN_TYPE_LOWER_PREC(L"lowerPrecNumeric"); return _TOKEN_TYPE_LOWER_PREC; } NumericTokenStreamPtr NumericTokenStream::setLongValue(int64_t value) { this->value = value; valSize = 64; shift = 0; return shared_from_this(); } NumericTokenStreamPtr NumericTokenStream::setIntValue(int32_t value) { this->value = (int64_t)value; valSize = 32; shift = 0; return shared_from_this(); } NumericTokenStreamPtr NumericTokenStream::setDoubleValue(double value) { this->value = NumericUtils::doubleToSortableLong(value); valSize = 64; shift = 0; return shared_from_this(); } void NumericTokenStream::reset() { if (valSize == 0) { boost::throw_exception(IllegalStateException(L"call setValue() before usage")); } shift = 0; } bool NumericTokenStream::incrementToken() { if (valSize == 0) { boost::throw_exception(IllegalStateException(L"call setValue() before usage")); } if (shift >= valSize) { return false; } clearAttributes(); CharArray buffer; switch (valSize) { case 64: buffer = termAtt->resizeTermBuffer(NumericUtils::BUF_SIZE_LONG); termAtt->setTermLength(NumericUtils::longToPrefixCoded(value, shift, buffer)); break; case 32: buffer = termAtt->resizeTermBuffer(NumericUtils::BUF_SIZE_INT); termAtt->setTermLength(NumericUtils::intToPrefixCoded((int32_t)value, shift, buffer)); break; default: // should not happen boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64")); } typeAtt->setType(shift == 0 ? TOKEN_TYPE_FULL_PREC() : TOKEN_TYPE_LOWER_PREC()); posIncrAtt->setPositionIncrement(shift == 0 ? 
1 : 0); shift += precisionStep; return true; } String NumericTokenStream::toString() { StringStream buffer; buffer << L"(numeric,valSize=" << valSize << L",precisionStep=" << precisionStep << L")"; return buffer.str(); } } LucenePlusPlus-rel_3.0.9/src/core/analysis/PerFieldAnalyzerWrapper.cpp000066400000000000000000000045101456444476200261370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PerFieldAnalyzerWrapper.h" #include "Fieldable.h" namespace Lucene { PerFieldAnalyzerWrapper::PerFieldAnalyzerWrapper(const AnalyzerPtr& defaultAnalyzer) { this->defaultAnalyzer = defaultAnalyzer; this->analyzerMap = MapStringAnalyzer::newInstance(); } PerFieldAnalyzerWrapper::PerFieldAnalyzerWrapper(const AnalyzerPtr& defaultAnalyzer, MapStringAnalyzer fieldAnalyzers) { this->defaultAnalyzer = defaultAnalyzer; this->analyzerMap = MapStringAnalyzer::newInstance(); if (fieldAnalyzers) { analyzerMap.putAll(fieldAnalyzers.begin(), fieldAnalyzers.end()); } } PerFieldAnalyzerWrapper::~PerFieldAnalyzerWrapper() { } void PerFieldAnalyzerWrapper::addAnalyzer(const String& fieldName, const AnalyzerPtr& analyzer) { analyzerMap.put(fieldName, analyzer); } TokenStreamPtr PerFieldAnalyzerWrapper::tokenStream(const String& fieldName, const ReaderPtr& reader) { AnalyzerPtr analyzer(analyzerMap.get(fieldName)); if (!analyzer) { analyzer = defaultAnalyzer; } return analyzer->tokenStream(fieldName, reader); } TokenStreamPtr PerFieldAnalyzerWrapper::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { AnalyzerPtr analyzer(analyzerMap.get(fieldName)); if (!analyzer) { analyzer = defaultAnalyzer; } return 
analyzer->reusableTokenStream(fieldName, reader); } int32_t PerFieldAnalyzerWrapper::getPositionIncrementGap(const String& fieldName) { AnalyzerPtr analyzer(analyzerMap.get(fieldName)); if (!analyzer) { analyzer = defaultAnalyzer; } return analyzer->getPositionIncrementGap(fieldName); } int32_t PerFieldAnalyzerWrapper::getOffsetGap(const FieldablePtr& field) { AnalyzerPtr analyzer(analyzerMap.get(field->name())); if (!analyzer) { analyzer = defaultAnalyzer; } return analyzer->getOffsetGap(field); } String PerFieldAnalyzerWrapper::toString() { return L"PerFieldAnalyzerWrapper(default=" + defaultAnalyzer->toString() + L")"; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/PorterStemFilter.cpp000066400000000000000000000017411456444476200246530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PorterStemFilter.h" #include "PorterStemmer.h" #include "TermAttribute.h" namespace Lucene { PorterStemFilter::PorterStemFilter(const TokenStreamPtr& input) : TokenFilter(input) { stemmer = newLucene(); termAtt = addAttribute(); } PorterStemFilter::~PorterStemFilter() { } bool PorterStemFilter::incrementToken() { if (!input->incrementToken()) { return false; } if (stemmer->stem(termAtt->termBufferArray(), termAtt->termLength() - 1)) { termAtt->setTermBuffer(stemmer->getResultBuffer(), 0, stemmer->getResultLength()); } return true; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/PorterStemmer.cpp000066400000000000000000000227531456444476200242170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PorterStemmer.h" namespace Lucene { PorterStemmer::PorterStemmer() { b = NULL; k = 0; j = 0; i = 0; dirty = false; } PorterStemmer::~PorterStemmer() { } bool PorterStemmer::stem(CharArray word) { return stem(word.get(), word.size() - 1); } bool PorterStemmer::stem(wchar_t* b, int32_t k) { this->b = b; this->k = k; this->j = 0; this->i = k; dirty = false; if (k <= 1) { return false; // DEPARTURE } // With these lines, strings of length 1 or 2 don't go through the stemming process, although no mention // is made of this in the published algorithm. Remove the line to match the published algorithm. step1ab(); step1c(); step2(); step3(); step4(); step5(); if (i != this->k) { dirty = true; } return dirty; } wchar_t* PorterStemmer::getResultBuffer() { return b; } int32_t PorterStemmer::getResultLength() { return k + 1; } bool PorterStemmer::cons(int32_t i) { switch (b[i]) { case L'a': case L'e': case L'i': case L'o': case L'u': return false; case L'y': return (i == 0) ? 
true : !cons(i - 1); default: return true; } } int32_t PorterStemmer::m() { int32_t n = 0; int32_t i = 0; while (true) { if (i > j) { return n; } if (!cons(i)) { break; } ++i; } ++i; while (true) { while (true) { if (i > j) { return n; } if (cons(i)) { break; } ++i; } ++i; ++n; while (true) { if (i > j) { return n; } if (!cons(i)) { break; } ++i; } ++i; } } bool PorterStemmer::vowelinstem() { for (int32_t i = 0; i <= j; ++i) { if (!cons(i)) { return true; } } return false; } bool PorterStemmer::doublec(int32_t j) { if (j < 1) { return false; } if (b[j] != b[j - 1]) { return false; } return cons(j); } bool PorterStemmer::cvc(int32_t i) { if (i < 2 || !cons(i) || cons(i - 1) || !cons(i - 2)) { return false; } int32_t ch = b[i]; if (ch == L'w' || ch == L'x' || ch == L'y') { return false; } return true; } bool PorterStemmer::ends(const wchar_t* s) { int32_t length = s[0]; if (s[length] != b[k]) { return false; // tiny speed-up } if (length > k + 1) { return false; } if (std::memcmp(b + k - length + 1, s + 1, length) != 0) { return false; } j = k - length; return true; } void PorterStemmer::setto(const wchar_t* s) { int32_t length = s[0]; std::memmove(b + j + 1, s + 1, length); k = j + length; dirty = true; } void PorterStemmer::r(const wchar_t* s) { if (m() > 0) { setto(s); } } void PorterStemmer::step1ab() { if (b[k] == L's') { if (ends(L"\04" L"sses")) { k -= 2; } else if (ends(L"\03" L"ies")) { setto(L"\01" L"i"); } else if (b[k - 1] != L's') { --k; } } if (ends(L"\03" L"eed")) { if (m() > 0) { --k; } } else if ((ends(L"\02" L"ed") || ends(L"\03" L"ing")) && vowelinstem()) { k = j; if (ends(L"\02" L"at")) { setto(L"\03" L"ate"); } else if (ends(L"\02" L"bl")) { setto(L"\03" L"ble"); } else if (ends(L"\02" L"iz")) { setto(L"\03" L"ize"); } else if (doublec(k)) { --k; int32_t ch = b[k]; if (ch == L'l' || ch == L's' || ch == L'z') { ++k; } } else if (m() == 1 && cvc(k)) { setto(L"\01" L"e"); } } } void PorterStemmer::step1c() { if (ends(L"\01" L"y") && vowelinstem()) { 
b[k] = L'i'; dirty = true; } } void PorterStemmer::step2() { if (k == 0) { return; } switch (b[k - 1]) { case L'a': if (ends(L"\07" L"ational")) { r(L"\03" L"ate"); break; } if (ends(L"\06" L"tional")) { r(L"\04" L"tion"); break; } break; case L'c': if (ends(L"\04" L"enci")) { r(L"\04" L"ence"); break; } if (ends(L"\04" L"anci")) { r(L"\04" L"ance"); break; } break; case L'e': if (ends(L"\04" L"izer")) { r(L"\03" L"ize"); break; } break; case L'l': if (ends(L"\03" L"bli")) { // DEPARTURE r(L"\03" L"ble"); break; } if (ends(L"\04" L"alli")) { r(L"\02" L"al"); break; } if (ends(L"\05" L"entli")) { r(L"\03" L"ent"); break; } if (ends(L"\03" L"eli")) { r(L"\01" L"e"); break; } if (ends(L"\05" L"ousli")) { r(L"\03" L"ous"); break; } break; case L'o': if (ends(L"\07" L"ization")) { r(L"\03" L"ize"); break; } if (ends(L"\05" L"ation")) { r(L"\03" L"ate"); break; } if (ends(L"\04" L"ator")) { r(L"\03" L"ate"); break; } break; case L's': if (ends(L"\05" L"alism")) { r(L"\02" L"al"); break; } if (ends(L"\07" L"iveness")) { r(L"\03" L"ive"); break; } if (ends(L"\07" L"fulness")) { r(L"\03" L"ful"); break; } if (ends(L"\07" L"ousness")) { r(L"\03" L"ous"); break; } break; case L't': if (ends(L"\05" L"aliti")) { r(L"\02" L"al"); break; } if (ends(L"\05" L"iviti")) { r(L"\03" L"ive"); break; } if (ends(L"\06" L"biliti")) { r(L"\03" L"ble"); break; } break; case L'g': if (ends(L"\04" L"logi")) { // DEPARTURE r(L"\03" L"log"); break; } } } void PorterStemmer::step3() { switch (b[k]) { case L'e': if (ends(L"\05" L"icate")) { r(L"\02" L"ic"); break; } if (ends(L"\05" L"ative")) { r(L"\00" L""); break; } if (ends(L"\05" L"alize")) { r(L"\02" L"al"); break; } break; case L'i': if (ends(L"\05" L"iciti")) { r(L"\02" L"ic"); break; } break; case L'l': if (ends(L"\04" L"ical")) { r(L"\02" L"ic"); break; } if (ends(L"\03" L"ful")) { r(L"\00" L""); break; } break; case L's': if (ends(L"\04" L"ness")) { r(L"\00" L""); break; } break; } } void PorterStemmer::step4() { if (k == 0) { return; } 
switch (b[k - 1]) { case L'a': if (ends(L"\02" L"al")) { break; } return; case L'c': if (ends(L"\04" L"ance")) { break; } if (ends(L"\04" L"ence")) { break; } return; case L'e': if (ends(L"\02" L"er")) { break; } return; case L'i': if (ends(L"\02" L"ic")) { break; } return; case L'l': if (ends(L"\04" L"able")) { break; } if (ends(L"\04" L"ible")) { break; } return; case L'n': if (ends(L"\03" L"ant")) { break; } if (ends(L"\05" L"ement")) { break; } if (ends(L"\04" L"ment")) { break; } if (ends(L"\03" L"ent")) { break; } return; case L'o': if (ends(L"\03" L"ion") && (b[j] == L's' || b[j] == L't')) { break; } if (ends(L"\02" L"ou")) { break; } return; // takes care of -ous case L's': if (ends(L"\03" L"ism")) { break; } return; case L't': if (ends(L"\03" L"ate")) { break; } if (ends(L"\03" L"iti")) { break; } return; case L'u': if (ends(L"\03" L"ous")) { break; } return; case L'v': if (ends(L"\03" L"ive")) { break; } return; case L'z': if (ends(L"\03" L"ize")) { break; } return; default: return; } if (m() > 1) { k = j; } } void PorterStemmer::step5() { j = k; if (b[k] == L'e') { int32_t a = m(); if (a > 1 || (a == 1 && !cvc(k - 1))) { --k; } } if (b[k] == L'l' && doublec(k) && m() > 1) { --k; } } } LucenePlusPlus-rel_3.0.9/src/core/analysis/SimpleAnalyzer.cpp000066400000000000000000000020071456444476200243340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SimpleAnalyzer.h" #include "LowerCaseTokenizer.h" namespace Lucene { SimpleAnalyzer::~SimpleAnalyzer() { } TokenStreamPtr SimpleAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { return newLucene(reader); } TokenStreamPtr SimpleAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { TokenizerPtr tokenizer(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!tokenizer) { tokenizer = newLucene(reader); setPreviousTokenStream(tokenizer); } else { tokenizer->reset(reader); } return tokenizer; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/StopAnalyzer.cpp000066400000000000000000000057751456444476200240470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StopAnalyzer.h" #include "_StopAnalyzer.h" #include "StopFilter.h" #include "WordlistLoader.h" #include "Reader.h" #include "LowerCaseTokenizer.h" namespace Lucene { const wchar_t* StopAnalyzer::_ENGLISH_STOP_WORDS_SET[] = { L"a", L"an", L"and", L"are", L"as", L"at", L"be", L"but", L"by", L"for", L"if", L"in", L"into", L"is", L"it", L"no", L"not", L"of", L"on", L"or", L"such", L"that", L"the", L"their", L"then", L"there", L"these", L"they", L"this", L"to", L"was", L"will", L"with" }; StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion) { stopWords = ENGLISH_STOP_WORDS_SET(); enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); } StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords) { this->stopWords = stopWords; enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); } StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion, const String& stopwordsFile) { stopWords = WordlistLoader::getWordSet(stopwordsFile); enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); } StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion, const ReaderPtr& stopwords) { stopWords = WordlistLoader::getWordSet(stopwords); enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); } StopAnalyzer::~StopAnalyzer() { } const HashSet StopAnalyzer::ENGLISH_STOP_WORDS_SET() { static HashSet __ENGLISH_STOP_WORDS_SET; LUCENE_RUN_ONCE( __ENGLISH_STOP_WORDS_SET = HashSet::newInstance(_ENGLISH_STOP_WORDS_SET, _ENGLISH_STOP_WORDS_SET + SIZEOF_ARRAY(_ENGLISH_STOP_WORDS_SET)); ); return __ENGLISH_STOP_WORDS_SET; } TokenStreamPtr StopAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { return newLucene(enablePositionIncrements, newLucene(reader), stopWords); } 
TokenStreamPtr StopAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { StopAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!streams) { streams = newLucene(); streams->source = newLucene(reader); streams->result = newLucene(enablePositionIncrements, streams->source, stopWords); setPreviousTokenStream(streams); } else { streams->source->reset(reader); } return streams->result; } StopAnalyzerSavedStreams::~StopAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.9/src/core/analysis/StopFilter.cpp000066400000000000000000000045141456444476200234750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StopFilter.h" #include "CharArraySet.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" namespace Lucene { StopFilter::StopFilter(bool enablePositionIncrements, const TokenStreamPtr& input, HashSet stopWords, bool ignoreCase) : TokenFilter(input) { this->stopWords = newLucene(stopWords, ignoreCase); this->enablePositionIncrements = enablePositionIncrements; termAtt = addAttribute(); posIncrAtt = addAttribute(); } StopFilter::StopFilter(bool enablePositionIncrements, const TokenStreamPtr& input, const CharArraySetPtr& stopWords, bool ignoreCase) : TokenFilter(input) { this->stopWords = stopWords; this->enablePositionIncrements = enablePositionIncrements; termAtt = addAttribute(); posIncrAtt = addAttribute(); } StopFilter::~StopFilter() { } HashSet StopFilter::makeStopSet(Collection stopWords) { return HashSet::newInstance(stopWords.begin(), stopWords.end()); } bool StopFilter::incrementToken() { // return the first non-stop word found int32_t 
skippedPositions = 0; while (input->incrementToken()) { if (!stopWords->contains(termAtt->termBufferArray(), 0, termAtt->termLength())) { if (enablePositionIncrements) { posIncrAtt->setPositionIncrement(posIncrAtt->getPositionIncrement() + skippedPositions); } return true; } skippedPositions += posIncrAtt->getPositionIncrement(); } // reached EOS -- return false return false; } bool StopFilter::getEnablePositionIncrementsVersionDefault(LuceneVersion::Version matchVersion) { return LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_29); } bool StopFilter::getEnablePositionIncrements() { return enablePositionIncrements; } void StopFilter::setEnablePositionIncrements(bool enable) { this->enablePositionIncrements = enable; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/TeeSinkTokenFilter.cpp000066400000000000000000000104301456444476200251050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TeeSinkTokenFilter.h" #include "Attribute.h" namespace Lucene { TeeSinkTokenFilter::TeeSinkTokenFilter(const TokenStreamPtr& input) : TokenFilter(input) { this->sinks = Collection::newInstance(); } TeeSinkTokenFilter::~TeeSinkTokenFilter() { } SinkTokenStreamPtr TeeSinkTokenFilter::newSinkTokenStream() { static SinkFilterPtr ACCEPT_ALL_FILTER; LUCENE_RUN_ONCE( ACCEPT_ALL_FILTER = newLucene(); CycleCheck::addStatic(ACCEPT_ALL_FILTER); ); return newSinkTokenStream(ACCEPT_ALL_FILTER); } SinkTokenStreamPtr TeeSinkTokenFilter::newSinkTokenStream(const SinkFilterPtr& filter) { SinkTokenStreamPtr sink(newLucene(this->cloneAttributes(), filter)); this->sinks.add(sink); return sink; } void TeeSinkTokenFilter::addSinkTokenStream(const SinkTokenStreamPtr& sink) { // check that sink has correct factory if (this->getAttributeFactory() != sink->getAttributeFactory()) { boost::throw_exception(IllegalArgumentException(L"The supplied sink is not compatible to this tee.")); } // add eventually missing attribute impls to the existing sink Collection attrImpls(this->cloneAttributes()->getAttributes()); for (Collection::iterator it = attrImpls.begin(); it != attrImpls.end(); ++it) { sink->addAttribute((*it)->getClassName(), *it); } this->sinks.add(sink); } void TeeSinkTokenFilter::consumeAllTokens() { while (incrementToken()) { } } bool TeeSinkTokenFilter::incrementToken() { if (input->incrementToken()) { // capture state lazily - maybe no SinkFilter accepts this state AttributeSourceStatePtr state; for (Collection::iterator ref = sinks.begin(); ref != sinks.end(); ++ref) { if (*ref) { if ((*ref)->accept(shared_from_this())) { if (!state) { state = this->captureState(); } (*ref)->addState(state); } } } return true; } return false; } void TeeSinkTokenFilter::end() { TokenFilter::end(); AttributeSourceStatePtr finalState(captureState()); for (Collection::iterator ref = 
sinks.begin(); ref != sinks.end(); ++ref) { if (*ref) { (*ref)->setFinalState(finalState); } } } SinkFilter::~SinkFilter() { } void SinkFilter::reset() { // nothing to do; can be overridden } AcceptAllSinkFilter::~AcceptAllSinkFilter() { } bool AcceptAllSinkFilter::accept(const AttributeSourcePtr& source) { return true; } SinkTokenStream::SinkTokenStream(const AttributeSourcePtr& source, const SinkFilterPtr& filter) : TokenStream(source) { this->filter = filter; this->cachedStates = Collection::newInstance(); this->it = cachedStates.begin(); this->initIterator = false; } SinkTokenStream::~SinkTokenStream() { } bool SinkTokenStream::accept(const AttributeSourcePtr& source) { return filter->accept(source); } void SinkTokenStream::addState(const AttributeSourceStatePtr& state) { if (initIterator) { boost::throw_exception(IllegalStateException(L"The tee must be consumed before sinks are consumed.")); } cachedStates.add(state); } void SinkTokenStream::setFinalState(const AttributeSourceStatePtr& finalState) { this->finalState = finalState; } bool SinkTokenStream::incrementToken() { // lazy init the iterator if (!initIterator) { it = cachedStates.begin(); initIterator = true; } if (it == cachedStates.end()) { return false; } AttributeSourceStatePtr state = *it++; restoreState(state); return true; } void SinkTokenStream::end() { if (finalState) { restoreState(finalState); } } void SinkTokenStream::reset() { it = cachedStates.begin(); initIterator = false; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/Token.cpp000066400000000000000000000361231456444476200224630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Token.h" #include "Payload.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "PositionIncrementAttribute.h" #include "PayloadAttribute.h" #include "FlagsAttribute.h" #include "TypeAttribute.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t Token::MIN_BUFFER_SIZE = 10; Token::Token() { ConstructToken(0, 0, DEFAULT_TYPE(), 0); } Token::Token(int32_t start, int32_t end) { ConstructToken(start, end, DEFAULT_TYPE(), 0); } Token::Token(int32_t start, int32_t end, const String& type) { ConstructToken(start, end, type, 0); } Token::Token(int32_t start, int32_t end, int32_t flags) { ConstructToken(start, end, DEFAULT_TYPE(), flags); } Token::Token(const String& text, int32_t start, int32_t end) { ConstructToken(start, end, DEFAULT_TYPE(), 0); setTermBuffer(text); } Token::Token(const String& text, int32_t start, int32_t end, const String& type) { ConstructToken(start, end, type, 0); setTermBuffer(text); } Token::Token(const String& text, int32_t start, int32_t end, int32_t flags) { ConstructToken(start, end, DEFAULT_TYPE(), flags); setTermBuffer(text); } Token::Token(CharArray startTermBuffer, int32_t termBufferOffset, int32_t termBufferLength, int32_t start, int32_t end) { ConstructToken(start, end, DEFAULT_TYPE(), 0); setTermBuffer(startTermBuffer.get(), termBufferOffset, termBufferLength); } Token::~Token() { } void Token::ConstructToken(int32_t start, int32_t end, const String& type, int32_t flags) { this->_termLength = 0; this->_startOffset = start; this->_endOffset = end; this->_type = type; this->flags = flags; this->positionIncrement = 1; } const String& Token::DEFAULT_TYPE() { static String _DEFAULT_TYPE(L"word"); return _DEFAULT_TYPE; } void Token::setPositionIncrement(int32_t positionIncrement) { if (positionIncrement < 0) { boost::throw_exception(IllegalArgumentException(L"Increment must be zero or 
greater: " + StringUtils::toString(positionIncrement))); } this->positionIncrement = positionIncrement; } int32_t Token::getPositionIncrement() { return positionIncrement; } String Token::term() { initTermBuffer(); return String(_termBuffer.get(), _termLength); } void Token::setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length) { growTermBuffer(length); MiscUtils::arrayCopy(buffer, offset, _termBuffer.get(), 0, length); _termLength = length; } void Token::setTermBuffer(const String& buffer) { int32_t length = (int32_t)buffer.size(); growTermBuffer(length); MiscUtils::arrayCopy(buffer.begin(), 0, _termBuffer.get(), 0, length); _termLength = length; } void Token::setTermBuffer(const String& buffer, int32_t offset, int32_t length) { BOOST_ASSERT(offset <= (int32_t)buffer.length()); BOOST_ASSERT(offset + length <= (int32_t)buffer.length()); growTermBuffer(length); MiscUtils::arrayCopy(buffer.begin(), offset, _termBuffer.get(), 0, length); _termLength = length; } CharArray Token::termBuffer() { if (!_termBuffer) { initTermBuffer(); } return _termBuffer; } wchar_t* Token::termBufferArray() { if (!_termBuffer) { initTermBuffer(); } return _termBuffer.get(); } CharArray Token::resizeTermBuffer(int32_t newSize) { if (!_termBuffer) { // The buffer is always at least MIN_BUFFER_SIZE _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(std::max(newSize, MIN_BUFFER_SIZE))); } else { if (_termBuffer.size() < newSize) { // Not big enough; create a new array with slight over allocation and preserve content _termBuffer.resize(MiscUtils::getNextSize(newSize)); } } return _termBuffer; } void Token::growTermBuffer(int32_t newSize) { _termBuffer = resizeTermBuffer(newSize); } void Token::initTermBuffer() { if (!_termBuffer) { _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(MIN_BUFFER_SIZE)); _termLength = 0; } } int32_t Token::termLength() { if (!_termBuffer) { initTermBuffer(); } return _termLength; } void Token::setTermLength(int32_t length) { 
initTermBuffer(); if (length > _termBuffer.size()) { boost::throw_exception(IllegalArgumentException(L"length " + StringUtils::toString(length) + L" exceeds the size of the termBuffer (" + StringUtils::toString(_termBuffer.size()) + L")")); } _termLength = length; } int32_t Token::startOffset() { return _startOffset; } void Token::setStartOffset(int32_t offset) { this->_startOffset = offset; } int32_t Token::endOffset() { return _endOffset; } void Token::setEndOffset(int32_t offset) { this->_endOffset = offset; } void Token::setOffset(int32_t startOffset, int32_t endOffset) { this->_startOffset = startOffset; this->_endOffset = endOffset; } String Token::type() { return _type; } void Token::setType(const String& type) { this->_type = type; } int32_t Token::getFlags() { return flags; } void Token::setFlags(int32_t flags) { this->flags = flags; } PayloadPtr Token::getPayload() { return this->payload; } void Token::setPayload(const PayloadPtr& payload) { this->payload = payload; } String Token::toString() { StringStream buffer; initTermBuffer(); buffer << L"("; if (!_termBuffer) { buffer << L"null"; } else { buffer << term() << L"," << _startOffset << L"," << _endOffset; } if (_type != L"word") { buffer << L",type=" << _type; } if (positionIncrement != 1) { buffer << L",posIncr=" << positionIncrement; } buffer << L")"; return buffer.str(); } void Token::clear() { payload.reset(); // Leave termBuffer to allow re-use _termLength = 0; positionIncrement = 1; flags = 0; _startOffset = 0; _endOffset = 0; _type = DEFAULT_TYPE(); } LuceneObjectPtr Token::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = Attribute::clone(other ? 
other : newLucene()); TokenPtr cloneToken(boost::dynamic_pointer_cast(clone)); cloneToken->_termLength = _termLength; cloneToken->_startOffset = _startOffset; cloneToken->_endOffset = _endOffset; cloneToken->_type = _type; cloneToken->flags = flags; cloneToken->positionIncrement = positionIncrement; // Do a deep clone if (_termBuffer) { cloneToken->_termBuffer = CharArray::newInstance(_termBuffer.size()); MiscUtils::arrayCopy(_termBuffer.get(), 0, cloneToken->_termBuffer.get(), 0, _termBuffer.size()); } if (payload) { cloneToken->payload = boost::dynamic_pointer_cast(payload->clone()); } return cloneToken; } TokenPtr Token::clone(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset) { TokenPtr clone(newLucene(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset)); clone->positionIncrement = positionIncrement; clone->flags = flags; clone->_type = _type; if (payload) { clone->payload = boost::dynamic_pointer_cast(payload->clone()); } return clone; } bool Token::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } TokenPtr otherToken(boost::dynamic_pointer_cast(other)); if (otherToken) { initTermBuffer(); otherToken->initTermBuffer(); if (_termLength == otherToken->_termLength && _startOffset == otherToken->_startOffset && _endOffset == otherToken->_endOffset && flags == otherToken->flags && positionIncrement == otherToken->positionIncrement && _type == otherToken->_type && (payload ? 
payload->equals(otherToken->payload) : !otherToken->payload)) { for (int32_t i = 0; i < _termLength; ++i) { if (_termBuffer[i] != otherToken->_termBuffer[i]) { return false; } } return true; } else { return false; } } else { return false; } } int32_t Token::hashCode() { initTermBuffer(); int32_t code = _termLength; code = code * 31 + _startOffset; code = code * 31 + _endOffset; code = code * 31 + flags; code = code * 31 + positionIncrement; code = code * 31 + StringUtils::hashCode(_type); code = payload ? code * 31 + payload->hashCode() : code; code = code * 31 + MiscUtils::hashCode(_termBuffer.get(), 0, _termLength); return code; } void Token::clearNoTermBuffer() { payload.reset(); positionIncrement = 1; flags = 0; _startOffset = 0; _endOffset = 0; _type = DEFAULT_TYPE(); } TokenPtr Token::reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType) { clearNoTermBuffer(); payload.reset(); positionIncrement = 1; setTermBuffer(newTermBuffer.get(), newTermOffset, newTermLength); _startOffset = newStartOffset; _endOffset = newEndOffset; _type = newType; return shared_from_this(); } TokenPtr Token::reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset) { clearNoTermBuffer(); setTermBuffer(newTermBuffer.get(), newTermOffset, newTermLength); _startOffset = newStartOffset; _endOffset = newEndOffset; _type = DEFAULT_TYPE(); return shared_from_this(); } TokenPtr Token::reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset, const String& newType) { clearNoTermBuffer(); setTermBuffer(newTerm); _startOffset = newStartOffset; _endOffset = newEndOffset; _type = newType; return shared_from_this(); } TokenPtr Token::reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType) { clearNoTermBuffer(); setTermBuffer(newTerm, 
newTermOffset, newTermLength); _startOffset = newStartOffset; _endOffset = newEndOffset; _type = newType; return shared_from_this(); } TokenPtr Token::reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset) { clearNoTermBuffer(); setTermBuffer(newTerm); _startOffset = newStartOffset; _endOffset = newEndOffset; _type = DEFAULT_TYPE(); return shared_from_this(); } TokenPtr Token::reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset) { clearNoTermBuffer(); setTermBuffer(newTerm, newTermOffset, newTermLength); _startOffset = newStartOffset; _endOffset = newEndOffset; _type = DEFAULT_TYPE(); return shared_from_this(); } void Token::reinit(const TokenPtr& prototype) { prototype->initTermBuffer(); setTermBuffer(prototype->_termBuffer.get(), 0, prototype->_termLength); positionIncrement = prototype->positionIncrement; flags = prototype->flags; _startOffset = prototype->_startOffset; _endOffset = prototype->_endOffset; _type = prototype->_type; payload = prototype->payload; } void Token::reinit(const TokenPtr& prototype, const String& newTerm) { setTermBuffer(newTerm); positionIncrement = prototype->positionIncrement; flags = prototype->flags; _startOffset = prototype->_startOffset; _endOffset = prototype->_endOffset; _type = prototype->_type; payload = prototype->payload; } void Token::reinit(const TokenPtr& prototype, CharArray newTermBuffer, int32_t offset, int32_t length) { setTermBuffer(newTermBuffer.get(), offset, length); positionIncrement = prototype->positionIncrement; flags = prototype->flags; _startOffset = prototype->_startOffset; _endOffset = prototype->_endOffset; _type = prototype->_type; payload = prototype->payload; } void Token::copyTo(const AttributePtr& target) { TokenPtr targetToken(boost::dynamic_pointer_cast(target)); if (targetToken) { targetToken->reinit(shared_from_this()); // reinit shares the payload, so clone it if (payload) { targetToken->payload = 
boost::dynamic_pointer_cast(payload->clone()); } } else { initTermBuffer(); TermAttributePtr targetTermAttribute(boost::dynamic_pointer_cast(target)); if (targetTermAttribute) { targetTermAttribute->setTermBuffer(_termBuffer.get(), 0, _termLength); } OffsetAttributePtr targetOffsetAttribute(boost::dynamic_pointer_cast(target)); if (targetOffsetAttribute) { targetOffsetAttribute->setOffset(_startOffset, _endOffset); } PositionIncrementAttributePtr targetPositionIncrementAttribute(boost::dynamic_pointer_cast(target)); if (targetPositionIncrementAttribute) { targetPositionIncrementAttribute->setPositionIncrement(positionIncrement); } PayloadAttributePtr targetPayloadAttribute(boost::dynamic_pointer_cast(target)); if (targetPayloadAttribute) { targetPayloadAttribute->setPayload(payload ? boost::dynamic_pointer_cast(payload->clone()) : PayloadPtr()); } FlagsAttributePtr targetFlagsAttribute(boost::dynamic_pointer_cast(target)); if (targetFlagsAttribute) { targetFlagsAttribute->setFlags(flags); } TypeAttributePtr targetTypeAttribute(boost::dynamic_pointer_cast(target)); if (targetTypeAttribute) { targetTypeAttribute->setType(_type); } } } AttributeFactoryPtr Token::TOKEN_ATTRIBUTE_FACTORY() { static AttributeFactoryPtr _TOKEN_ATTRIBUTE_FACTORY; LUCENE_RUN_ONCE( _TOKEN_ATTRIBUTE_FACTORY = newLucene(AttributeFactory::DEFAULT_ATTRIBUTE_FACTORY()); CycleCheck::addStatic(_TOKEN_ATTRIBUTE_FACTORY); ); return _TOKEN_ATTRIBUTE_FACTORY; } TokenAttributeFactory::TokenAttributeFactory(const AttributeFactoryPtr& delegate) { this->delegate = delegate; } TokenAttributeFactory::~TokenAttributeFactory() { } AttributePtr TokenAttributeFactory::createAttributeInstance(const String& className) { return newLucene(); } bool TokenAttributeFactory::equals(const LuceneObjectPtr& other) { if (AttributeFactory::equals(other)) { return true; } TokenAttributeFactoryPtr otherTokenAttributeFactory(boost::dynamic_pointer_cast(other)); if (otherTokenAttributeFactory) { return 
this->delegate->equals(otherTokenAttributeFactory->delegate); } return false; } int32_t TokenAttributeFactory::hashCode() { return (delegate->hashCode() ^ 0x0a45aa31); } } LucenePlusPlus-rel_3.0.9/src/core/analysis/TokenFilter.cpp000066400000000000000000000012711456444476200236250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TokenFilter.h" namespace Lucene { TokenFilter::TokenFilter(const TokenStreamPtr& input) : TokenStream(input) { this->input = input; } TokenFilter::~TokenFilter() { } void TokenFilter::end() { input->end(); } void TokenFilter::close() { input->close(); } void TokenFilter::reset() { input->reset(); } } LucenePlusPlus-rel_3.0.9/src/core/analysis/TokenStream.cpp000066400000000000000000000014101456444476200236260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TokenStream.h" namespace Lucene { TokenStream::TokenStream() { } TokenStream::TokenStream(const AttributeSourcePtr& input) : AttributeSource(input) { } TokenStream::TokenStream(const AttributeFactoryPtr& factory) : AttributeSource(factory) { } TokenStream::~TokenStream() { } void TokenStream::end() { // do nothing by default } void TokenStream::reset() { } void TokenStream::close() { } } LucenePlusPlus-rel_3.0.9/src/core/analysis/Tokenizer.cpp000066400000000000000000000031311456444476200233460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Tokenizer.h" #include "CharReader.h" namespace Lucene { Tokenizer::Tokenizer() { } Tokenizer::Tokenizer(const ReaderPtr& input) { this->input = CharReader::get(input); this->charStream = boost::dynamic_pointer_cast(this->input); } Tokenizer::Tokenizer(const AttributeFactoryPtr& factory) : TokenStream(factory) { } Tokenizer::Tokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : TokenStream(factory) { this->input = CharReader::get(input); this->charStream = boost::dynamic_pointer_cast(this->input); } Tokenizer::Tokenizer(const AttributeSourcePtr& source) : TokenStream(source) { } Tokenizer::Tokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : TokenStream(source) { this->input = CharReader::get(input); this->charStream = boost::dynamic_pointer_cast(this->input); } Tokenizer::~Tokenizer() { } void Tokenizer::close() { if (input) { input->close(); input.reset(); // don't hold onto Reader after close } } int32_t Tokenizer::correctOffset(int32_t 
currentOff) { return charStream ? charStream->correctOffset(currentOff) : currentOff; } void Tokenizer::reset(const ReaderPtr& input) { this->input = input; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/WhitespaceAnalyzer.cpp000066400000000000000000000020361456444476200252010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "WhitespaceAnalyzer.h" #include "WhitespaceTokenizer.h" namespace Lucene { WhitespaceAnalyzer::~WhitespaceAnalyzer() { } TokenStreamPtr WhitespaceAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { return newLucene(reader); } TokenStreamPtr WhitespaceAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { TokenizerPtr tokenizer(boost::dynamic_pointer_cast(getPreviousTokenStream())); if (!tokenizer) { tokenizer = newLucene(reader); setPreviousTokenStream(tokenizer); } else { tokenizer->reset(reader); } return tokenizer; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/WhitespaceTokenizer.cpp000066400000000000000000000017151456444476200253710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "WhitespaceTokenizer.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { WhitespaceTokenizer::WhitespaceTokenizer(const ReaderPtr& input) : CharTokenizer(input) { } WhitespaceTokenizer::WhitespaceTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : CharTokenizer(source, input) { } WhitespaceTokenizer::WhitespaceTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : CharTokenizer(factory, input) { } WhitespaceTokenizer::~WhitespaceTokenizer() { } bool WhitespaceTokenizer::isTokenChar(wchar_t c) { return !UnicodeUtil::isSpace(c); } } LucenePlusPlus-rel_3.0.9/src/core/analysis/WordlistLoader.cpp000066400000000000000000000051421456444476200243360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "WordlistLoader.h" #include "FileReader.h" #include "BufferedReader.h" namespace Lucene { WordlistLoader::~WordlistLoader() { } HashSet WordlistLoader::getWordSet(const String& wordfile, const String& comment) { HashSet result(HashSet::newInstance()); FileReaderPtr reader; LuceneException finally; try { reader = newLucene(wordfile); result = getWordSet(reader, comment); } catch (LuceneException& e) { finally = e; } if (reader) { reader->close(); } finally.throwException(); return result; } HashSet WordlistLoader::getWordSet(const ReaderPtr& reader, const String& comment) { HashSet result(HashSet::newInstance()); LuceneException finally; BufferedReaderPtr bufferedReader(boost::dynamic_pointer_cast(reader)); try { if (!bufferedReader) { bufferedReader = newLucene(reader); } String word; while (bufferedReader->readLine(word)) { if (comment.empty() || !boost::starts_with(word, comment)) { boost::trim(word); result.add(word); } } } catch (LuceneException& e) { finally = e; } if (bufferedReader) { bufferedReader->close(); } finally.throwException(); return result; } MapStringString WordlistLoader::getStemDict(const String& wordstemfile) { MapStringString result(MapStringString::newInstance()); BufferedReaderPtr bufferedReader; FileReaderPtr reader; LuceneException finally; try { reader = newLucene(wordstemfile); bufferedReader = newLucene(reader); String line; while (bufferedReader->readLine(line)) { String::size_type sep = line.find(L'\t'); if (sep != String::npos) { result.put(line.substr(0, sep), line.substr(sep + 1)); } } } catch (LuceneException& e) { finally = e; } if (reader) { reader->close(); } if (bufferedReader) { bufferedReader->close(); } finally.throwException(); return result; } } 
LucenePlusPlus-rel_3.0.9/src/core/analysis/standard/000077500000000000000000000000001456444476200224725ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/analysis/standard/StandardAnalyzer.cpp000066400000000000000000000067521456444476200264560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StandardAnalyzer.h" #include "_StandardAnalyzer.h" #include "StandardTokenizer.h" #include "StandardFilter.h" #include "LowerCaseFilter.h" #include "StopAnalyzer.h" #include "StopFilter.h" #include "WordlistLoader.h" namespace Lucene { /// Construct an analyzer with the given stop words. const int32_t StandardAnalyzer::DEFAULT_MAX_TOKEN_LENGTH = 255; StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion) { ConstructAnalyser(matchVersion, StopAnalyzer::ENGLISH_STOP_WORDS_SET()); } StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords) { ConstructAnalyser(matchVersion, stopWords); } StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion, const String& stopwords) { ConstructAnalyser(matchVersion, WordlistLoader::getWordSet(stopwords)); } StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion, const ReaderPtr& stopwords) { ConstructAnalyser(matchVersion, WordlistLoader::getWordSet(stopwords)); } StandardAnalyzer::~StandardAnalyzer() { } void StandardAnalyzer::ConstructAnalyser(LuceneVersion::Version matchVersion, HashSet stopWords) { stopSet = stopWords; enableStopPositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); replaceInvalidAcronym = LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_24); 
this->matchVersion = matchVersion; this->maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH; } TokenStreamPtr StandardAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { StandardTokenizerPtr tokenStream(newLucene(matchVersion, reader)); tokenStream->setMaxTokenLength(maxTokenLength); TokenStreamPtr result(newLucene(tokenStream)); result = newLucene(result); result = newLucene(enableStopPositionIncrements, result, stopSet); return result; } void StandardAnalyzer::setMaxTokenLength(int32_t length) { maxTokenLength = length; } int32_t StandardAnalyzer::getMaxTokenLength() { return maxTokenLength; } TokenStreamPtr StandardAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { StandardAnalyzerSavedStreamsPtr streams = boost::dynamic_pointer_cast(getPreviousTokenStream()); if (!streams) { streams = newLucene(); setPreviousTokenStream(streams); streams->tokenStream = newLucene(matchVersion, reader); streams->filteredTokenStream = newLucene(streams->tokenStream); streams->filteredTokenStream = newLucene(streams->filteredTokenStream); streams->filteredTokenStream = newLucene(enableStopPositionIncrements, streams->filteredTokenStream, stopSet); } else { streams->tokenStream->reset(reader); } streams->tokenStream->setMaxTokenLength(maxTokenLength); streams->tokenStream->setReplaceInvalidAcronym(replaceInvalidAcronym); return streams->filteredTokenStream; } StandardAnalyzerSavedStreams::~StandardAnalyzerSavedStreams() { } } LucenePlusPlus-rel_3.0.9/src/core/analysis/standard/StandardFilter.cpp000066400000000000000000000040141456444476200261030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StandardFilter.h" #include "StandardTokenizer.h" #include "TermAttribute.h" #include "TypeAttribute.h" namespace Lucene { StandardFilter::StandardFilter(const TokenStreamPtr& input) : TokenFilter(input) { termAtt = addAttribute(); typeAtt = addAttribute(); } StandardFilter::~StandardFilter() { } const String& StandardFilter::APOSTROPHE_TYPE() { static String _APOSTROPHE_TYPE; LUCENE_RUN_ONCE( _APOSTROPHE_TYPE = StandardTokenizer::TOKEN_TYPES()[StandardTokenizer::APOSTROPHE]; ); return _APOSTROPHE_TYPE; } const String& StandardFilter::ACRONYM_TYPE() { static String _ACRONYM_TYPE; LUCENE_RUN_ONCE( _ACRONYM_TYPE = StandardTokenizer::TOKEN_TYPES()[StandardTokenizer::ACRONYM] ); return _ACRONYM_TYPE; } bool StandardFilter::incrementToken() { if (!input->incrementToken()) { return false; } wchar_t* termBuffer = termAtt->termBufferArray(); int32_t bufferLength = termAtt->termLength(); String type(typeAtt->type()); if (type == APOSTROPHE_TYPE() && bufferLength >= 2 && termBuffer[bufferLength - 2] == L'\'' && (termBuffer[bufferLength - 1] == L's' || termBuffer[bufferLength - 1] == L'S')) { // remove 's // Strip last 2 characters off termAtt->setTermLength(bufferLength - 2); } else if (type == ACRONYM_TYPE()) { // remove dots int32_t upto = 0; for (int32_t i = 0; i < bufferLength; ++i) { wchar_t c = termBuffer[i]; if (c != L'.') { termBuffer[upto++] = c; } } termAtt->setTermLength(upto); } return true; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/standard/StandardTokenizer.cpp000066400000000000000000000115601456444476200266340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StandardTokenizer.h" #include "StandardTokenizerImpl.h" #include "StandardAnalyzer.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "PositionIncrementAttribute.h" #include "TypeAttribute.h" namespace Lucene { const int32_t StandardTokenizer::ALPHANUM = 0; const int32_t StandardTokenizer::APOSTROPHE = 1; const int32_t StandardTokenizer::ACRONYM = 2; const int32_t StandardTokenizer::COMPANY = 3; const int32_t StandardTokenizer::EMAIL = 4; const int32_t StandardTokenizer::HOST = 5; const int32_t StandardTokenizer::NUM = 6; const int32_t StandardTokenizer::CJ = 7; /// @deprecated this solves a bug where HOSTs that end with '.' are identified as ACRONYMs. const int32_t StandardTokenizer::ACRONYM_DEP = 8; StandardTokenizer::StandardTokenizer(LuceneVersion::Version matchVersion, const ReaderPtr& input) { this->scanner = newLucene(input); init(input, matchVersion); } StandardTokenizer::StandardTokenizer(LuceneVersion::Version matchVersion, const AttributeSourcePtr& source, const ReaderPtr& input) : Tokenizer(source) { this->scanner = newLucene(input); init(input, matchVersion); } StandardTokenizer::StandardTokenizer(LuceneVersion::Version matchVersion, const AttributeFactoryPtr& factory, const ReaderPtr& input) : Tokenizer(factory) { this->scanner = newLucene(input); init(input, matchVersion); } StandardTokenizer::~StandardTokenizer() { } const Collection StandardTokenizer::TOKEN_TYPES() { static Collection _TOKEN_TYPES; LUCENE_RUN_ONCE( _TOKEN_TYPES = newCollection( L"", L"", L"", L"", L"", L"", L"", L"", L"" ); ); return _TOKEN_TYPES; } void StandardTokenizer::init(const ReaderPtr& input, LuceneVersion::Version matchVersion) { replaceInvalidAcronym = LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_24); maxTokenLength = StandardAnalyzer::DEFAULT_MAX_TOKEN_LENGTH; this->input = input; termAtt = addAttribute(); offsetAtt = 
addAttribute(); posIncrAtt = addAttribute(); typeAtt = addAttribute(); } void StandardTokenizer::setMaxTokenLength(int32_t length) { this->maxTokenLength = length; } int32_t StandardTokenizer::getMaxTokenLength() { return maxTokenLength; } bool StandardTokenizer::incrementToken() { clearAttributes(); int32_t posIncr = 1; while (true) { int32_t tokenType = scanner->getNextToken(); if (tokenType == StandardTokenizerImpl::YYEOF) { return false; } if (scanner->yylength() <= maxTokenLength) { posIncrAtt->setPositionIncrement(posIncr); scanner->getText(termAtt); int32_t start = scanner->yychar(); offsetAtt->setOffset(correctOffset(start), correctOffset(start + termAtt->termLength())); // This 'if' should be removed in the next release. For now, it converts invalid acronyms to HOST. /// When removed, only the 'else' part should remain. if (tokenType == ACRONYM_DEP) { if (replaceInvalidAcronym) { typeAtt->setType(TOKEN_TYPES()[HOST]); termAtt->setTermLength(termAtt->termLength() - 1); // remove extra '.' 
} else { typeAtt->setType(TOKEN_TYPES()[ACRONYM]); } } else { typeAtt->setType(TOKEN_TYPES()[tokenType]); } return true; } else { // When we skip a too-long term, we still increment the position increment ++posIncr; } } } void StandardTokenizer::end() { // set final offset int32_t finalOffset = correctOffset(scanner->yychar() + scanner->yylength()); offsetAtt->setOffset(finalOffset, finalOffset); } void StandardTokenizer::reset(const ReaderPtr& input) { Tokenizer::reset(input); scanner->reset(input); } bool StandardTokenizer::isReplaceInvalidAcronym() { return replaceInvalidAcronym; } void StandardTokenizer::setReplaceInvalidAcronym(bool replaceInvalidAcronym) { this->replaceInvalidAcronym = replaceInvalidAcronym; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/standard/StandardTokenizerImpl.cpp000066400000000000000000000444051456444476200274620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StandardTokenizerImpl.h" #include "StandardTokenizer.h" #include "Reader.h" #include "Token.h" #include "TermAttribute.h" #include "MiscUtils.h" #include namespace Lucene { /// Initial size of the lookahead buffer const int32_t StandardTokenizerImpl::ZZ_BUFFERSIZE = 16384; /// Translates characters to character classes CharArray StandardTokenizerImpl::_ZZ_CMAP; const wchar_t StandardTokenizerImpl::ZZ_CMAP_PACKED[] = { L"\11\0\1\0\1\15\1\0\1\0\1\14\22\0\1\0\5\0\1\5" L"\1\3\4\0\1\11\1\7\1\4\1\11\12\2\6\0\1\6\32\12" L"\4\0\1\10\1\0\32\12\57\0\1\12\12\0\1\12\4\0\1\12" L"\5\0\27\12\1\0\37\12\1\0\u0128\12\2\0\22\12\34\0\136\12" L"\2\0\11\12\2\0\7\12\16\0\2\12\16\0\5\12\11\0\1\12" L"\213\0\1\12\13\0\1\12\1\0\3\12\1\0\1\12\1\0\24\12" L"\1\0\54\12\1\0\10\12\2\0\32\12\14\0\202\12\12\0\71\12" L"\2\0\2\12\2\0\2\12\3\0\46\12\2\0\2\12\67\0\46\12" L"\2\0\1\12\7\0\47\12\110\0\33\12\5\0\3\12\56\0\32\12" L"\5\0\13\12\25\0\12\2\7\0\143\12\1\0\1\12\17\0\2\12" L"\11\0\12\2\3\12\23\0\1\12\1\0\33\12\123\0\46\12\u015f\0" L"\65\12\3\0\1\12\22\0\1\12\7\0\12\12\4\0\12\2\25\0" L"\10\12\2\0\2\12\2\0\26\12\1\0\7\12\1\0\1\12\3\0" L"\4\12\42\0\2\12\1\0\3\12\4\0\12\2\2\12\23\0\6\12" L"\4\0\2\12\2\0\26\12\1\0\7\12\1\0\2\12\1\0\2\12" L"\1\0\2\12\37\0\4\12\1\0\1\12\7\0\12\2\2\0\3\12" L"\20\0\7\12\1\0\1\12\1\0\3\12\1\0\26\12\1\0\7\12" L"\1\0\2\12\1\0\5\12\3\0\1\12\22\0\1\12\17\0\1\12" L"\5\0\12\2\25\0\10\12\2\0\2\12\2\0\26\12\1\0\7\12" L"\1\0\2\12\2\0\4\12\3\0\1\12\36\0\2\12\1\0\3\12" L"\4\0\12\2\25\0\6\12\3\0\3\12\1\0\4\12\3\0\2\12" L"\1\0\1\12\1\0\2\12\3\0\2\12\3\0\3\12\3\0\10\12" L"\1\0\3\12\55\0\11\2\25\0\10\12\1\0\3\12\1\0\27\12" L"\1\0\12\12\1\0\5\12\46\0\2\12\4\0\12\2\25\0\10\12" L"\1\0\3\12\1\0\27\12\1\0\12\12\1\0\5\12\44\0\1\12" L"\1\0\2\12\4\0\12\2\25\0\10\12\1\0\3\12\1\0\27\12" L"\1\0\20\12\46\0\2\12\4\0\12\2\25\0\22\12\3\0\30\12" L"\1\0\11\12\1\0\1\12\2\0\7\12\71\0\1\1\60\12\1\1" 
L"\2\12\14\1\7\12\11\1\12\2\47\0\2\12\1\0\1\12\2\0" L"\2\12\1\0\1\12\2\0\1\12\6\0\4\12\1\0\7\12\1\0" L"\3\12\1\0\1\12\1\0\1\12\2\0\2\12\1\0\4\12\1\0" L"\2\12\11\0\1\12\2\0\5\12\1\0\1\12\11\0\12\2\2\0" L"\2\12\42\0\1\12\37\0\12\2\26\0\10\12\1\0\42\12\35\0" L"\4\12\164\0\42\12\1\0\5\12\1\0\2\12\25\0\12\2\6\0" L"\6\12\112\0\46\12\12\0\47\12\11\0\132\12\5\0\104\12\5\0" L"\122\12\6\0\7\12\1\0\77\12\1\0\1\12\1\0\4\12\2\0" L"\7\12\1\0\1\12\1\0\4\12\2\0\47\12\1\0\1\12\1\0" L"\4\12\2\0\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0" L"\1\12\1\0\4\12\2\0\7\12\1\0\7\12\1\0\27\12\1\0" L"\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0\47\12\1\0" L"\23\12\16\0\11\2\56\0\125\12\14\0\u026c\12\2\0\10\12\12\0" L"\32\12\5\0\113\12\225\0\64\12\54\0\12\2\46\0\12\2\6\0" L"\130\12\10\0\51\12\u0557\0\234\12\4\0\132\12\6\0\26\12\2\0" L"\6\12\2\0\46\12\2\0\6\12\2\0\10\12\1\0\1\12\1\0" L"\1\12\1\0\1\12\1\0\37\12\2\0\65\12\1\0\7\12\1\0" L"\1\12\3\0\3\12\1\0\7\12\3\0\4\12\2\0\6\12\4\0" L"\15\12\5\0\3\12\1\0\7\12\202\0\1\12\202\0\1\12\4\0" L"\1\12\2\0\12\12\1\0\1\12\3\0\5\12\6\0\1\12\1\0" L"\1\12\1\0\1\12\1\0\4\12\1\0\3\12\1\0\7\12\u0ecb\0" L"\2\12\52\0\5\12\12\0\1\13\124\13\10\13\2\13\2\13\132\13" L"\1\13\3\13\6\13\50\13\3\13\1\0\136\12\21\0\30\12\70\0" L"\20\13\u0100\0\200\13\200\0\u19b6\13\12\13\100\0\u51a6\13\132\13\u048d\12" L"\u0773\0\u2ba4\12\u215c\0\u012e\13\322\13\7\12\14\0\5\12\5\0\1\12" L"\1\0\12\12\1\0\15\12\1\0\5\12\1\0\1\12\1\0\2\12" L"\1\0\2\12\1\0\154\12\41\0\u016b\12\22\0\100\12\2\0\66\12" L"\50\0\14\12\164\0\3\12\1\0\1\12\1\0\207\12\23\0\12\2" L"\7\0\32\12\6\0\32\12\12\0\1\13\72\13\37\12\3\0\6\12" L"\2\0\6\12\2\0\6\12\2\0\3\12\43\0" }; const int32_t StandardTokenizerImpl::ZZ_CMAP_LENGTH = 65536; const int32_t StandardTokenizerImpl::ZZ_CMAP_PACKED_LENGTH = 1154; IntArray StandardTokenizerImpl::_ZZ_ACTION; const wchar_t StandardTokenizerImpl::ZZ_ACTION_PACKED_0[] = { L"\1\0\1\1\3\2\1\3\1\1\13\0\1\2\3\4" L"\2\0\1\5\1\0\1\5\3\4\6\5\1\6\1\4" L"\2\7\1\10\1\0\1\10\3\0\2\10\1\11\1\12" L"\1\4" }; const 
int32_t StandardTokenizerImpl::ZZ_ACTION_LENGTH = 51; const int32_t StandardTokenizerImpl::ZZ_ACTION_PACKED_LENGTH = 50; IntArray StandardTokenizerImpl::_ZZ_ROWMAP; const wchar_t StandardTokenizerImpl::ZZ_ROWMAP_PACKED_0[] = { L"\0\0\0\16\0\34\0\52\0\70\0\16\0\106\0\124" L"\0\142\0\160\0\176\0\214\0\232\0\250\0\266\0\304" L"\0\322\0\340\0\356\0\374\0\u010a\0\u0118\0\u0126\0\u0134" L"\0\u0142\0\u0150\0\u015e\0\u016c\0\u017a\0\u0188\0\u0196\0\u01a4" L"\0\u01b2\0\u01c0\0\u01ce\0\u01dc\0\u01ea\0\u01f8\0\322\0\u0206" L"\0\u0214\0\u0222\0\u0230\0\u023e\0\u024c\0\u025a\0\124\0\214" L"\0\u0268\0\u0276\0\u0284" }; const int32_t StandardTokenizerImpl::ZZ_ROWMAP_LENGTH = 51; const int32_t StandardTokenizerImpl::ZZ_ROWMAP_PACKED_LENGTH = 102; IntArray StandardTokenizerImpl::_ZZ_TRANS; const wchar_t StandardTokenizerImpl::ZZ_TRANS_PACKED_0[] = { L"\1\2\1\3\1\4\7\2\1\5\1\6\1\7\1\2" L"\17\0\2\3\1\0\1\10\1\0\1\11\2\12\1\13" L"\1\3\4\0\1\3\1\4\1\0\1\14\1\0\1\11" L"\2\15\1\16\1\4\4\0\1\3\1\4\1\17\1\20" L"\1\21\1\22\2\12\1\13\1\23\20\0\1\2\1\0" L"\1\24\1\25\7\0\1\26\4\0\2\27\7\0\1\27" L"\4\0\1\30\1\31\7\0\1\32\5\0\1\33\7\0" L"\1\13\4\0\1\34\1\35\7\0\1\36\4\0\1\37" L"\1\40\7\0\1\41\4\0\1\42\1\43\7\0\1\44" L"\15\0\1\45\4\0\1\24\1\25\7\0\1\46\15\0" L"\1\47\4\0\2\27\7\0\1\50\4\0\1\3\1\4" L"\1\17\1\10\1\21\1\22\2\12\1\13\1\23\4\0" L"\2\24\1\0\1\51\1\0\1\11\2\52\1\0\1\24" L"\4\0\1\24\1\25\1\0\1\53\1\0\1\11\2\54" L"\1\55\1\25\4\0\1\24\1\25\1\0\1\51\1\0" L"\1\11\2\52\1\0\1\26\4\0\2\27\1\0\1\56" L"\2\0\1\56\2\0\1\27\4\0\2\30\1\0\1\52" L"\1\0\1\11\2\52\1\0\1\30\4\0\1\30\1\31" L"\1\0\1\54\1\0\1\11\2\54\1\55\1\31\4\0" L"\1\30\1\31\1\0\1\52\1\0\1\11\2\52\1\0" L"\1\32\5\0\1\33\1\0\1\55\2\0\3\55\1\33" L"\4\0\2\34\1\0\1\57\1\0\1\11\2\12\1\13" L"\1\34\4\0\1\34\1\35\1\0\1\60\1\0\1\11" L"\2\15\1\16\1\35\4\0\1\34\1\35\1\0\1\57" L"\1\0\1\11\2\12\1\13\1\36\4\0\2\37\1\0" L"\1\12\1\0\1\11\2\12\1\13\1\37\4\0\1\37" L"\1\40\1\0\1\15\1\0\1\11\2\15\1\16\1\40" L"\4\0\1\37\1\40\1\0\1\12\1\0\1\11\2\12" 
L"\1\13\1\41\4\0\2\42\1\0\1\13\2\0\3\13" L"\1\42\4\0\1\42\1\43\1\0\1\16\2\0\3\16" L"\1\43\4\0\1\42\1\43\1\0\1\13\2\0\3\13" L"\1\44\6\0\1\17\6\0\1\45\4\0\1\24\1\25" L"\1\0\1\61\1\0\1\11\2\52\1\0\1\26\4\0" L"\2\27\1\0\1\56\2\0\1\56\2\0\1\50\4\0" L"\2\24\7\0\1\24\4\0\2\30\7\0\1\30\4\0" L"\2\34\7\0\1\34\4\0\2\37\7\0\1\37\4\0" L"\2\42\7\0\1\42\4\0\2\62\7\0\1\62\4\0" L"\2\24\7\0\1\63\4\0\2\62\1\0\1\56\2\0" L"\1\56\2\0\1\62\4\0\2\24\1\0\1\61\1\0" L"\1\11\2\52\1\0\1\24\3\0" }; const int32_t StandardTokenizerImpl::ZZ_TRANS_LENGTH = 658; const int32_t StandardTokenizerImpl::ZZ_TRANS_PACKED_LENGTH = 634; const int32_t StandardTokenizerImpl::ZZ_UNKNOWN_ERROR = 0; const int32_t StandardTokenizerImpl::ZZ_NO_MATCH = 1; const int32_t StandardTokenizerImpl::ZZ_PUSHBACK_2BIG = 2; const wchar_t* StandardTokenizerImpl::ZZ_ERROR_MSG[] = { L"Unknown internal scanner error", L"Error: could not match input", L"Error: pushback value was too large" }; IntArray StandardTokenizerImpl::_ZZ_ATTRIBUTE; const wchar_t StandardTokenizerImpl::ZZ_ATTRIBUTE_PACKED_0[] = { L"\1\0\1\11\3\1\1\11\1\1\13\0\4\1\2\0" L"\1\1\1\0\17\1\1\0\1\1\3\0\5\1" }; const int32_t StandardTokenizerImpl::ZZ_ATTRIBUTE_LENGTH = 51; const int32_t StandardTokenizerImpl::ZZ_ATTRIBUTE_PACKED_LENGTH = 30; /// This character denotes the end of file const int32_t StandardTokenizerImpl::YYEOF = -1; /// Lexical states const int32_t StandardTokenizerImpl::YYINITIAL = 0; StandardTokenizerImpl::StandardTokenizerImpl(const ReaderPtr& in) { this->zzState = 0; this->zzLexicalState = YYINITIAL; this->zzBuffer = CharArray::newInstance(ZZ_BUFFERSIZE); this->zzMarkedPos = 0; this->zzPushbackPos = 0; this->zzCurrentPos = 0; this->zzStartRead = 0; this->zzEndRead = 0; this->yyline = 0; this->_yychar = 0; this->yycolumn = 0; this->zzAtBOL = true; this->zzAtEOF = false; this->zzReader = in; } StandardTokenizerImpl::~StandardTokenizerImpl() { } void StandardTokenizerImpl::ZZ_CMAP_INIT() { _ZZ_CMAP = CharArray::newInstance(ZZ_CMAP_LENGTH); wchar_t* 
result = _ZZ_CMAP.get(); int32_t i = 0; // index in packed string int32_t j = 0; // index in unpacked array while (i < ZZ_CMAP_PACKED_LENGTH) { int32_t count = ZZ_CMAP_PACKED[i++]; wchar_t value = ZZ_CMAP_PACKED[i++]; do { result[j++] = value; } while (--count > 0); } } const wchar_t* StandardTokenizerImpl::ZZ_CMAP() { static boost::once_flag once = BOOST_ONCE_INIT; boost::call_once(once, ZZ_CMAP_INIT); return _ZZ_CMAP.get(); } void StandardTokenizerImpl::ZZ_ACTION_INIT() { _ZZ_ACTION = IntArray::newInstance(ZZ_ACTION_LENGTH); int32_t* result = _ZZ_ACTION.get(); int32_t i = 0; // index in packed string int32_t j = 0; // index in unpacked array while (i < ZZ_ACTION_PACKED_LENGTH) { int32_t count = ZZ_ACTION_PACKED_0[i++]; int32_t value = ZZ_ACTION_PACKED_0[i++]; do { result[j++] = value; } while (--count > 0); } } const int32_t* StandardTokenizerImpl::ZZ_ACTION() { static boost::once_flag once = BOOST_ONCE_INIT; boost::call_once(once, ZZ_ACTION_INIT); return _ZZ_ACTION.get(); } void StandardTokenizerImpl::ZZ_ROWMAP_INIT() { _ZZ_ROWMAP = IntArray::newInstance(ZZ_ROWMAP_LENGTH); int32_t* result = _ZZ_ROWMAP.get(); int32_t i = 0; // index in packed string int32_t j = 0; // index in unpacked array while (i < ZZ_ROWMAP_PACKED_LENGTH) { int32_t high = ZZ_ROWMAP_PACKED_0[i++] << 16; result[j++] = high | ZZ_ROWMAP_PACKED_0[i++]; } } const int32_t* StandardTokenizerImpl::ZZ_ROWMAP() { static boost::once_flag once = BOOST_ONCE_INIT; boost::call_once(once, ZZ_ROWMAP_INIT); return _ZZ_ROWMAP.get(); } void StandardTokenizerImpl::ZZ_TRANS_INIT() { _ZZ_TRANS = IntArray::newInstance(ZZ_TRANS_LENGTH); int32_t* result = _ZZ_TRANS.get(); int32_t i = 0; // index in packed string int32_t j = 0; // index in unpacked array while (i < ZZ_TRANS_PACKED_LENGTH) { int32_t count = ZZ_TRANS_PACKED_0[i++]; int32_t value = ZZ_TRANS_PACKED_0[i++]; --value; do { result[j++] = value; } while (--count > 0); } } const int32_t* StandardTokenizerImpl::ZZ_TRANS() { static boost::once_flag once = 
BOOST_ONCE_INIT; boost::call_once(once, ZZ_TRANS_INIT); return _ZZ_TRANS.get(); } void StandardTokenizerImpl::ZZ_ATTRIBUTE_INIT() { _ZZ_ATTRIBUTE = IntArray::newInstance(ZZ_ATTRIBUTE_LENGTH); int32_t* result = _ZZ_ATTRIBUTE.get(); int32_t i = 0; // index in packed string int32_t j = 0; // index in unpacked array while (i < ZZ_ATTRIBUTE_PACKED_LENGTH) { int32_t count = ZZ_ATTRIBUTE_PACKED_0[i++]; int32_t value = ZZ_ATTRIBUTE_PACKED_0[i++]; do { result[j++] = value; } while (--count > 0); } } const int32_t* StandardTokenizerImpl::ZZ_ATTRIBUTE() { static boost::once_flag once = BOOST_ONCE_INIT; boost::call_once(once, ZZ_ATTRIBUTE_INIT); return _ZZ_ATTRIBUTE.get(); } int32_t StandardTokenizerImpl::yychar() { return _yychar; } void StandardTokenizerImpl::reset(const ReaderPtr& r) { // reset to default buffer size, if buffer has grown if (zzBuffer.size() > ZZ_BUFFERSIZE) { zzBuffer.resize(ZZ_BUFFERSIZE); } yyreset(r); } void StandardTokenizerImpl::getText(const TokenPtr& t) { t->setTermBuffer(zzBuffer.get(), zzStartRead, zzMarkedPos - zzStartRead); } void StandardTokenizerImpl::getText(const TermAttributePtr& t) { t->setTermBuffer(zzBuffer.get(), zzStartRead, zzMarkedPos - zzStartRead); } bool StandardTokenizerImpl::zzRefill() { // first: make room (if you can) if (zzStartRead > 0) { MiscUtils::arrayCopy(zzBuffer.get(), zzStartRead, zzBuffer.get(), 0, zzEndRead - zzStartRead); // translate stored positions zzEndRead -= zzStartRead; zzCurrentPos -= zzStartRead; zzMarkedPos -= zzStartRead; zzPushbackPos -= zzStartRead; zzStartRead = 0; } // is the buffer big enough? 
if (zzCurrentPos >= zzBuffer.size()) { zzBuffer.resize(zzCurrentPos * 2); } // finally: fill the buffer with new input int32_t numRead = zzReader->read(zzBuffer.get(), zzEndRead, zzBuffer.size() - zzEndRead); if (numRead < 0) { return true; } else { zzEndRead += numRead; return false; } } void StandardTokenizerImpl::yyclose() { zzAtEOF = true; // indicate end of file zzEndRead = zzStartRead; // invalidate buffer if (zzReader) { zzReader->close(); } } void StandardTokenizerImpl::yyreset(const ReaderPtr& reader) { zzReader = reader; zzAtBOL = true; zzAtEOF = false; zzEndRead = 0; zzStartRead = 0; zzCurrentPos = 0; zzMarkedPos = 0; zzPushbackPos = 0; yyline = 0; _yychar = 0; yycolumn = 0; zzLexicalState = YYINITIAL; } int32_t StandardTokenizerImpl::yystate() { return zzLexicalState; } void StandardTokenizerImpl::yybegin(int32_t newState) { zzLexicalState = newState; } String StandardTokenizerImpl::yytext() { return String(zzBuffer.get() + zzStartRead, zzMarkedPos - zzStartRead); } wchar_t StandardTokenizerImpl::yycharat(int32_t pos) { return zzBuffer[zzStartRead + pos]; } int32_t StandardTokenizerImpl::yylength() { return zzMarkedPos - zzStartRead; } void StandardTokenizerImpl::zzScanError(int32_t errorCode) { boost::throw_exception(ParseException(ZZ_ERROR_MSG[errorCode])); } void StandardTokenizerImpl::yypushback(int32_t number) { if (number > yylength()) { zzScanError(ZZ_PUSHBACK_2BIG); } zzMarkedPos -= number; } int32_t StandardTokenizerImpl::getNextToken() { int32_t zzInput; int32_t zzAction; // cached fields int32_t zzCurrentPosL; int32_t zzMarkedPosL; int32_t zzEndReadL = zzEndRead; wchar_t* zzBufferL = zzBuffer.get(); const wchar_t* zzCMapL = ZZ_CMAP(); // This code was originally written in Java, which uses UTF-16, and it can't // correctly deal with 32bit wchar_t and characters outside of the Basic // Multilingual Plane. As a workaround to prevent crashes, treat all // characters above U+FFFF as letters in the tokenizer. 
// See https://github.com/luceneplusplus/LucenePlusPlus/issues/57 const wchar_t zzCMapFallback = zzCMapL['A']; #ifdef LPP_UNICODE_CHAR_SIZE_4 #define zzCMap_at(n) ((n) > 0xFFFF ? zzCMapFallback : zzCMapL[n]) #else // If the 16-bit value is in [0xD800, 0xDFFF], it is part of a multi-byte // UTF-16 character and its UTF code point is > U+FFFF, so handle as above. #define zzCMap_at(n) (((n) & 0xF800) == 0xD800 ? zzCMapFallback : zzCMapL[n]) #endif const int32_t* zzTransL = ZZ_TRANS(); const int32_t* zzRowMapL = ZZ_ROWMAP(); const int32_t* zzAttrL = ZZ_ATTRIBUTE(); const int32_t* zzActionL = ZZ_ACTION(); while (true) { zzMarkedPosL = zzMarkedPos; _yychar += zzMarkedPosL - zzStartRead; zzAction = -1; zzCurrentPosL = zzMarkedPosL; zzCurrentPos = zzMarkedPosL; zzStartRead = zzMarkedPosL; zzState = zzLexicalState; while (true) { if (zzCurrentPosL < zzEndReadL) { zzInput = zzBufferL[zzCurrentPosL++]; } else if (zzAtEOF) { zzInput = YYEOF; break; } else { // store back cached positions zzCurrentPos = zzCurrentPosL; zzMarkedPos = zzMarkedPosL; bool eof = zzRefill(); // get translated positions and possibly new buffer zzCurrentPosL = zzCurrentPos; zzMarkedPosL = zzMarkedPos; zzBufferL = zzBuffer.get(); zzEndReadL = zzEndRead; if (eof) { zzInput = YYEOF; break; } else { zzInput = zzBufferL[zzCurrentPosL++]; } } int32_t zzNext = zzTransL[zzRowMapL[zzState] + zzCMap_at(zzInput)]; if (zzNext == -1) { break; } zzState = zzNext; int32_t zzAttributes = zzAttrL[zzState]; if ((zzAttributes & 1) == 1) { zzAction = zzState; zzMarkedPosL = zzCurrentPosL; if ((zzAttributes & 8) == 8) { break; } } } // store back cached position zzMarkedPos = zzMarkedPosL; switch (zzAction < 0 ? 
zzAction : zzActionL[zzAction]) { case 4: return StandardTokenizer::HOST; case 11: break; case 9: return StandardTokenizer::ACRONYM; case 12: break; case 8: return StandardTokenizer::ACRONYM_DEP; case 13: break; case 1: // ignore case 14: break; case 5: return StandardTokenizer::NUM; case 15: break; case 3: return StandardTokenizer::CJ; case 16: break; case 2: return StandardTokenizer::ALPHANUM; case 17: break; case 7: return StandardTokenizer::COMPANY; case 18: break; case 6: return StandardTokenizer::APOSTROPHE; case 19: break; case 10: return StandardTokenizer::EMAIL; case 20: break; default: if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { zzAtEOF = true; return YYEOF; } else { zzScanError(ZZ_NO_MATCH); } } } return YYINITIAL; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/tokenattributes/000077500000000000000000000000001456444476200241215ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/analysis/tokenattributes/FlagsAttribute.cpp000066400000000000000000000031551456444476200275510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FlagsAttribute.h" #include "StringUtils.h" namespace Lucene { FlagsAttribute::FlagsAttribute() { flags = 0; } FlagsAttribute::~FlagsAttribute() { } String FlagsAttribute::toString() { return L"flags=" + StringUtils::toString(flags); } int32_t FlagsAttribute::getFlags() { return flags; } void FlagsAttribute::setFlags(int32_t flags) { this->flags = flags; } void FlagsAttribute::clear() { flags = 0; } bool FlagsAttribute::equals(const LuceneObjectPtr& other) { if (Attribute::equals(other)) { return true; } FlagsAttributePtr otherFlagsAttribute(boost::dynamic_pointer_cast(other)); if (otherFlagsAttribute) { return (otherFlagsAttribute->flags == flags); } return false; } int32_t FlagsAttribute::hashCode() { return flags; } void FlagsAttribute::copyTo(const AttributePtr& target) { boost::dynamic_pointer_cast(target)->setFlags(flags); } LuceneObjectPtr FlagsAttribute::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? other : newLucene(); FlagsAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); cloneAttribute->flags = flags; return cloneAttribute; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/tokenattributes/OffsetAttribute.cpp000066400000000000000000000042461456444476200277450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "OffsetAttribute.h" #include "StringUtils.h" namespace Lucene { OffsetAttribute::OffsetAttribute() { _startOffset = 0; _endOffset = 0; } OffsetAttribute::~OffsetAttribute() { } String OffsetAttribute::toString() { return L"startOffset=" + StringUtils::toString(_startOffset) + L";endOffset=" + StringUtils::toString(_endOffset); } int32_t OffsetAttribute::startOffset() { return _startOffset; } void OffsetAttribute::setOffset(int32_t startOffset, int32_t endOffset) { this->_startOffset = startOffset; this->_endOffset = endOffset; } int32_t OffsetAttribute::endOffset() { return _endOffset; } void OffsetAttribute::clear() { _startOffset = 0; _endOffset = 0; } bool OffsetAttribute::equals(const LuceneObjectPtr& other) { if (Attribute::equals(other)) { return true; } OffsetAttributePtr otherOffsetAttribute(boost::dynamic_pointer_cast(other)); if (otherOffsetAttribute) { return (otherOffsetAttribute->_startOffset == _startOffset && otherOffsetAttribute->_endOffset == _endOffset); } return false; } int32_t OffsetAttribute::hashCode() { int32_t code = _startOffset; code = code * 31 + _endOffset; return code; } void OffsetAttribute::copyTo(const AttributePtr& target) { OffsetAttributePtr targetOffsetAttribute(boost::dynamic_pointer_cast(target)); targetOffsetAttribute->setOffset(_startOffset, _endOffset); } LuceneObjectPtr OffsetAttribute::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? 
other : newLucene(); OffsetAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); cloneAttribute->_startOffset = _startOffset; cloneAttribute->_endOffset = _endOffset; return cloneAttribute; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/tokenattributes/PayloadAttribute.cpp000066400000000000000000000042041456444476200301020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PayloadAttribute.h" #include "Payload.h" #include "StringUtils.h" namespace Lucene { PayloadAttribute::PayloadAttribute() { } PayloadAttribute::PayloadAttribute(const PayloadPtr& payload) { this->payload = payload; } PayloadAttribute::~PayloadAttribute() { } String PayloadAttribute::toString() { return L"payload(length)=" + StringUtils::toString(payload->length()); } PayloadPtr PayloadAttribute::getPayload() { return this->payload; } void PayloadAttribute::setPayload(const PayloadPtr& payload) { this->payload = payload; } void PayloadAttribute::clear() { payload.reset(); } LuceneObjectPtr PayloadAttribute::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = Attribute::clone(other ? 
other : newLucene()); PayloadAttributePtr cloneAttribute(boost::dynamic_pointer_cast(clone)); if (payload) { cloneAttribute->payload = boost::dynamic_pointer_cast(payload->clone()); } return cloneAttribute; } bool PayloadAttribute::equals(const LuceneObjectPtr& other) { if (Attribute::equals(other)) { return true; } PayloadAttributePtr otherAttribute(boost::dynamic_pointer_cast(other)); if (otherAttribute) { if (!otherAttribute->payload && !payload) { return true; } return otherAttribute->payload->equals(payload); } return false; } int32_t PayloadAttribute::hashCode() { return payload ? payload->hashCode() : 0; } void PayloadAttribute::copyTo(const AttributePtr& target) { PayloadAttributePtr targetPayloadAttribute(boost::dynamic_pointer_cast(target)); targetPayloadAttribute->setPayload(payload ? boost::dynamic_pointer_cast(payload->clone()) : PayloadPtr()); } } LucenePlusPlus-rel_3.0.9/src/core/analysis/tokenattributes/PositionIncrementAttribute.cpp000066400000000000000000000045371456444476200321730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PositionIncrementAttribute.h" #include "StringUtils.h" namespace Lucene { PositionIncrementAttribute::PositionIncrementAttribute() { positionIncrement = 1; } PositionIncrementAttribute::~PositionIncrementAttribute() { } String PositionIncrementAttribute::toString() { return L"positionIncrement=" + StringUtils::toString(positionIncrement); } void PositionIncrementAttribute::setPositionIncrement(int32_t positionIncrement) { if (positionIncrement < 0) { boost::throw_exception(IllegalArgumentException(L"Increment must be zero or greater: " + StringUtils::toString(positionIncrement))); } this->positionIncrement = positionIncrement; } int32_t PositionIncrementAttribute::getPositionIncrement() { return positionIncrement; } void PositionIncrementAttribute::clear() { this->positionIncrement = 1; } bool PositionIncrementAttribute::equals(const LuceneObjectPtr& other) { if (Attribute::equals(other)) { return true; } PositionIncrementAttributePtr otherPositionIncrementAttribute(boost::dynamic_pointer_cast(other)); if (otherPositionIncrementAttribute) { return positionIncrement == otherPositionIncrementAttribute->positionIncrement; } return false; } int32_t PositionIncrementAttribute::hashCode() { return positionIncrement; } void PositionIncrementAttribute::copyTo(const AttributePtr& target) { PositionIncrementAttributePtr targetPositionIncrementAttribute(boost::dynamic_pointer_cast(target)); targetPositionIncrementAttribute->setPositionIncrement(positionIncrement); } LuceneObjectPtr PositionIncrementAttribute::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? 
other : newLucene(); PositionIncrementAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); cloneAttribute->positionIncrement = positionIncrement; return cloneAttribute; } } LucenePlusPlus-rel_3.0.9/src/core/analysis/tokenattributes/TermAttribute.cpp000066400000000000000000000106461456444476200274270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermAttribute.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t TermAttribute::MIN_BUFFER_SIZE = 10; TermAttribute::TermAttribute() { _termLength = 0; } TermAttribute::~TermAttribute() { } String TermAttribute::toString() { return L"term=" + term(); } String TermAttribute::term() { initTermBuffer(); return String(_termBuffer.get(), _termLength); } void TermAttribute::setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length) { growTermBuffer(length); MiscUtils::arrayCopy(buffer, offset, _termBuffer.get(), 0, length); _termLength = length; } void TermAttribute::setTermBuffer(const String& buffer) { int32_t length = (int32_t)buffer.size(); growTermBuffer(length); MiscUtils::arrayCopy(buffer.begin(), 0, _termBuffer.get(), 0, length); _termLength = length; } CharArray TermAttribute::termBuffer() { if (!_termBuffer) { initTermBuffer(); } return _termBuffer; } wchar_t* TermAttribute::termBufferArray() { if (!_termBuffer) { initTermBuffer(); } return _termBuffer.get(); } CharArray TermAttribute::resizeTermBuffer(int32_t newSize) { if (!_termBuffer) { // The buffer is always at least MIN_BUFFER_SIZE _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(std::max(newSize, MIN_BUFFER_SIZE))); } else if 
(_termBuffer.size() < newSize) { _termBuffer.resize(MiscUtils::getNextSize(newSize)); } return _termBuffer; } void TermAttribute::growTermBuffer(int32_t newSize) { if (!_termBuffer) { // The buffer is always at least MIN_BUFFER_SIZE _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(std::max(newSize, MIN_BUFFER_SIZE))); } else if (_termBuffer.size() < newSize) { _termBuffer.resize(MiscUtils::getNextSize(newSize)); } } void TermAttribute::initTermBuffer() { if (!_termBuffer) { _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(MIN_BUFFER_SIZE)); _termLength = 0; } } int32_t TermAttribute::termLength() { return _termLength; } void TermAttribute::setTermLength(int32_t length) { if (!_termBuffer) { initTermBuffer(); } if (length > _termBuffer.size()) { boost::throw_exception(IllegalArgumentException(L"length " + StringUtils::toString(length) + L" exceeds the size of the termBuffer (" + StringUtils::toString(_termBuffer.size()) + L")")); } _termLength = length; } int32_t TermAttribute::hashCode() { initTermBuffer(); int32_t code = _termLength; code = code * 31 + MiscUtils::hashCode(_termBuffer.get(), 0, _termLength); return code; } void TermAttribute::clear() { _termLength = 0; } LuceneObjectPtr TermAttribute::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = Attribute::clone(other ? 
other : newLucene()); TermAttributePtr cloneAttribute(boost::dynamic_pointer_cast(clone)); cloneAttribute->_termLength = _termLength; if (_termBuffer) { cloneAttribute->_termBuffer = CharArray::newInstance(_termBuffer.size()); MiscUtils::arrayCopy(_termBuffer.get(), 0, cloneAttribute->_termBuffer.get(), 0, _termBuffer.size()); } return cloneAttribute; } bool TermAttribute::equals(const LuceneObjectPtr& other) { if (Attribute::equals(other)) { return true; } TermAttributePtr otherTermAttribute(boost::dynamic_pointer_cast(other)); if (otherTermAttribute) { initTermBuffer(); otherTermAttribute->initTermBuffer(); if (_termLength != otherTermAttribute->_termLength) { return false; } return (std::memcmp(_termBuffer.get(), otherTermAttribute->_termBuffer.get(), _termLength) == 0); } return false; } void TermAttribute::copyTo(const AttributePtr& target) { initTermBuffer(); TermAttributePtr targetTermAttribute(boost::dynamic_pointer_cast(target)); targetTermAttribute->setTermBuffer(_termBuffer.get(), 0, _termLength); } } LucenePlusPlus-rel_3.0.9/src/core/analysis/tokenattributes/TypeAttribute.cpp000066400000000000000000000034441456444476200274370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TypeAttribute.h" #include "StringUtils.h" namespace Lucene { TypeAttribute::TypeAttribute() { _type = DEFAULT_TYPE(); } TypeAttribute::TypeAttribute(const String& type) { _type = type; } TypeAttribute::~TypeAttribute() { } const String& TypeAttribute::DEFAULT_TYPE() { static String _DEFAULT_TYPE(L"word"); return _DEFAULT_TYPE; } String TypeAttribute::toString() { return L"type=" + _type; } String TypeAttribute::type() { return _type; } void TypeAttribute::setType(const String& type) { _type = type; } void TypeAttribute::clear() { _type = DEFAULT_TYPE(); } bool TypeAttribute::equals(const LuceneObjectPtr& other) { if (Attribute::equals(other)) { return true; } TypeAttributePtr otherTypeAttribute(boost::dynamic_pointer_cast(other)); if (otherTypeAttribute) { return (otherTypeAttribute->_type == _type); } return false; } int32_t TypeAttribute::hashCode() { return StringUtils::hashCode(_type); } void TypeAttribute::copyTo(const AttributePtr& target) { boost::dynamic_pointer_cast(target)->setType(_type); } LuceneObjectPtr TypeAttribute::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? other : newLucene(); TypeAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); cloneAttribute->_type = _type; return cloneAttribute; } } LucenePlusPlus-rel_3.0.9/src/core/document/000077500000000000000000000000001456444476200206655ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/document/AbstractField.cpp000066400000000000000000000120771456444476200241070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "AbstractField.h" #include "Field.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { AbstractField::AbstractField() { this->_name = L"body"; this->storeTermVector = false; this->storeOffsetWithTermVector = false; this->storePositionWithTermVector = false; this->_omitNorms = false; this->_isStored = false; this->_isIndexed = true; this->_isTokenized = true; this->_isBinary = false; this->lazy = false; this->omitTermFreqAndPositions = false; this->boost = 1.0; this->fieldsData = VariantUtils::null(); this->binaryLength = 0; this->binaryOffset = 0; } AbstractField::AbstractField(const String& name, Field::Store store, Field::Index index, Field::TermVector termVector) { this->_name = name; this->_isStored = Field::isStored(store); this->_isIndexed = Field::isIndexed(index); this->_isTokenized = Field::isAnalyzed(index); this->_omitNorms = Field::omitNorms(index); this->_isBinary = false; this->lazy = false; this->omitTermFreqAndPositions = false; this->boost = 1.0; this->fieldsData = VariantUtils::null(); this->binaryLength = 0; this->binaryOffset = 0; setStoreTermVector(termVector); } AbstractField::~AbstractField() { } void AbstractField::setBoost(double boost) { this->boost = boost; } double AbstractField::getBoost() { return boost; } String AbstractField::name() { return _name; } void AbstractField::setStoreTermVector(Field::TermVector termVector) { this->storeTermVector = Field::isStored(termVector); this->storePositionWithTermVector = Field::withPositions(termVector); this->storeOffsetWithTermVector = Field::withOffsets(termVector); } bool AbstractField::isStored() { return _isStored; } bool AbstractField::isIndexed() { return _isIndexed; } bool AbstractField::isTokenized() { return _isTokenized; } bool AbstractField::isTermVectorStored() { return storeTermVector; } bool AbstractField::isStoreOffsetWithTermVector() { return 
storeOffsetWithTermVector; } bool AbstractField::isStorePositionWithTermVector() { return storePositionWithTermVector; } bool AbstractField::isBinary() { return _isBinary; } ByteArray AbstractField::getBinaryValue() { return getBinaryValue(ByteArray()); } ByteArray AbstractField::getBinaryValue(ByteArray result) { return VariantUtils::get(fieldsData); } int32_t AbstractField::getBinaryLength() { if (_isBinary) { return binaryLength; } ByteArray binary(VariantUtils::get(fieldsData)); return binary ? binary.size() : 0; } int32_t AbstractField::getBinaryOffset() { return binaryOffset; } bool AbstractField::getOmitNorms() { return _omitNorms; } bool AbstractField::getOmitTermFreqAndPositions() { return omitTermFreqAndPositions; } void AbstractField::setOmitNorms(bool omitNorms) { this->_omitNorms = omitNorms; } void AbstractField::setOmitTermFreqAndPositions(bool omitTermFreqAndPositions) { this->omitTermFreqAndPositions = omitTermFreqAndPositions; } bool AbstractField::isLazy() { return lazy; } String AbstractField::toString() { StringStream result; if (_isStored) { result << L"stored"; } if (_isIndexed) { if (!result.str().empty()) { result << L","; } result << L"indexed"; } if (_isTokenized) { if (!result.str().empty()) { result << L","; } result << L"tokenized"; } if (storeTermVector) { if (!result.str().empty()) { result << L","; } result << L"termVector"; } if (storeOffsetWithTermVector) { if (!result.str().empty()) { result << L","; } result << L"termVectorOffsets"; } if (storePositionWithTermVector) { if (!result.str().empty()) { result << L","; } result << L"termVectorPosition"; } if (_isBinary) { if (!result.str().empty()) { result << L","; } result << L"binary"; } if (_omitNorms) { result << L",omitNorms"; } if (omitTermFreqAndPositions) { result << L",omitTermFreqAndPositions"; } if (lazy) { result << L",lazy"; } result << L"<" << _name << L":"; if (VariantUtils::typeOf(fieldsData)) { result << VariantUtils::get(fieldsData); } else if 
(VariantUtils::typeOf(fieldsData)) { result << L"Reader"; } else if (VariantUtils::typeOf(fieldsData)) { result << L"Binary [size=" << StringUtils::toString(VariantUtils::get(fieldsData).size()) << L"]"; } result << L">"; return result.str(); } } LucenePlusPlus-rel_3.0.9/src/core/document/CompressionTools.cpp000066400000000000000000000116241456444476200247170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CompressionTools.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" #include #include #include #include #include namespace Lucene { const int32_t CompressionTools::COMPRESS_BUFFER = 4096; String ZLibToMessage(int32_t error) { if (error == boost::iostreams::zlib::okay) { return L"okay"; } else if (error == boost::iostreams::zlib::stream_end) { return L"stream_end"; } else if (error == boost::iostreams::zlib::stream_error) { return L"stream_error"; } else if (error == boost::iostreams::zlib::version_error) { return L"version_error"; } else if (error == boost::iostreams::zlib::data_error) { return L"data_error"; } else if (error == boost::iostreams::zlib::mem_error) { return L"mem_error"; } else if (error == boost::iostreams::zlib::buf_error ) { return L"buf_error"; } else { return L"unknown"; } } class BufferArraySink : public boost::iostreams::sink { public: BufferArraySink(ByteArray& _buffer, std::streamsize& _position, size_t allocSize) : buffer(_buffer), position(_position) { this->allocSize = allocSize; this->buffer.resize((int32_t)allocSize); } public: ByteArray& buffer; std::streamsize& position; private: size_t allocSize; public: std::streamsize write(const char* s, std::streamsize n) { 
if (position + n >= (std::streamsize)allocSize) { // grow buffer allocSize <<= 1; buffer.resize((int32_t)allocSize); } MiscUtils::arrayCopy(s, 0, buffer.get(), position, n); position += n; return n; } }; CompressionTools::~CompressionTools() { } ByteArray CompressionTools::compress(uint8_t* value, int32_t offset, int32_t length, int32_t compressionLevel) { // setup the outStream boost::iostreams::filtering_ostreambuf outStream; boost::iostreams::zlib_compressor zcompressor(compressionLevel); outStream.push(zcompressor); // and the output buffer ByteArray buffer(ByteArray::newInstance(COMPRESS_BUFFER)); std::streamsize position = 0; outStream.push(BufferArraySink(buffer, position, COMPRESS_BUFFER)); // setup the source stream, and then copy it to the outStream boost::iostreams::stream< boost::iostreams::array_source > source((char*)(value + offset), length); try { boost::iostreams::copy(source, outStream); } catch (boost::iostreams::zlib_error& err) { boost::throw_exception(CompressionException(L"deflate failure: " + ZLibToMessage(err.error()))); } buffer.resize((int32_t)position); return buffer; } ByteArray CompressionTools::compress(uint8_t* value, int32_t offset, int32_t length) { return compress(value, offset, length, boost::iostreams::zlib::best_compression); } ByteArray CompressionTools::compress(ByteArray value) { return compress(value.get(), 0, value.size(), boost::iostreams::zlib::best_compression); } ByteArray CompressionTools::compressString(const String& value) { return compressString(value, boost::iostreams::zlib::best_compression); } ByteArray CompressionTools::compressString(const String& value, int32_t compressionLevel) { UTF8ResultPtr utf8Result(newLucene()); StringUtils::toUTF8(value.c_str(), (int32_t)value.length(), utf8Result); return compress(utf8Result->result.get(), 0, utf8Result->length, compressionLevel); } ByteArray CompressionTools::decompress(ByteArray value) { // setup the outStream boost::iostreams::filtering_ostreambuf outStream; 
outStream.push(boost::iostreams::zlib_decompressor()); // and the output buffer ByteArray buffer(ByteArray::newInstance(COMPRESS_BUFFER)); std::streamsize position = 0; outStream.push(BufferArraySink(buffer, position, COMPRESS_BUFFER)); //setup the source stream, and then copy it to the outStream boost::iostreams::stream< boost::iostreams::array_source > source((char*)value.get(), value.size()); try { boost::iostreams::copy(source, outStream); } catch (boost::iostreams::zlib_error& err) { boost::throw_exception(CompressionException(L"deflate failure: " + ZLibToMessage(err.error()))); } buffer.resize((int32_t)position); return buffer; } String CompressionTools::decompressString(ByteArray value) { ByteArray bytes(decompress(value)); return StringUtils::toUnicode(bytes.get(), bytes.size()); } } LucenePlusPlus-rel_3.0.9/src/core/document/DateField.cpp000066400000000000000000000035731456444476200232220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DateField.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { DateField::~DateField() { } int32_t DateField::DATE_LEN() { static int32_t _DATE_LEN = 0; // make date strings long enough to last a millennium LUCENE_RUN_ONCE( _DATE_LEN = (int32_t)StringUtils::toString((int64_t)(1000 * 365 * 24) * (int64_t)(60 * 60 * 1000), StringUtils::CHARACTER_MAX_RADIX).length(); ); return _DATE_LEN; } const String& DateField::MIN_DATE_STRING() { static String _MIN_DATE_STRING; LUCENE_RUN_ONCE( _MIN_DATE_STRING = timeToString(0); ); return _MIN_DATE_STRING; } const String& DateField::MAX_DATE_STRING() { static String _MAX_DATE_STRING; LUCENE_RUN_ONCE( _MAX_DATE_STRING.resize(DATE_LEN()); std::fill(_MAX_DATE_STRING.begin(), _MAX_DATE_STRING.end(), L'z'); ); return _MAX_DATE_STRING; } String DateField::dateToString(const boost::posix_time::ptime& date) { return timeToString(MiscUtils::getTimeMillis(date)); } String DateField::timeToString(int64_t time) { if (time < 0) { boost::throw_exception(RuntimeException(L"time '" + StringUtils::toString(time) + L"' is too early, must be >= 0")); } String timeString(DATE_LEN(), L'0'); timeString += StringUtils::toString(time, StringUtils::CHARACTER_MAX_RADIX); return timeString.substr(timeString.length() - DATE_LEN(), DATE_LEN()); } int64_t DateField::stringToTime(const String& s) { return StringUtils::toLong(s, StringUtils::CHARACTER_MAX_RADIX); } } LucenePlusPlus-rel_3.0.9/src/core/document/DateTools.cpp000066400000000000000000000226461456444476200233010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include #include "DateTools.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { DateTools::DateOrder DateTools::dateOrder = DateTools::DATEORDER_LOCALE; DateTools::~DateTools() { } String DateTools::dateToString(const boost::posix_time::ptime& date, Resolution resolution) { return timeToString(MiscUtils::getTimeMillis(date), resolution); } String DateTools::timeToString(int64_t time, Resolution resolution) { std::string timeString(boost::posix_time::to_iso_string(boost::posix_time::ptime(boost::gregorian::date(1970, 1, 1), boost::posix_time::milliseconds(time)))); switch (resolution) { case RESOLUTION_YEAR: return StringUtils::toUnicode(timeString.substr(0, 4).c_str()); case RESOLUTION_MONTH: return StringUtils::toUnicode(timeString.substr(0, 6).c_str()); case RESOLUTION_DAY: return StringUtils::toUnicode(timeString.substr(0, 8).c_str()); case RESOLUTION_HOUR: return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 2)).c_str()); case RESOLUTION_MINUTE: return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 4)).c_str()); case RESOLUTION_SECOND: return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 6)).c_str()); case RESOLUTION_MILLISECOND: { std::string fraction(timeString.length() > 16 ? 
timeString.substr(16, 3) : "000" ); return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 6) + fraction).c_str()); } case RESOLUTION_NULL: // silence static analyzers break; } boost::throw_exception(IllegalArgumentException(L"unknown resolution '" + StringUtils::toString(resolution) + L"'")); return L""; } int64_t DateTools::stringToTime(const String& dateString) { return MiscUtils::getTimeMillis(stringToDate(dateString)); } boost::posix_time::ptime DateTools::stringToDate(const String& dateString) { uint16_t year = dateString.length() >= 4 ? (uint16_t)wcstol(dateString.substr(0, 4).c_str(), 0, 10) : 1970; uint16_t month = dateString.length() >= 6 ? (uint16_t)wcstol(dateString.substr(4, 2).c_str(), 0, 10) : 1; uint16_t day = dateString.length() >= 8 ? (uint16_t)wcstol(dateString.substr(6, 2).c_str(), 0, 10) : 1; uint16_t hour = dateString.length() >= 10 ? (uint16_t)wcstol(dateString.substr(8, 2).c_str(), 0, 10) : 0; uint16_t minute = dateString.length() >= 12 ? (uint16_t)wcstol(dateString.substr(10, 2).c_str(), 0, 10) : 0; uint16_t second = dateString.length() >= 14 ? (uint16_t)wcstol(dateString.substr(12, 2).c_str(), 0, 10) : 0; uint16_t millisecond = dateString.length() >= 16 ? (uint16_t)wcstol(dateString.substr(14, 3).c_str(), 0, 10) : 0; boost::posix_time::ptime date; try { date = boost::posix_time::ptime(boost::gregorian::date(year, month, day), boost::posix_time::hours(hour) + boost::posix_time::minutes(minute) + boost::posix_time::seconds(second) + boost::posix_time::milliseconds(millisecond)); } catch (...) 
{ boost::throw_exception(ParseException(L"Input is not valid date string: " + dateString)); } return date; } boost::posix_time::ptime DateTools::round(const boost::posix_time::ptime& date, Resolution resolution) { boost::posix_time::ptime roundDate; switch (resolution) { case RESOLUTION_YEAR: return boost::posix_time::ptime(boost::gregorian::date(date.date().year(), 1, 1)); case RESOLUTION_MONTH: return boost::posix_time::ptime(boost::gregorian::date(date.date().year(), date.date().month(), 1)); case RESOLUTION_DAY: return boost::posix_time::ptime(date.date()); case RESOLUTION_HOUR: return boost::posix_time::ptime(date.date(), boost::posix_time::hours(boost::posix_time::time_duration(date.time_of_day()).hours())); case RESOLUTION_MINUTE: return boost::posix_time::ptime(date.date(), boost::posix_time::hours(boost::posix_time::time_duration(date.time_of_day()).hours()) + boost::posix_time::minutes(boost::posix_time::time_duration(date.time_of_day()).minutes())); case RESOLUTION_SECOND: return boost::posix_time::ptime(date.date(), boost::posix_time::hours(boost::posix_time::time_duration(date.time_of_day()).hours()) + boost::posix_time::minutes(boost::posix_time::time_duration(date.time_of_day()).minutes()) + boost::posix_time::seconds(boost::posix_time::time_duration(date.time_of_day()).seconds())); case RESOLUTION_MILLISECOND: return date; case RESOLUTION_NULL: // silence static analyzers break; } return boost::posix_time::ptime(); } int64_t DateTools::round(int64_t time, Resolution resolution) { return MiscUtils::getTimeMillis(round(boost::posix_time::ptime(boost::gregorian::date(1970, 1, 1), boost::posix_time::milliseconds(time)), resolution)); } void DateTools::setDateOrder(DateTools::DateOrder order) { dateOrder = order; } DateTools::DateOrder DateTools::getDateOrder(std::locale locale) { if (dateOrder != DATEORDER_LOCALE) { return dateOrder; } std::locale localeDate(std::locale(locale, new boost::gregorian::date_facet("%x"))); SingleStringStream controlStream; 
controlStream.imbue(localeDate); controlStream << boost::gregorian::date(1974, 10, 20); // Oct 20th 1974 SingleString controlDate(controlStream.str()); SingleString::size_type year = controlDate.find("74"); SingleString::size_type month = controlDate.find("10"); if (month == SingleString::npos) { month = controlDate.find("O"); // safety } SingleString::size_type day = controlDate.find("20"); if (year < month) { return DATEORDER_YMD; } else if (month < day) { return DATEORDER_MDY; } else { return DATEORDER_DMY; } } boost::posix_time::ptime DateTools::parseDate(const String& dateString, std::locale locale) { Collection dateTokens(StringUtils::split(dateString, L",-. /")); String delimiter(dateTokens.size() == 1 ? L"" : L"/"); String paddedDate; for (Collection::iterator token = dateTokens.begin(); token != dateTokens.end(); ++token) { if (token != dateTokens.begin()) { paddedDate += delimiter; } if (token->length() == 1) { paddedDate += L"0" + *token; } else { paddedDate += *token; } } Collection dateFormats(Collection::newInstance()); switch (getDateOrder(locale)) { case DATEORDER_DMY: dateFormats.add(L"%d" + delimiter + L"%m" + delimiter + L"%Y"); dateFormats.add(L"%d" + delimiter + L"%m" + delimiter + L"%y"); dateFormats.add(L"%d" + delimiter + L"%b" + delimiter + L"%Y"); dateFormats.add(L"%d" + delimiter + L"%b" + delimiter + L"%y"); dateFormats.add(L"%d" + delimiter + L"%B" + delimiter + L"%Y"); dateFormats.add(L"%d" + delimiter + L"%B" + delimiter + L"%y"); break; case DATEORDER_MDY: dateFormats.add(L"%m" + delimiter + L"%d" + delimiter + L"%Y"); dateFormats.add(L"%m" + delimiter + L"%d" + delimiter + L"%y"); dateFormats.add(L"%b" + delimiter + L"%d" + delimiter + L"%Y"); dateFormats.add(L"%b" + delimiter + L"%d" + delimiter + L"%y"); dateFormats.add(L"%B" + delimiter + L"%d" + delimiter + L"%Y"); dateFormats.add(L"%B" + delimiter + L"%d" + delimiter + L"%y"); break; case DATEORDER_YMD: dateFormats.add(L"%Y" + delimiter + L"%m" + delimiter + L"%d"); 
dateFormats.add(L"%y" + delimiter + L"%m" + delimiter + L"%d"); dateFormats.add(L"%Y" + delimiter + L"%b" + delimiter + L"%d"); dateFormats.add(L"%y" + delimiter + L"%b" + delimiter + L"%d"); dateFormats.add(L"%Y" + delimiter + L"%B" + delimiter + L"%d"); dateFormats.add(L"%y" + delimiter + L"%B" + delimiter + L"%d"); break; case DATEORDER_LOCALE: // silence static analyzers break; } boost::date_time::format_date_parser parser(L"", locale); boost::date_time::special_values_parser svp; for (Collection::iterator dateFormat = dateFormats.begin(); dateFormat != dateFormats.end(); ++dateFormat) { try { boost::gregorian::date date = parser.parse_date(paddedDate.c_str(), dateFormat->c_str(), svp); if (!date.is_not_a_date()) { return boost::posix_time::ptime(date); } } catch (...) { } } boost::throw_exception(ParseException(L"Invalid date '" + dateString + L"'")); return boost::posix_time::ptime(); } } LucenePlusPlus-rel_3.0.9/src/core/document/Document.cpp000066400000000000000000000102611456444476200231470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Document.h" #include "Fieldable.h" #include "Field.h" namespace Lucene { Document::Document() { fields = Collection::newInstance(); boost = 1.0; } Document::~Document() { } void Document::setBoost(double boost) { this->boost = boost; } double Document::getBoost() { return boost; } void Document::add(const FieldablePtr& field) { fields.add(field); } /// Utility functor for comparing fieldable names. /// see {@link Document}. 
struct equalFieldableName { equalFieldableName(const String& name) : equalName(name) {} inline bool operator()(const FieldablePtr& other) const { return (equalName == other->name()); } const String& equalName; }; void Document::removeField(const String& name) { Collection::iterator field = fields.find_if(equalFieldableName(name)); if (field != fields.end()) { fields.remove(field); } } void Document::removeFields(const String& name) { fields.remove_if(equalFieldableName(name)); } FieldPtr Document::getField(const String& name) { return boost::static_pointer_cast(getFieldable(name)); } FieldablePtr Document::getFieldable(const String& name) { Collection::iterator field = fields.find_if(equalFieldableName(name)); return field == fields.end() ? FieldablePtr() : *field; } String Document::get(const String& name) { for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->name() == name && !(*field)->isBinary()) { return (*field)->stringValue(); } } return L""; } Collection Document::getFields() { return fields; } Collection Document::getFields(const String& name) { Collection result(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->name() == name) { result.add(boost::static_pointer_cast(*field)); } } return result; } Collection Document::getFieldables(const String& name) { Collection result(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->name() == name) { result.add(*field); } } return result; } Collection Document::getValues(const String& name) { Collection result(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->name() == name && !(*field)->isBinary()) { result.add((*field)->stringValue()); } } return result; } Collection Document::getBinaryValues(const String& name) { Collection 
result(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->name() == name && (*field)->isBinary()) { result.add((*field)->getBinaryValue()); } } return result; } ByteArray Document::getBinaryValue(const String& name) { for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->name() == name && (*field)->isBinary()) { return (*field)->getBinaryValue(); } } return ByteArray(); } String Document::toString() { StringStream buffer; buffer << L"Document<"; for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if (field != fields.begin()) { buffer << L" "; } buffer << (*field)->stringValue(); } buffer << L">"; return buffer.str(); } } LucenePlusPlus-rel_3.0.9/src/core/document/Field.cpp000066400000000000000000000222601456444476200224160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Field.h" #include "MiscUtils.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { Field::Field(const String& name, const String& value, Store store, Index index) { ConstructField(name, value, store, index, TERM_VECTOR_NO); } Field::Field(const String& name, const String& value, Store store, Index index, TermVector termVector) { ConstructField(name, value, store, index, termVector); } Field::Field(const String& name, const ReaderPtr& reader) { ConstructField(name, reader, TERM_VECTOR_NO); } Field::Field(const String& name, const ReaderPtr& reader, TermVector termVector) { ConstructField(name, reader, termVector); } Field::Field(const String& name, const TokenStreamPtr& tokenStream) { ConstructField(name, tokenStream, TERM_VECTOR_NO); } Field::Field(const String& name, const TokenStreamPtr& tokenStream, TermVector termVector) { ConstructField(name, tokenStream, termVector); } Field::Field(const String& name, ByteArray value, Store store) { ConstructField(name, value, 0, value.size(), store); } Field::Field(const String& name, ByteArray value, int32_t offset, int32_t length, Store store) { ConstructField(name, value, offset, length, store); } void Field::ConstructField(const String& name, const String& value, Store store, Index index, TermVector termVector) { if (name.empty() && value.empty()) { boost::throw_exception(IllegalArgumentException(L"name and value cannot both be empty")); } if (index == INDEX_NO && store == STORE_NO) { boost::throw_exception(IllegalArgumentException(L"it doesn't make sense to have a field that is neither indexed nor stored")); } if (index == INDEX_NO && termVector != TERM_VECTOR_NO) { boost::throw_exception(IllegalArgumentException(L"cannot store term vector information for a field that is not indexed")); } this->_name = name; this->fieldsData = value; this->_isStored = isStored(store); this->_isIndexed = 
isIndexed(index); this->_isTokenized = isAnalyzed(index); this->_omitNorms = omitNorms(index); this->_isBinary = false; if (index == INDEX_NO) { this->omitTermFreqAndPositions = false; } setStoreTermVector(termVector); } void Field::ConstructField(const String& name, const ReaderPtr& reader, TermVector termVector) { this->_name = name; this->fieldsData = reader; this->_isStored = false; this->_isIndexed = true; this->_isTokenized = true; this->_isBinary = false; setStoreTermVector(termVector); } void Field::ConstructField(const String& name, const TokenStreamPtr& tokenStream, TermVector termVector) { this->_name = name; this->fieldsData = VariantUtils::null(); this->tokenStream = tokenStream; this->_isStored = false; this->_isIndexed = true; this->_isTokenized = true; this->_isBinary = false; setStoreTermVector(termVector); } void Field::ConstructField(const String& name, ByteArray value, int32_t offset, int32_t length, Store store) { if (store == STORE_NO) { boost::throw_exception(IllegalArgumentException(L"binary values can't be unstored")); } this->_name = name; this->fieldsData = value; this->_isStored = isStored(store); this->_isIndexed = false; this->_isTokenized = false; this->omitTermFreqAndPositions = false; this->_omitNorms = true; this->_isBinary = true; this->binaryLength = length; this->binaryOffset = offset; setStoreTermVector(TERM_VECTOR_NO); } Field::~Field() { } String Field::stringValue() { return VariantUtils::get(fieldsData); } ReaderPtr Field::readerValue() { return VariantUtils::get(fieldsData); } TokenStreamPtr Field::tokenStreamValue() { return tokenStream; } void Field::setValue(const String& value) { if (_isBinary) { boost::throw_exception(IllegalArgumentException(L"cannot set a String value on a binary field")); } fieldsData = value; } void Field::setValue(const ReaderPtr& value) { if (_isBinary) { boost::throw_exception(IllegalArgumentException(L"cannot set a Reader value on a binary field")); } if (_isStored) { 
boost::throw_exception(IllegalArgumentException(L"cannot set a Reader value on a stored field")); } fieldsData = value; } void Field::setValue(ByteArray value) { if (!_isBinary) { boost::throw_exception(IllegalArgumentException(L"cannot set a byte[] value on a non-binary field")); } fieldsData = value; binaryLength = value.size(); binaryOffset = 0; } void Field::setValue(ByteArray value, int32_t offset, int32_t length) { if (!_isBinary) { boost::throw_exception(IllegalArgumentException(L"cannot set a byte[] value on a non-binary field")); } fieldsData = value; binaryLength = length; binaryOffset = offset; } void Field::setTokenStream(const TokenStreamPtr& tokenStream) { this->_isIndexed = true; this->_isTokenized = true; this->tokenStream = tokenStream; } bool Field::isStored(Store store) { switch (store) { case STORE_YES: return true; case STORE_NO: return false; default: boost::throw_exception(IllegalArgumentException(L"Invalid field store")); return false; } } bool Field::isIndexed(Index index) { switch (index) { case INDEX_NO: return false; case INDEX_ANALYZED: return true; case INDEX_NOT_ANALYZED: return true; case INDEX_NOT_ANALYZED_NO_NORMS: return true; case INDEX_ANALYZED_NO_NORMS: return true; default: boost::throw_exception(IllegalArgumentException(L"Invalid field index")); return false; } } bool Field::isAnalyzed(Index index) { switch (index) { case INDEX_NO: return false; case INDEX_ANALYZED: return true; case INDEX_NOT_ANALYZED: return false; case INDEX_NOT_ANALYZED_NO_NORMS: return false; case INDEX_ANALYZED_NO_NORMS: return true; default: boost::throw_exception(IllegalArgumentException(L"Invalid field index")); return false; } } bool Field::omitNorms(Index index) { switch (index) { case INDEX_NO: return true; case INDEX_ANALYZED: return false; case INDEX_NOT_ANALYZED: return false; case INDEX_NOT_ANALYZED_NO_NORMS: return true; case INDEX_ANALYZED_NO_NORMS: return true; default: boost::throw_exception(IllegalArgumentException(L"Invalid field 
index")); return false; } } Field::Index Field::toIndex(bool indexed, bool analyzed) { return toIndex(indexed, analyzed, false); } Field::Index Field::toIndex(bool indexed, bool analyzed, bool omitNorms) { // If it is not indexed nothing else matters if (!indexed) { return INDEX_NO; } // typical, non-expert if (!omitNorms) { return analyzed ? INDEX_ANALYZED : INDEX_NOT_ANALYZED; } // Expert: Norms omitted return analyzed ? INDEX_ANALYZED_NO_NORMS : INDEX_NOT_ANALYZED_NO_NORMS; } bool Field::isStored(TermVector termVector) { switch (termVector) { case TERM_VECTOR_NO: return false; case TERM_VECTOR_YES: return true; case TERM_VECTOR_WITH_POSITIONS: return true; case TERM_VECTOR_WITH_OFFSETS: return true; case TERM_VECTOR_WITH_POSITIONS_OFFSETS: return true; default: boost::throw_exception(IllegalArgumentException(L"Invalid field term vector")); return false; } } bool Field::withPositions(TermVector termVector) { switch (termVector) { case TERM_VECTOR_NO: return false; case TERM_VECTOR_YES: return false; case TERM_VECTOR_WITH_POSITIONS: return true; case TERM_VECTOR_WITH_OFFSETS: return false; case TERM_VECTOR_WITH_POSITIONS_OFFSETS: return true; default: boost::throw_exception(IllegalArgumentException(L"Invalid field term vector")); return false; } } bool Field::withOffsets(TermVector termVector) { switch (termVector) { case TERM_VECTOR_NO: return false; case TERM_VECTOR_YES: return false; case TERM_VECTOR_WITH_POSITIONS: return false; case TERM_VECTOR_WITH_OFFSETS: return true; case TERM_VECTOR_WITH_POSITIONS_OFFSETS: return true; default: boost::throw_exception(IllegalArgumentException(L"Invalid field term vector")); return false; } } Field::TermVector Field::toTermVector(bool stored, bool withOffsets, bool withPositions) { // If it is not stored, nothing else matters. if (!stored) { return TERM_VECTOR_NO; } if (withOffsets) { return withPositions ? TERM_VECTOR_WITH_POSITIONS_OFFSETS : TERM_VECTOR_WITH_OFFSETS; } return withPositions ? 
TERM_VECTOR_WITH_POSITIONS : TERM_VECTOR_YES; } } LucenePlusPlus-rel_3.0.9/src/core/document/FieldSelector.cpp000066400000000000000000000007451456444476200241230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldSelector.h" namespace Lucene { FieldSelector::FieldSelector() { } FieldSelector::~FieldSelector() { } } LucenePlusPlus-rel_3.0.9/src/core/document/Fieldable.cpp000066400000000000000000000047471456444476200232540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Fieldable.h" namespace Lucene { void Fieldable::setBoost(double boost) { BOOST_ASSERT(false); // override } double Fieldable::getBoost() { BOOST_ASSERT(false); return 0; // override } String Fieldable::name() { BOOST_ASSERT(false); return L""; // override } String Fieldable::stringValue() { BOOST_ASSERT(false); return L""; // override } ReaderPtr Fieldable::readerValue() { BOOST_ASSERT(false); return ReaderPtr(); // override } TokenStreamPtr Fieldable::tokenStreamValue() { BOOST_ASSERT(false); return TokenStreamPtr(); // override } bool Fieldable::isStored() { BOOST_ASSERT(false); return false; // override } bool Fieldable::isIndexed() { BOOST_ASSERT(false); return false; // override } bool Fieldable::isTokenized() { BOOST_ASSERT(false); return false; // override } bool Fieldable::isTermVectorStored() { BOOST_ASSERT(false); return false; // override } bool Fieldable::isStoreOffsetWithTermVector() { BOOST_ASSERT(false); return false; // override } bool Fieldable::isStorePositionWithTermVector() { BOOST_ASSERT(false); return false; // override } bool Fieldable::isBinary() { BOOST_ASSERT(false); return false; // override } bool Fieldable::getOmitNorms() { BOOST_ASSERT(false); return false; // override } void Fieldable::setOmitNorms(bool omitNorms) { BOOST_ASSERT(false); // override } bool Fieldable::isLazy() { BOOST_ASSERT(false); return false; // override } int32_t Fieldable::getBinaryOffset() { BOOST_ASSERT(false); return 0; // override } int32_t Fieldable::getBinaryLength() { BOOST_ASSERT(false); return 0; // override } ByteArray Fieldable::getBinaryValue() { BOOST_ASSERT(false); return ByteArray(); // override } ByteArray Fieldable::getBinaryValue(ByteArray result) { BOOST_ASSERT(false); return ByteArray(); // override } bool Fieldable::getOmitTermFreqAndPositions() { BOOST_ASSERT(false); return false; // override } void 
Fieldable::setOmitTermFreqAndPositions(bool omitTermFreqAndPositions) { BOOST_ASSERT(false); // override } } LucenePlusPlus-rel_3.0.9/src/core/document/LoadFirstFieldSelector.cpp000066400000000000000000000011571456444476200257310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LoadFirstFieldSelector.h" namespace Lucene { LoadFirstFieldSelector::~LoadFirstFieldSelector() { } FieldSelector::FieldSelectorResult LoadFirstFieldSelector::accept(const String& fieldName) { return FieldSelector::SELECTOR_LOAD_AND_BREAK; } } LucenePlusPlus-rel_3.0.9/src/core/document/MapFieldSelector.cpp000066400000000000000000000022231456444476200245520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MapFieldSelector.h" namespace Lucene { MapFieldSelector::MapFieldSelector(MapStringFieldSelectorResult fieldSelections) { this->fieldSelections = fieldSelections; } MapFieldSelector::MapFieldSelector(Collection fields) { fieldSelections = MapStringFieldSelectorResult::newInstance(); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { fieldSelections.put(*field, FieldSelector::SELECTOR_LOAD); } } MapFieldSelector::~MapFieldSelector() { } FieldSelector::FieldSelectorResult MapFieldSelector::accept(const String& fieldName) { MapStringFieldSelectorResult::iterator selection = fieldSelections.find(fieldName); return selection != fieldSelections.end() ? selection->second : FieldSelector::SELECTOR_NO_LOAD; } } LucenePlusPlus-rel_3.0.9/src/core/document/NumberTools.cpp000066400000000000000000000047521456444476200236520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NumberTools.h" #include "StringUtils.h" namespace Lucene { const int32_t NumberTools::RADIX = 36; const wchar_t NumberTools::NEGATIVE_PREFIX = L'-'; const wchar_t NumberTools::POSITIVE_PREFIX = L'0'; NumberTools::~NumberTools() { } const String& NumberTools::MIN_STRING_VALUE() { static String _MIN_STRING_VALUE; LUCENE_RUN_ONCE( _MIN_STRING_VALUE += NEGATIVE_PREFIX; _MIN_STRING_VALUE += L"0000000000000"; ); return _MIN_STRING_VALUE; } const String& NumberTools::MAX_STRING_VALUE() { static String _MAX_STRING_VALUE; LUCENE_RUN_ONCE( _MAX_STRING_VALUE += POSITIVE_PREFIX; _MAX_STRING_VALUE += L"1y2p0ij32e8e7"; ); return _MAX_STRING_VALUE; } int32_t NumberTools::STR_SIZE() { static int32_t _STR_SIZE = 0; LUCENE_RUN_ONCE( _STR_SIZE = (int32_t)MIN_STRING_VALUE().length(); ); return _STR_SIZE; } String NumberTools::longToString(int64_t l) { if (l == std::numeric_limits::min()) { // special case, because long is not symmetric around zero return MIN_STRING_VALUE(); } String buf; buf.reserve(STR_SIZE()); if (l < 0) { buf += NEGATIVE_PREFIX; l = std::numeric_limits::max() + l + 1; } buf += POSITIVE_PREFIX; String num(StringUtils::toString(l, RADIX)); int32_t padLen = (int32_t)(STR_SIZE() - num.length() - buf.length()); while (padLen-- > 0) { buf += L'0'; } return buf + num; } int64_t NumberTools::stringToLong(const String& str) { if ((int32_t)str.length() != STR_SIZE()) { boost::throw_exception(NumberFormatException(L"string is the wrong size")); } if (str == MIN_STRING_VALUE()) { return std::numeric_limits::min(); } wchar_t prefix = str[0]; int64_t l = StringUtils::toLong(str.substr(1), RADIX); if (prefix == POSITIVE_PREFIX) { // nop } else if (prefix == NEGATIVE_PREFIX) { l = l - std::numeric_limits::max() - 1; } else { boost::throw_exception(NumberFormatException(L"string does not begin with the correct prefix")); } return l; } } 
LucenePlusPlus-rel_3.0.9/src/core/document/NumericField.cpp000066400000000000000000000052461456444476200237460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NumericField.h" #include "Field.h" #include "NumericUtils.h" #include "NumericTokenStream.h" #include "StringUtils.h" namespace Lucene { NumericField::NumericField(const String& name) : AbstractField(name, Field::STORE_NO, Field::INDEX_ANALYZED_NO_NORMS, Field::TERM_VECTOR_NO) { setOmitTermFreqAndPositions(true); tokenStream = newLucene(NumericUtils::PRECISION_STEP_DEFAULT); } NumericField::NumericField(const String& name, Field::Store store, bool index) : AbstractField(name, store, index ? Field::INDEX_ANALYZED_NO_NORMS : Field::INDEX_NO, Field::TERM_VECTOR_NO) { setOmitTermFreqAndPositions(true); tokenStream = newLucene(NumericUtils::PRECISION_STEP_DEFAULT); } NumericField::NumericField(const String& name, int32_t precisionStep) : AbstractField(name, Field::STORE_NO, Field::INDEX_ANALYZED_NO_NORMS, Field::TERM_VECTOR_NO) { setOmitTermFreqAndPositions(true); tokenStream = newLucene(precisionStep); } NumericField::NumericField(const String& name, int32_t precisionStep, Field::Store store, bool index) : AbstractField(name, store, index ? Field::INDEX_ANALYZED_NO_NORMS : Field::INDEX_NO, Field::TERM_VECTOR_NO) { setOmitTermFreqAndPositions(true); tokenStream = newLucene(precisionStep); } NumericField::~NumericField() { } TokenStreamPtr NumericField::tokenStreamValue() { return isIndexed() ? 
boost::static_pointer_cast(tokenStream) : TokenStreamPtr(); } ByteArray NumericField::getBinaryValue(ByteArray result) { return ByteArray(); } ReaderPtr NumericField::readerValue() { return ReaderPtr(); } String NumericField::stringValue() { StringStream value; value << fieldsData; return value.str(); } int64_t NumericField::getNumericValue() { return StringUtils::toLong(stringValue()); } NumericFieldPtr NumericField::setLongValue(int64_t value) { tokenStream->setLongValue(value); fieldsData = value; return shared_from_this(); } NumericFieldPtr NumericField::setIntValue(int32_t value) { tokenStream->setIntValue(value); fieldsData = value; return shared_from_this(); } NumericFieldPtr NumericField::setDoubleValue(double value) { tokenStream->setDoubleValue(value); fieldsData = value; return shared_from_this(); } } LucenePlusPlus-rel_3.0.9/src/core/document/SetBasedFieldSelector.cpp000066400000000000000000000020571456444476200255340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SetBasedFieldSelector.h" namespace Lucene { SetBasedFieldSelector::SetBasedFieldSelector(HashSet fieldsToLoad, HashSet lazyFieldsToLoad) { this->fieldsToLoad = fieldsToLoad; this->lazyFieldsToLoad = lazyFieldsToLoad; } SetBasedFieldSelector::~SetBasedFieldSelector() { } FieldSelector::FieldSelectorResult SetBasedFieldSelector::accept(const String& fieldName) { FieldSelector::FieldSelectorResult result = FieldSelector::SELECTOR_NO_LOAD; if (fieldsToLoad.contains(fieldName)) { result = FieldSelector::SELECTOR_LOAD; } if (lazyFieldsToLoad.contains(fieldName)) { result = FieldSelector::SELECTOR_LAZY_LOAD; } return result; } } LucenePlusPlus-rel_3.0.9/src/core/include/000077500000000000000000000000001456444476200204725ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/include/LuceneInc.h000066400000000000000000000010261456444476200225070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifdef _WIN32 #include "targetver.h" #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #ifndef NOMINMAX #define NOMINMAX #endif #include #endif #include "Lucene.h" LucenePlusPlus-rel_3.0.9/src/core/include/_BooleanQuery.h000066400000000000000000000030321456444476200234050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _BOOLEANQUERY_H #define _BOOLEANQUERY_H #include "SimilarityDelegator.h" namespace Lucene { /// The Weight for BooleanQuery, used to normalize, score and explain these queries. class BooleanWeight : public Weight { public: BooleanWeight(const BooleanQueryPtr& query, const SearcherPtr& searcher); virtual ~BooleanWeight(); LUCENE_CLASS(BooleanWeight); protected: BooleanQueryPtr query; /// The Similarity implementation. SimilarityPtr similarity; Collection weights; public: virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); virtual bool scoresDocsOutOfOrder(); }; /// Disabled coord Similarity class SimilarityDisableCoord : public SimilarityDelegator { public: SimilarityDisableCoord(const SimilarityPtr& delegee); virtual ~SimilarityDisableCoord(); LUCENE_CLASS(SimilarityDisableCoord); public: virtual double coord(int32_t overlap, int32_t maxOverlap); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_ByteFieldSource.h000066400000000000000000000016041456444476200240330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _BYTEFIELDSOURCE_H #define _BYTEFIELDSOURCE_H #include "DocValues.h" namespace Lucene { class ByteDocValues : public DocValues { public: ByteDocValues(const ByteFieldSourcePtr& source, Collection arr); virtual ~ByteDocValues(); LUCENE_CLASS(ByteDocValues); protected: ByteFieldSourceWeakPtr _source; Collection arr; public: virtual double doubleVal(int32_t doc); virtual int32_t intVal(int32_t doc); virtual String toString(int32_t doc); virtual CollectionValue getInnerArray(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_CachingSpanFilter.h000066400000000000000000000014631456444476200243320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _CACHINGSPANFILTER_H #define _CACHINGSPANFILTER_H #include "_CachingWrapperFilter.h" namespace Lucene { class FilterCacheSpanFilterResult : public FilterCache { public: FilterCacheSpanFilterResult(CachingWrapperFilter::DeletesMode deletesMode); virtual ~FilterCacheSpanFilterResult(); LUCENE_CLASS(FilterCacheSpanFilterResult); protected: virtual LuceneObjectPtr mergeDeletes(const IndexReaderPtr& reader, const LuceneObjectPtr& value); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_CachingWrapperFilter.h000066400000000000000000000033351456444476200250510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _CACHINGWRAPPERFILTER_H #define _CACHINGWRAPPERFILTER_H #include "FilteredDocIdSet.h" namespace Lucene { class FilterCache : public LuceneObject { public: FilterCache(CachingWrapperFilter::DeletesMode deletesMode); virtual ~FilterCache(); LUCENE_CLASS(FilterCache); public: WeakMapObjectObject cache; CachingWrapperFilter::DeletesMode deletesMode; public: virtual LuceneObjectPtr get(const IndexReaderPtr& reader, const LuceneObjectPtr& coreKey, const LuceneObjectPtr& delCoreKey); virtual void put(const LuceneObjectPtr& coreKey, const LuceneObjectPtr& delCoreKey, const LuceneObjectPtr& value); protected: virtual LuceneObjectPtr mergeDeletes(const IndexReaderPtr& reader, const LuceneObjectPtr& value) = 0; }; class FilterCacheDocIdSet : public FilterCache { public: FilterCacheDocIdSet(CachingWrapperFilter::DeletesMode deletesMode); virtual ~FilterCacheDocIdSet(); LUCENE_CLASS(FilterCacheDocIdSet); protected: virtual LuceneObjectPtr mergeDeletes(const IndexReaderPtr& reader, const LuceneObjectPtr& value); }; class FilteredCacheDocIdSet : public FilteredDocIdSet { public: FilteredCacheDocIdSet(const IndexReaderPtr& reader, const DocIdSetPtr& innerSet); virtual ~FilteredCacheDocIdSet(); LUCENE_CLASS(FilteredCacheDocIdSet); protected: IndexReaderPtr reader; protected: virtual bool match(int32_t docid); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_CheckIndex.h000066400000000000000000000013511456444476200230070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _CHECKINDEX_H #define _CHECKINDEX_H #include "SegmentTermDocs.h" namespace Lucene { class MySegmentTermDocs : public SegmentTermDocs { public: MySegmentTermDocs(const SegmentReaderPtr& p); virtual ~MySegmentTermDocs(); LUCENE_CLASS(MySegmentTermDocs); public: int32_t delCount; public: virtual void seek(const TermPtr& term); virtual void skippingDoc(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_ConcurrentMergeScheduler.h000066400000000000000000000017731456444476200257530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _CONCURRENTMERGESCHEDULER_H #define _CONCURRENTMERGESCHEDULER_H #include "LuceneThread.h" namespace Lucene { class LPPAPI MergeThread : public LuceneThread { public: MergeThread(const ConcurrentMergeSchedulerPtr& merger, const IndexWriterPtr& writer, const OneMergePtr& startMerge); virtual ~MergeThread(); LUCENE_CLASS(MergeThread); protected: ConcurrentMergeSchedulerWeakPtr _merger; IndexWriterWeakPtr _writer; OneMergePtr startMerge; OneMergePtr runningMerge; public: void setRunningMerge(const OneMergePtr& merge); OneMergePtr getRunningMerge(); void setThreadPriority(int32_t pri); virtual void run(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_ConstantScoreQuery.h000066400000000000000000000031261456444476200246170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _CONSTANTSCOREQUERY_H #define _CONSTANTSCOREQUERY_H #include "Weight.h" namespace Lucene { class ConstantWeight : public Weight { public: ConstantWeight(const ConstantScoreQueryPtr& constantScorer, const SearcherPtr& searcher); virtual ~ConstantWeight(); LUCENE_CLASS(ConstantWeight); protected: ConstantScoreQueryPtr constantScorer; SimilarityPtr similarity; double queryNorm; double queryWeight; public: virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); }; class ConstantScorer : public Scorer { public: ConstantScorer(const ConstantScoreQueryPtr& constantScorer, const SimilarityPtr& similarity, const IndexReaderPtr& reader, const WeightPtr& w); virtual ~ConstantScorer(); LUCENE_CLASS(ConstantScorer); public: DocIdSetIteratorPtr docIdSetIterator; double theScore; int32_t doc; public: virtual int32_t nextDoc(); virtual int32_t docID(); virtual double score(); virtual int32_t advance(int32_t target); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_CustomScoreQuery.h000066400000000000000000000056311456444476200243030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _CUSTOMSCOREQUERY_H #define _CUSTOMSCOREQUERY_H #include "Weight.h" #include "Scorer.h" #include "CustomScoreProvider.h" namespace Lucene { // when deprecated methods are removed, do not extend class here, just return new default CustomScoreProvider class DefaultCustomScoreProvider : public CustomScoreProvider { public: DefaultCustomScoreProvider(const CustomScoreQueryPtr& customQuery, const IndexReaderPtr& reader); virtual ~DefaultCustomScoreProvider(); LUCENE_CLASS(DefaultCustomScoreProvider); protected: CustomScoreQueryWeakPtr _customQuery; public: virtual double customScore(int32_t doc, double subQueryScore, Collection valSrcScores); virtual double customScore(int32_t doc, double subQueryScore, double valSrcScore); virtual ExplanationPtr customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, Collection valSrcExpls); virtual ExplanationPtr customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, const ExplanationPtr& valSrcExpl); }; class CustomWeight : public Weight { public: CustomWeight(const CustomScoreQueryPtr& query, const SearcherPtr& searcher); virtual ~CustomWeight(); LUCENE_CLASS(CustomWeight); public: CustomScoreQueryPtr query; SimilarityPtr similarity; WeightPtr subQueryWeight; Collection valSrcWeights; bool qStrict; public: virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); virtual bool scoresDocsOutOfOrder(); protected: ExplanationPtr doExplain(const IndexReaderPtr& reader, int32_t doc); }; /// A scorer that applies a (callback) function on scores of the subQuery. 
class CustomScorer : public Scorer { public: CustomScorer(const SimilarityPtr& similarity, const IndexReaderPtr& reader, const CustomWeightPtr& weight, const ScorerPtr& subQueryScorer, Collection valSrcScorers); virtual ~CustomScorer(); LUCENE_CLASS(CustomScorer); protected: double qWeight; ScorerPtr subQueryScorer; Collection valSrcScorers; IndexReaderPtr reader; CustomScoreProviderPtr provider; Collection vScores; // reused in score() to avoid allocating this array for each doc public: virtual int32_t nextDoc(); virtual int32_t docID(); virtual double score(); virtual int32_t advance(int32_t target); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_DirectoryReader.h000066400000000000000000000026011456444476200240700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _DIRECTORYREADER_H #define _DIRECTORYREADER_H #include "_SegmentInfos.h" namespace Lucene { class FindSegmentsOpen : public FindSegmentsFileT { public: FindSegmentsOpen(bool readOnly, const IndexDeletionPolicyPtr& deletionPolicy, int32_t termInfosIndexDivisor, const SegmentInfosPtr& infos, const DirectoryPtr& directory); virtual ~FindSegmentsOpen(); LUCENE_CLASS(FindSegmentsOpen); protected: bool readOnly; IndexDeletionPolicyPtr deletionPolicy; int32_t termInfosIndexDivisor; public: virtual IndexReaderPtr doBody(const String& segmentFileName); }; class FindSegmentsReopen : public FindSegmentsFileT { public: FindSegmentsReopen(const DirectoryReaderPtr& reader, bool openReadOnly, const SegmentInfosPtr& infos, const DirectoryPtr& directory); virtual ~FindSegmentsReopen(); LUCENE_CLASS(FindSegmentsReopen); protected: DirectoryReaderWeakPtr _reader; bool openReadOnly; public: virtual DirectoryReaderPtr doBody(const String& segmentFileName); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_DisjunctionMaxQuery.h000066400000000000000000000034221456444476200247700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _DISJUNCTIONMAXQUERY_H #define _DISJUNCTIONMAXQUERY_H #include "Weight.h" namespace Lucene { /// The Weight for DisjunctionMaxQuery, used to normalize, score and explain these queries. class DisjunctionMaxWeight : public Weight { public: /// Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. 
DisjunctionMaxWeight(const DisjunctionMaxQueryPtr& query, const SearcherPtr& searcher); virtual ~DisjunctionMaxWeight(); LUCENE_CLASS(DisjunctionMaxWeight); protected: DisjunctionMaxQueryPtr query; /// The Similarity implementation. SimilarityPtr similarity; /// The Weights for our subqueries, in 1-1 correspondence with disjuncts Collection weights; public: /// Return our associated DisjunctionMaxQuery virtual QueryPtr getQuery(); /// Return our boost virtual double getValue(); /// Compute the sub of squared weights of us applied to our subqueries. Used for normalization. virtual double sumOfSquaredWeights(); /// Apply the computed normalization factor to our subqueries virtual void normalize(double norm); /// Create the scorer used to score our associated DisjunctionMaxQuery virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); /// Explain the score we computed for doc virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_DocIdBitSet.h000066400000000000000000000014361456444476200231030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _DOCIDBITSET_H #define _DOCIDBITSET_H #include "DocIdSet.h" namespace Lucene { class DocIdBitSetIterator : public DocIdSetIterator { public: DocIdBitSetIterator(const BitSetPtr& bitSet); virtual ~DocIdBitSetIterator(); LUCENE_CLASS(DocIdBitSetIterator); protected: int32_t docId; BitSetPtr bitSet; public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_DocIdSet.h000066400000000000000000000017411456444476200224430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _DOCIDSET_H #define _DOCIDSET_H #include "DocIdSetIterator.h" namespace Lucene { class EmptyDocIdSetIterator : public DocIdSetIterator { public: virtual ~EmptyDocIdSetIterator(); LUCENE_CLASS(EmptyDocIdSetIterator); public: virtual int32_t advance(int32_t target); virtual int32_t docID(); virtual int32_t nextDoc(); }; /// An empty {@code DocIdSet} instance for easy use, eg. in Filters that hit no documents. class EmptyDocIdSet : public DocIdSet { public: virtual ~EmptyDocIdSet(); LUCENE_CLASS(EmptyDocIdSet); public: virtual DocIdSetIteratorPtr iterator(); virtual bool isCacheable(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_FieldCache.h000066400000000000000000000044721456444476200227600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _FIELDCACHE_H #define _FIELDCACHE_H #include "LuceneObject.h" namespace Lucene { /// @see FieldCache#DEFAULT_BYTE_PARSER() class DefaultByteParser : public ByteParser { public: virtual ~DefaultByteParser(); LUCENE_CLASS(DefaultByteParser); public: virtual uint8_t parseByte(const String& string); virtual String toString(); }; /// @see FieldCache#DEFAULT_INT_PARSER() class DefaultIntParser : public IntParser { public: virtual ~DefaultIntParser(); LUCENE_CLASS(DefaultIntParser); public: virtual int32_t parseInt(const String& string); virtual String toString(); }; /// @see FieldCache#NUMERIC_UTILS_INT_PARSER() class NumericUtilsIntParser : public IntParser { public: virtual ~NumericUtilsIntParser(); LUCENE_CLASS(NumericUtilsIntParser); public: virtual int32_t parseInt(const String& string); virtual String toString(); }; /// @see FieldCache#DEFAULT_LONG_PARSER() class DefaultLongParser : public LongParser { public: virtual ~DefaultLongParser(); LUCENE_CLASS(DefaultLongParser); public: virtual int64_t parseLong(const String& string); virtual String toString(); }; /// @see FieldCache#NUMERIC_UTILS_LONG_PARSER() class NumericUtilsLongParser : public LongParser { public: virtual ~NumericUtilsLongParser(); LUCENE_CLASS(NumericUtilsLongParser); public: virtual int64_t parseLong(const String& string); virtual String toString(); }; /// @see FieldCache#DEFAULT_DOUBLE_PARSER() class DefaultDoubleParser : public DoubleParser { public: virtual ~DefaultDoubleParser(); LUCENE_CLASS(DefaultDoubleParser); public: virtual double parseDouble(const String& string); virtual String toString(); }; /// @see FieldCache#NUMERIC_UTILS_DOUBLE_PARSER() class NumericUtilsDoubleParser : public DoubleParser { public: virtual ~NumericUtilsDoubleParser(); LUCENE_CLASS(NumericUtilsDoubleParser); public: virtual double parseDouble(const String& string); virtual String toString(); }; } #endif 
LucenePlusPlus-rel_3.0.9/src/core/include/_FieldCacheRangeFilter.h000066400000000000000000000214561456444476200251040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _FIELDCACHERANGEFILTER_H #define _FIELDCACHERANGEFILTER_H #include "Filter.h" #include "DocIdSet.h" #include "DocIdSetIterator.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { class FieldCacheRangeFilterString : public FieldCacheRangeFilter { public: FieldCacheRangeFilterString(const String& field, const ParserPtr& parser, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper); virtual ~FieldCacheRangeFilterString(); LUCENE_CLASS(FieldCacheRangeFilterString); public: String lowerVal; String upperVal; public: virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); virtual String toString(); virtual bool equals(const LuceneObjectPtr& other); virtual int32_t hashCode(); }; class FieldCacheDocIdSet : public DocIdSet { public: FieldCacheDocIdSet(const IndexReaderPtr& reader, bool mayUseTermDocs); virtual ~FieldCacheDocIdSet(); LUCENE_CLASS(FieldCacheDocIdSet); protected: IndexReaderPtr reader; bool mayUseTermDocs; public: /// This method checks, if a doc is a hit, should throw ArrayIndexOutOfBounds, when position invalid virtual bool matchDoc(int32_t doc) = 0; /// This DocIdSet is cacheable, if it works solely with FieldCache and no TermDocs. 
virtual bool isCacheable(); virtual DocIdSetIteratorPtr iterator(); }; template class FieldCacheDocIdSetNumeric : public FieldCacheDocIdSet { public: FieldCacheDocIdSetNumeric(const IndexReaderPtr& reader, bool mayUseTermDocs, Collection values, TYPE inclusiveLowerPoint, TYPE inclusiveUpperPoint) : FieldCacheDocIdSet(reader, mayUseTermDocs) { this->values = values; this->inclusiveLowerPoint = inclusiveLowerPoint; this->inclusiveUpperPoint = inclusiveUpperPoint; } virtual ~FieldCacheDocIdSetNumeric() { } protected: Collection values; TYPE inclusiveLowerPoint; TYPE inclusiveUpperPoint; public: virtual bool matchDoc(int32_t doc) { if (doc < 0 || doc >= values.size()) { boost::throw_exception(IndexOutOfBoundsException()); } return (values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint); } }; template class FieldCacheRangeFilterNumeric : public FieldCacheRangeFilter { public: FieldCacheRangeFilterNumeric(const String& field, const ParserPtr& parser, TYPE lowerVal, TYPE upperVal, TYPE maxVal, bool includeLower, bool includeUpper) : FieldCacheRangeFilter(field, parser, includeLower, includeUpper) { this->lowerVal = lowerVal; this->upperVal = upperVal; this->maxVal = maxVal; } virtual ~FieldCacheRangeFilterNumeric() { } public: TYPE lowerVal; TYPE upperVal; TYPE maxVal; public: virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader) { if (!includeLower && lowerVal == maxVal) { return DocIdSet::EMPTY_DOCIDSET(); } int64_t inclusiveLowerPoint = (int64_t)(includeLower ? lowerVal : (lowerVal + 1)); if (!includeUpper && upperVal == 0) { return DocIdSet::EMPTY_DOCIDSET(); } int64_t inclusiveUpperPoint = (int64_t)(includeUpper ? 
upperVal : (upperVal - 1)); if (inclusiveLowerPoint > inclusiveUpperPoint) { return DocIdSet::EMPTY_DOCIDSET(); } // we only request the usage of termDocs, if the range contains 0 return newLucene< FieldCacheDocIdSetNumeric >(reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0), getValues(reader), inclusiveLowerPoint, inclusiveUpperPoint); } virtual Collection getValues(const IndexReaderPtr& reader) = 0; virtual String toString() { StringStream buffer; buffer << field << L":" << (includeLower ? L"[" : L"{"); buffer << lowerVal << L" TO " << lowerVal; buffer << (includeLower ? L"]" : L"}"); return buffer.str(); } virtual bool equals(const LuceneObjectPtr& other) { if (Filter::equals(other)) { return true; } boost::shared_ptr< FieldCacheRangeFilterNumeric > otherFilter(boost::dynamic_pointer_cast< FieldCacheRangeFilterNumeric >(other)); if (!otherFilter) { return false; } if (field != otherFilter->field || includeLower != otherFilter->includeLower || includeUpper != otherFilter->includeUpper) { return false; } if (lowerVal != otherFilter->lowerVal || upperVal != otherFilter->upperVal) { return false; } if (parser.get() != NULL ? !parser->equals(otherFilter->parser) : otherFilter->parser.get() != NULL) { return false; } return true; } int32_t hashCode() { int32_t code = StringUtils::hashCode(field); code ^= lowerVal == 0 ? 550356204 : (int32_t)lowerVal; code = (code << 1) | MiscUtils::unsignedShift(code, 31); // rotate to distinguish lower from upper code ^= upperVal == 0 ? -1674416163 : (int32_t)upperVal; code ^= parser ? parser->hashCode() : -1572457324; code ^= (includeLower ? 1549299360 : -365038026) ^ (includeUpper ? 
1721088258 : 1948649653); return code; } }; class FieldCacheRangeFilterByte : public FieldCacheRangeFilterNumeric { public: FieldCacheRangeFilterByte(const String& field, const ParserPtr& parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper); virtual ~FieldCacheRangeFilterByte(); LUCENE_CLASS(FieldCacheRangeFilterByte); public: virtual Collection getValues(const IndexReaderPtr& reader); }; class FieldCacheRangeFilterInt : public FieldCacheRangeFilterNumeric { public: FieldCacheRangeFilterInt(const String& field, const ParserPtr& parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper); virtual ~FieldCacheRangeFilterInt(); LUCENE_CLASS(FieldCacheRangeFilterInt); public: virtual Collection getValues(const IndexReaderPtr& reader); }; class FieldCacheRangeFilterLong : public FieldCacheRangeFilterNumeric { public: FieldCacheRangeFilterLong(const String& field, const ParserPtr& parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper); virtual ~FieldCacheRangeFilterLong(); LUCENE_CLASS(FieldCacheRangeFilterLong); public: virtual Collection getValues(const IndexReaderPtr& reader); }; class FieldCacheRangeFilterDouble : public FieldCacheRangeFilterNumeric { public: FieldCacheRangeFilterDouble(const String& field, const ParserPtr& parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper); virtual ~FieldCacheRangeFilterDouble(); LUCENE_CLASS(FieldCacheRangeFilterDouble); public: virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); virtual Collection getValues(const IndexReaderPtr& reader); }; class FieldCacheDocIdSetString : public FieldCacheDocIdSet { public: FieldCacheDocIdSetString(const IndexReaderPtr& reader, bool mayUseTermDocs, const StringIndexPtr& fcsi, int32_t inclusiveLowerPoint, int32_t inclusiveUpperPoint); virtual ~FieldCacheDocIdSetString(); LUCENE_CLASS(FieldCacheDocIdSetString); protected: StringIndexPtr fcsi; int32_t inclusiveLowerPoint; int32_t 
inclusiveUpperPoint; public: virtual bool matchDoc(int32_t doc); }; /// A DocIdSetIterator using TermDocs to iterate valid docIds class FieldDocIdSetIteratorTermDocs : public DocIdSetIterator { public: FieldDocIdSetIteratorTermDocs(const FieldCacheDocIdSetPtr& cacheDocIdSet, const TermDocsPtr& termDocs); virtual ~FieldDocIdSetIteratorTermDocs(); LUCENE_CLASS(FieldDocIdSetIteratorTermDocs); protected: FieldCacheDocIdSetWeakPtr _cacheDocIdSet; TermDocsPtr termDocs; int32_t doc; public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); }; /// A DocIdSetIterator generating docIds by incrementing a variable - this one can be used if there /// are no deletions are on the index. class FieldDocIdSetIteratorIncrement : public DocIdSetIterator { public: FieldDocIdSetIteratorIncrement(const FieldCacheDocIdSetPtr& cacheDocIdSet); virtual ~FieldDocIdSetIteratorIncrement(); LUCENE_CLASS(FieldDocIdSetIteratorIncrement); protected: FieldCacheDocIdSetWeakPtr _cacheDocIdSet; int32_t doc; public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_FieldCacheSanityChecker.h000066400000000000000000000016211456444476200254260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _FIELDCACHESANITYCHECKER_H #define _FIELDCACHESANITYCHECKER_H #include "LuceneObject.h" namespace Lucene { /// Simple pair object for using "readerKey + fieldName" a Map key class ReaderField : public LuceneObject { public: ReaderField(const LuceneObjectPtr& readerKey, const String& fieldName); virtual ~ReaderField(); LUCENE_CLASS(ReaderField); public: LuceneObjectPtr readerKey; String fieldName; public: virtual int32_t hashCode(); virtual bool equals(const LuceneObjectPtr& other); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_FieldCacheTermsFilter.h000066400000000000000000000026441456444476200251400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _FIELDCACHETERMSFILTER_H #define _FIELDCACHETERMSFILTER_H #include "DocIdSet.h" #include "DocIdSetIterator.h" namespace Lucene { class FieldCacheTermsFilterDocIdSet : public DocIdSet { public: FieldCacheTermsFilterDocIdSet(Collection terms, const StringIndexPtr& fcsi); virtual ~FieldCacheTermsFilterDocIdSet(); LUCENE_CLASS(FieldCacheTermsFilterDocIdSet); protected: StringIndexPtr fcsi; OpenBitSetPtr openBitSet; public: virtual DocIdSetIteratorPtr iterator(); /// This DocIdSet implementation is cacheable. 
virtual bool isCacheable(); }; class FieldCacheTermsFilterDocIdSetIterator : public DocIdSetIterator { public: FieldCacheTermsFilterDocIdSetIterator(const StringIndexPtr& fcsi, const OpenBitSetPtr& openBitSet); virtual ~FieldCacheTermsFilterDocIdSetIterator(); LUCENE_CLASS(FieldCacheTermsFilterDocIdSetIterator); protected: StringIndexPtr fcsi; OpenBitSetPtr openBitSet; int32_t doc; public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_FieldValueHitQueue.h000066400000000000000000000027031456444476200244760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _FIELDVALUEHITQUEUE_H #define _FIELDVALUEHITQUEUE_H #include "LuceneObject.h" namespace Lucene { /// An implementation of {@link FieldValueHitQueue} which is optimized in case there is just one comparator. class OneComparatorFieldValueHitQueue : public FieldValueHitQueue { public: OneComparatorFieldValueHitQueue(Collection fields, int32_t size); virtual ~OneComparatorFieldValueHitQueue(); LUCENE_CLASS(OneComparatorFieldValueHitQueue); public: FieldComparatorPtr comparator; int32_t oneReverseMul; protected: virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); }; /// An implementation of {@link FieldValueHitQueue} which is optimized in case there is more than one comparator. 
class MultiComparatorsFieldValueHitQueue : public FieldValueHitQueue { public: MultiComparatorsFieldValueHitQueue(Collection fields, int32_t size); virtual ~MultiComparatorsFieldValueHitQueue(); LUCENE_CLASS(MultiComparatorsFieldValueHitQueue); protected: virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_FilterManager.h000066400000000000000000000030551456444476200235250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _FILTERMANAGER_H #define _FILTERMANAGER_H #include "LuceneThread.h" namespace Lucene { /// Holds the filter and the last time the filter was used, to make LRU-based cache cleaning possible. class FilterItem : public LuceneObject { public: FilterItem(const FilterPtr& filter); virtual ~FilterItem(); LUCENE_CLASS(FilterItem); public: FilterPtr filter; int64_t timestamp; }; /// Keeps the cache from getting too big. /// /// The SortedSet sortedFilterItems is used only to sort the items from the cache, so when it's time to clean /// up we have the TreeSet sort the FilterItems by timestamp. /// /// Removes 1.5 * the numbers of items to make the cache smaller. /// For example: If cache clean size is 10, and the cache is at 15, we would remove (15 - 10) * 1.5 = 7.5 /// round up to 8. This way we clean the cache a bit more, and avoid having the cache cleaner having to do /// it frequently. 
class FilterCleaner : public LuceneThread { public: FilterCleaner(const FilterManagerPtr& manager); virtual ~FilterCleaner(); LUCENE_CLASS(FilterCleaner); protected: FilterManagerWeakPtr _manager; bool running; MapLongInt sortedFilterItems; public: virtual void run(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_FilteredDocIdSet.h000066400000000000000000000016341456444476200241230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _FILTEREDDOCIDSET_H #define _FILTEREDDOCIDSET_H #include "FilteredDocIdSetIterator.h" namespace Lucene { /// Implementation of the contract to build a DocIdSetIterator. class DefaultFilteredDocIdSetIterator : public FilteredDocIdSetIterator { public: DefaultFilteredDocIdSetIterator(const FilteredDocIdSetPtr& filtered, const DocIdSetIteratorPtr& innerIter); virtual ~DefaultFilteredDocIdSetIterator(); LUCENE_CLASS(DefaultFilteredDocIdSetIterator); protected: FilteredDocIdSetPtr filtered; protected: virtual bool match(int32_t docid); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_FilteredQuery.h000066400000000000000000000035111456444476200235660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _FILTEREDQUERY_H #define _FILTEREDQUERY_H #include "Weight.h" #include "Scorer.h" namespace Lucene { class FilteredQueryWeight : public Weight { public: FilteredQueryWeight(const FilteredQueryPtr& query, const WeightPtr& weight, const SimilarityPtr& similarity); virtual ~FilteredQueryWeight(); LUCENE_CLASS(FilteredQueryWeight); protected: FilteredQueryPtr query; WeightPtr weight; SimilarityPtr similarity; double value; public: virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); virtual QueryPtr getQuery(); virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); friend class FilteredQueryWeightScorer; }; class FilteredQueryWeightScorer : public Scorer { public: FilteredQueryWeightScorer(const FilteredQueryWeightPtr& weight, const ScorerPtr& scorer, const DocIdSetIteratorPtr& docIdSetIterator, const SimilarityPtr& similarity); virtual ~FilteredQueryWeightScorer(); LUCENE_CLASS(FilteredQueryWeightScorer); protected: FilteredQueryWeightPtr weight; ScorerPtr scorer; DocIdSetIteratorPtr docIdSetIterator; int32_t doc; public: virtual int32_t nextDoc(); virtual int32_t docID(); virtual int32_t advance(int32_t target); virtual double score(); protected: int32_t advanceToCommon(int32_t scorerDoc, int32_t disiDoc); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_FuzzyQuery.h000066400000000000000000000016271456444476200231650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _FUZZYQUERY_H #define _FUZZYQUERY_H #include "PriorityQueue.h" namespace Lucene { class ScoreTerm : public LuceneObject { public: virtual ~ScoreTerm(); LUCENE_CLASS(ScoreTerm); public: TermPtr term; double score; public: int32_t compareTo(const ScoreTermPtr& other); }; class ScoreTermQueue : public PriorityQueue { public: ScoreTermQueue(int32_t size); virtual ~ScoreTermQueue(); LUCENE_CLASS(ScoreTermQueue); protected: virtual bool lessThan(const ScoreTermPtr& first, const ScoreTermPtr& second); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_IndexReader.h000066400000000000000000000013641456444476200232000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _INDEXREADER_H #define _INDEXREADER_H #include "_SegmentInfos.h" namespace Lucene { class FindSegmentsModified : public FindSegmentsFileT { public: FindSegmentsModified(const SegmentInfosPtr& infos, const DirectoryPtr& directory); virtual ~FindSegmentsModified(); LUCENE_CLASS(FindSegmentsModified); public: virtual uint64_t doBody(const String& segmentFileName); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_IndexWriter.h000066400000000000000000000050671456444476200232560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _INDEXWRITER_H #define _INDEXWRITER_H #include "LuceneObject.h" namespace Lucene { /// Holds shared SegmentReader instances. IndexWriter uses SegmentReaders for 1) applying deletes, /// 2) doing merges, 3) handing out a real-time reader. This pool reuses instances of the SegmentReaders /// in all these places if it is in "near real-time mode" (getReader() has been called on this instance). class ReaderPool : public LuceneObject { public: ReaderPool(const IndexWriterPtr& writer); virtual ~ReaderPool(); LUCENE_CLASS(ReaderPool); protected: IndexWriterWeakPtr _indexWriter; MapSegmentInfoSegmentReader readerMap; public: /// Forcefully clear changes for the specified segments, and remove from the pool. /// This is called on successful merge. void clear(const SegmentInfosPtr& infos); /// used only by asserts bool infoIsLive(const SegmentInfoPtr& info); SegmentInfoPtr mapToLive(const SegmentInfoPtr& info); /// Release the segment reader (i.e. decRef it and close if there are no more references. void release(const SegmentReaderPtr& sr); /// Release the segment reader (i.e. decRef it and close if there are no more references. void release(const SegmentReaderPtr& sr, bool drop); /// Remove all our references to readers, and commits any pending changes. void close(); /// Commit all segment reader in the pool. void commit(); /// Returns a ref to a clone. NOTE: this clone is not enrolled in the pool, so you should /// simply close() it when you're done (ie, do not call release()). IndexReaderPtr getReadOnlyClone(const SegmentInfoPtr& info, bool doOpenStores, int32_t termInfosIndexDivisor); /// Obtain a SegmentReader from the readerPool. The reader must be returned by calling /// {@link #release(SegmentReader)} SegmentReaderPtr get(const SegmentInfoPtr& info, bool doOpenStores); /// Obtain a SegmentReader from the readerPool. 
The reader must be returned by calling /// {@link #release(SegmentReader)} SegmentReaderPtr get(const SegmentInfoPtr& info, bool doOpenStores, int32_t readBufferSize, int32_t termsIndexDivisor); /// Returns a ref SegmentReaderPtr getIfExists(const SegmentInfoPtr& info); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_IntFieldSource.h000066400000000000000000000015741456444476200236700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _INTFIELDSOURCE_H #define _INTFIELDSOURCE_H #include "DocValues.h" namespace Lucene { class IntDocValues : public DocValues { public: IntDocValues(const IntFieldSourcePtr& source, Collection arr); virtual ~IntDocValues(); LUCENE_CLASS(IntDocValues); protected: IntFieldSourceWeakPtr _source; Collection arr; public: virtual double doubleVal(int32_t doc); virtual int32_t intVal(int32_t doc); virtual String toString(int32_t doc); virtual CollectionValue getInnerArray(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_MMapDirectory.h000066400000000000000000000035311456444476200235230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _MMAPDIRECTORY_H #define _MMAPDIRECTORY_H #include #include "IndexInput.h" namespace Lucene { class MMapIndexInput : public IndexInput { public: MMapIndexInput(const String& path = L""); virtual ~MMapIndexInput(); LUCENE_CLASS(MMapIndexInput); protected: int32_t _length; bool isClone; boost::iostreams::mapped_file_source file; int32_t bufferPosition; // next byte to read public: /// Reads and returns a single byte. /// @see IndexOutput#writeByte(uint8_t) virtual uint8_t readByte(); /// Reads a specified number of bytes into an array at the specified offset. /// @param b the array to read bytes into. /// @param offset the offset in the array to start storing bytes. /// @param length the number of bytes to read. /// @see IndexOutput#writeBytes(const uint8_t*,int) virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); /// Returns the current position in this file, where the next read will occur. /// @see #seek(int64_t) virtual int64_t getFilePointer(); /// Sets current position in this file, where the next read will occur. /// @see #getFilePointer() virtual void seek(int64_t pos); /// The number of bytes in the file. virtual int64_t length(); /// Closes the stream to further operations. virtual void close(); /// Returns a clone of this stream. virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_MatchAllDocsQuery.h000066400000000000000000000032761456444476200243360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _MATCHALLDOCSQUERY_H #define _MATCHALLDOCSQUERY_H #include "Weight.h" #include "Scorer.h" namespace Lucene { class MatchAllDocsWeight : public Weight { public: MatchAllDocsWeight(const MatchAllDocsQueryPtr& query, const SearcherPtr& searcher); virtual ~MatchAllDocsWeight(); LUCENE_CLASS(MatchAllDocsWeight); protected: MatchAllDocsQueryPtr query; SimilarityPtr similarity; double queryWeight; double queryNorm; public: virtual String toString(); virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); }; class MatchAllScorer : public Scorer { public: MatchAllScorer(const MatchAllDocsQueryPtr& query, const IndexReaderPtr& reader, const SimilarityPtr& similarity, const WeightPtr& weight, ByteArray norms); virtual ~MatchAllScorer(); LUCENE_CLASS(MatchAllScorer); public: TermDocsPtr termDocs; double _score; ByteArray norms; protected: MatchAllDocsQueryPtr query; int32_t doc; public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual double score(); virtual int32_t advance(int32_t target); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_MultiPhraseQuery.h000066400000000000000000000021721456444476200242670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _MULTIPHRASEQUERY_H #define _MULTIPHRASEQUERY_H #include "Weight.h" namespace Lucene { class MultiPhraseWeight : public Weight { public: MultiPhraseWeight(const MultiPhraseQueryPtr& query, const SearcherPtr& searcher); virtual ~MultiPhraseWeight(); LUCENE_CLASS(MultiPhraseWeight); protected: MultiPhraseQueryPtr query; SimilarityPtr similarity; double value; double idf; double queryNorm; double queryWeight; public: virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_MultiSearcher.h000066400000000000000000000071251456444476200235560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _MULTISEARCHER_H #define _MULTISEARCHER_H #include "Searcher.h" #include "Collector.h" namespace Lucene { /// Document Frequency cache acting as a Dummy-Searcher. This class is not a full-fledged Searcher, but /// only supports the methods necessary to initialize Weights. 
class CachedDfSource : public Searcher { public: CachedDfSource(MapTermInt dfMap, int32_t maxDoc, const SimilarityPtr& similarity); virtual ~CachedDfSource(); LUCENE_CLASS(CachedDfSource); protected: MapTermInt dfMap; // Map from Terms to corresponding doc freqs int32_t _maxDoc; // document count public: virtual int32_t docFreq(const TermPtr& term); virtual Collection docFreqs(Collection terms); virtual int32_t maxDoc(); virtual QueryPtr rewrite(const QueryPtr& query); virtual void close(); virtual DocumentPtr doc(int32_t n); virtual DocumentPtr doc(int32_t n, const FieldSelectorPtr& fieldSelector); virtual ExplanationPtr explain(const WeightPtr& weight, int32_t doc); virtual void search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& results); virtual TopDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n); virtual TopFieldDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort); }; /// A subclass for searching a single searchable class MultiSearcherCallableNoSort : public LuceneObject { public: MultiSearcherCallableNoSort(const SynchronizePtr& lock, const SearchablePtr& searchable, const WeightPtr& weight, const FilterPtr& filter, int32_t nDocs, const HitQueuePtr& hq, int32_t i, Collection starts); virtual ~MultiSearcherCallableNoSort(); LUCENE_CLASS(MultiSearcherCallableNoSort); protected: SynchronizePtr lock; SearchablePtr searchable; WeightPtr weight; FilterPtr filter; int32_t nDocs; int32_t i; HitQueuePtr hq; Collection starts; public: TopDocsPtr call(); }; /// A subclass for searching a single searchable class MultiSearcherCallableWithSort : public LuceneObject { public: MultiSearcherCallableWithSort(const SynchronizePtr& lock, const SearchablePtr& searchable, const WeightPtr& weight, const FilterPtr& filter, int32_t nDocs, const FieldDocSortedHitQueuePtr& hq, const SortPtr& sort, int32_t i, Collection starts); virtual ~MultiSearcherCallableWithSort(); 
LUCENE_CLASS(MultiSearcherCallableWithSort); protected: SynchronizePtr lock; SearchablePtr searchable; WeightPtr weight; FilterPtr filter; int32_t nDocs; int32_t i; FieldDocSortedHitQueuePtr hq; Collection starts; SortPtr sort; public: TopFieldDocsPtr call(); }; class MultiSearcherCollector : public Collector { public: MultiSearcherCollector(const CollectorPtr& collector, int32_t start); virtual ~MultiSearcherCollector(); LUCENE_CLASS(MultiSearcherCollector); protected: CollectorPtr collector; int32_t start; public: virtual void setScorer(const ScorerPtr& scorer); virtual void collect(int32_t doc); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); virtual bool acceptsDocsOutOfOrder(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_MultiTermQuery.h000066400000000000000000000030271456444476200237540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _MULTITERMQUERY_H #define _MULTITERMQUERY_H #include "LuceneObject.h" namespace Lucene { class ConstantScoreFilterRewrite : public RewriteMethod { public: virtual ~ConstantScoreFilterRewrite(); LUCENE_CLASS(ConstantScoreFilterRewrite); public: virtual QueryPtr rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query); }; class ScoringBooleanQueryRewrite : public RewriteMethod { public: virtual ~ScoringBooleanQueryRewrite(); LUCENE_CLASS(ScoringBooleanQueryRewrite); public: virtual QueryPtr rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query); }; class ConstantScoreBooleanQueryRewrite : public ScoringBooleanQueryRewrite { public: virtual ~ConstantScoreBooleanQueryRewrite(); LUCENE_CLASS(ConstantScoreBooleanQueryRewrite); public: virtual QueryPtr rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query); }; class ConstantScoreAutoRewriteDefault : public ConstantScoreAutoRewrite { public: virtual ~ConstantScoreAutoRewriteDefault(); LUCENE_CLASS(ConstantScoreAutoRewriteDefault); public: virtual void setTermCountCutoff(int32_t count); virtual void setDocCountPercent(double percent); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_MultipleTermPositions.h000066400000000000000000000023541456444476200253410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _MULTIPLETERMPOSITIONS_H #define _MULTIPLETERMPOSITIONS_H #include "PriorityQueue.h" namespace Lucene { class TermPositionsQueue : public PriorityQueue { public: TermPositionsQueue(Collection termPositions); virtual ~TermPositionsQueue(); LUCENE_CLASS(TermPositionsQueue); protected: Collection termPositions; public: virtual void initialize(); protected: virtual bool lessThan(const TermPositionsPtr& first, const TermPositionsPtr& second); }; class IntQueue : public LuceneObject { public: IntQueue(); virtual ~IntQueue(); LUCENE_CLASS(IntQueue); protected: int32_t arraySize; int32_t index; int32_t lastIndex; Collection array; public: void add(int32_t i); int32_t next(); void sort(); void clear(); int32_t size(); protected: void growArray(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_NativeFSLockFactory.h000066400000000000000000000016601456444476200246250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _NATIVEFSLOCKFACTORY_H #define _NATIVEFSLOCKFACTORY_H #include "Lock.h" namespace Lucene { class NativeFSLock : public Lock { public: NativeFSLock(const String& lockDir, const String& lockFileName); virtual ~NativeFSLock(); LUCENE_CLASS(NativeFSLock); protected: String lockDir; String path; filelockPtr lock; static SynchronizePtr LOCK_HELD_LOCK(); static HashSet LOCK_HELD(); public: virtual bool obtain(); virtual void release(); virtual bool isLocked(); virtual String toString(); protected: bool lockExists(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_NearSpansUnordered.h000066400000000000000000000026771456444476200245600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _NEARSPANSUNORDERED_H #define _NEARSPANSUNORDERED_H #include "Spans.h" #include "PriorityQueue.h" namespace Lucene { /// Wraps a Spans, and can be used to form a linked list. 
class SpansCell : public Spans { public: SpansCell(const NearSpansUnorderedPtr& unordered, const SpansPtr& spans, int32_t index); virtual ~SpansCell(); LUCENE_CLASS(SpansCell); protected: NearSpansUnorderedWeakPtr _unordered; SpansPtr spans; SpansCellPtr _next; int32_t length; int32_t index; public: virtual bool next(); virtual bool skipTo(int32_t target); virtual int32_t doc(); virtual int32_t start(); virtual int32_t end(); virtual Collection getPayload(); virtual bool isPayloadAvailable(); virtual String toString(); protected: bool adjust(bool condition); friend class NearSpansUnordered; }; class CellQueue : public PriorityQueue { public: CellQueue(int32_t size); virtual ~CellQueue(); LUCENE_CLASS(CellQueue); protected: virtual bool lessThan(const SpansCellPtr& first, const SpansCellPtr& second); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_NoLockFactory.h000066400000000000000000000012161456444476200235170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _NOLOCKFACTORY_H #define _NOLOCKFACTORY_H #include "Lock.h" namespace Lucene { class NoLock : public Lock { public: virtual ~NoLock(); LUCENE_CLASS(NoLock); public: virtual bool obtain(); virtual void release(); virtual bool isLocked(); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_NumericRangeQuery.h000066400000000000000000000053201456444476200244070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _NUMERICRANGEQUERY_H #define _NUMERICRANGEQUERY_H #include "FilteredTermEnum.h" #include "NumericUtils.h" namespace Lucene { /// Subclass of FilteredTermEnum for enumerating all terms that match the sub-ranges for trie range queries. /// /// Warning: This term enumeration is not guaranteed to be always ordered by {@link Term#compareTo}. The /// ordering depends on how {@link NumericUtils#splitLongRange} and {@link NumericUtils#splitIntRange} /// generates the sub-ranges. For {@link MultiTermQuery} ordering is not relevant. class LPPAPI NumericRangeTermEnum : public FilteredTermEnum { public: NumericRangeTermEnum(const NumericRangeQueryPtr& query, const IndexReaderPtr& reader); virtual ~NumericRangeTermEnum(); LUCENE_CLASS(NumericRangeTermEnum); protected: NumericRangeQueryWeakPtr _query; IndexReaderPtr reader; Collection rangeBounds; TermPtr termTemplate; String currentUpperBound; public: virtual double difference(); /// Increments the enumeration to the next element. True if one exists. virtual bool next(); /// Closes the enumeration to further activity, freeing resources. virtual void close(); protected: /// This is a dummy, it is not used by this class. virtual bool endEnum(); /// This is a dummy, it is not used by this class. virtual void setEnum(const TermEnumPtr& actualEnum); /// Compares if current upper bound is reached, this also updates the term count for statistics. /// In contrast to {@link FilteredTermEnum}, a return value of false ends iterating the current enum /// and forwards to the next sub-range. 
virtual bool termCompare(const TermPtr& term); }; class NumericLongRangeBuilder : public LongRangeBuilder { public: NumericLongRangeBuilder(Collection rangeBounds); virtual ~NumericLongRangeBuilder(); LUCENE_CLASS(NumericLongRangeBuilder); protected: Collection rangeBounds; public: virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); }; class NumericIntRangeBuilder : public IntRangeBuilder { public: NumericIntRangeBuilder(Collection rangeBounds); virtual ~NumericIntRangeBuilder(); LUCENE_CLASS(NumericIntRangeBuilder); protected: Collection rangeBounds; public: virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_OrdFieldSource.h000066400000000000000000000016021456444476200236520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _ORDFIELDSOURCE_H #define _ORDFIELDSOURCE_H #include "DocValues.h" namespace Lucene { class LPPAPI OrdDocValues : public DocValues { public: OrdDocValues(const OrdFieldSourcePtr& source, Collection arr); virtual ~OrdDocValues(); LUCENE_CLASS(OrdDocValues); protected: OrdFieldSourceWeakPtr _source; Collection arr; public: virtual double doubleVal(int32_t doc); virtual String strVal(int32_t doc); virtual String toString(int32_t doc); virtual CollectionValue getInnerArray(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_ParallelReader.h000066400000000000000000000050061456444476200236620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _PARALLELREADER_H #define _PARALLELREADER_H #include "TermEnum.h" #include "TermDocs.h" #include "TermPositions.h" namespace Lucene { class ParallelTermEnum : public TermEnum { public: ParallelTermEnum(const ParallelReaderPtr& reader); ParallelTermEnum(const ParallelReaderPtr& reader, const TermPtr& term); virtual ~ParallelTermEnum(); LUCENE_CLASS(ParallelTermEnum); protected: ParallelReaderWeakPtr _reader; String field; MapStringIndexReader::iterator fieldIterator; bool setIterator; TermEnumPtr termEnum; public: /// Increments the enumeration to the next element. True if one exists. virtual bool next(); /// Returns the current Term in the enumeration. virtual TermPtr term(); /// Returns the docFreq of the current Term in the enumeration. virtual int32_t docFreq(); /// Closes the enumeration to further activity, freeing resources. 
virtual void close(); }; /// Wrap a TermDocs in order to support seek(Term) class ParallelTermDocs : public TermPositions, public LuceneObject { public: ParallelTermDocs(const ParallelReaderPtr& reader); ParallelTermDocs(const ParallelReaderPtr& reader, const TermPtr& term); virtual ~ParallelTermDocs(); LUCENE_CLASS(ParallelTermDocs); protected: ParallelReaderWeakPtr _reader; TermDocsPtr termDocs; public: virtual int32_t doc(); virtual int32_t freq(); virtual void seek(const TermPtr& term); virtual void seek(const TermEnumPtr& termEnum); virtual bool next(); virtual int32_t read(Collection& docs, Collection& freqs); virtual bool skipTo(int32_t target); virtual void close(); }; class ParallelTermPositions : public ParallelTermDocs { public: ParallelTermPositions(const ParallelReaderPtr& reader); ParallelTermPositions(const ParallelReaderPtr& reader, const TermPtr& term); virtual ~ParallelTermPositions(); LUCENE_CLASS(ParallelTermPositions); public: virtual void seek(const TermPtr& term); virtual int32_t nextPosition(); virtual int32_t getPayloadLength(); virtual ByteArray getPayload(ByteArray data, int32_t offset); virtual bool isPayloadAvailable(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_PayloadTermQuery.h000066400000000000000000000034451456444476200242570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _PAYLOADTERMQUERY_H #define _PAYLOADTERMQUERY_H #include "SpanWeight.h" #include "SpanScorer.h" namespace Lucene { class PayloadTermWeight : public SpanWeight { public: PayloadTermWeight(const PayloadTermQueryPtr& query, const SearcherPtr& searcher); virtual ~PayloadTermWeight(); LUCENE_CLASS(PayloadTermWeight); public: virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); }; class PayloadTermSpanScorer : public SpanScorer { public: PayloadTermSpanScorer(const TermSpansPtr& spans, const WeightPtr& weight, const SimilarityPtr& similarity, ByteArray norms); virtual ~PayloadTermSpanScorer(); LUCENE_CLASS(PayloadTermSpanScorer); protected: ByteArray payload; TermPositionsPtr positions; double payloadScore; int32_t payloadsSeen; public: virtual double score(); protected: virtual bool setFreqCurrentDoc(); void processPayload(const SimilarityPtr& similarity); /// Returns the SpanScorer score only. /// /// Should not be overridden without good cause /// /// @return the score for just the Span part without the payload /// @see #score() virtual double getSpanScore(); /// The score for the payload /// /// @return The score, as calculated by {@link PayloadFunction#docScore(int32_t, const String&, /// int32_t, double)} virtual double getPayloadScore(); virtual ExplanationPtr explain(int32_t doc); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_PhraseQuery.h000066400000000000000000000022171456444476200232540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _PHRASEQUERY_H #define _PHRASEQUERY_H #include "Weight.h" namespace Lucene { class PhraseWeight : public Weight { public: PhraseWeight(const PhraseQueryPtr& query, const SearcherPtr& searcher); virtual ~PhraseWeight(); LUCENE_CLASS(PhraseWeight); protected: PhraseQueryPtr query; SimilarityPtr similarity; double value; double idf; double queryNorm; double queryWeight; IDFExplanationPtr idfExp; public: virtual String toString(); virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_QueryWrapperFilter.h000066400000000000000000000015111456444476200246140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _QUERYWRAPPERFILTER_H #define _QUERYWRAPPERFILTER_H #include "DocIdSet.h" namespace Lucene { class QueryWrapperFilterDocIdSet : public DocIdSet { public: QueryWrapperFilterDocIdSet(const IndexReaderPtr& reader, const WeightPtr& weight); virtual ~QueryWrapperFilterDocIdSet(); LUCENE_CLASS(QueryWrapperFilterDocIdSet); protected: IndexReaderPtr reader; WeightPtr weight; public: virtual DocIdSetIteratorPtr iterator(); virtual bool isCacheable(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_ReverseOrdFieldSource.h000066400000000000000000000017721456444476200252160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _REVERSEORDFIELDSOURCE_H #define _REVERSEORDFIELDSOURCE_H #include "DocValues.h" namespace Lucene { class ReverseOrdDocValues : public DocValues { public: ReverseOrdDocValues(const ReverseOrdFieldSourcePtr& source, Collection arr, int32_t end); virtual ~ReverseOrdDocValues(); LUCENE_CLASS(ReverseOrdDocValues); protected: ReverseOrdFieldSourceWeakPtr _source; Collection arr; int32_t end; public: virtual double doubleVal(int32_t doc); virtual int32_t intVal(int32_t doc); virtual String strVal(int32_t doc); virtual String toString(int32_t doc); virtual CollectionValue getInnerArray(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_ScorerDocQueue.h000066400000000000000000000013641456444476200236760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _SCORERDOCQUEUE_H #define _SCORERDOCQUEUE_H #include "LuceneObject.h" namespace Lucene { class HeapedScorerDoc : public LuceneObject { public: HeapedScorerDoc(const ScorerPtr& scorer); HeapedScorerDoc(const ScorerPtr& scorer, int32_t doc); virtual ~HeapedScorerDoc(); LUCENE_CLASS(HeapedScorerDoc); public: ScorerPtr scorer; int32_t doc; public: void adjust(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_SegmentInfos.h000066400000000000000000000042111456444476200234010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _SEGMENTINFOS_H #define _SEGMENTINFOS_H #include "LuceneObject.h" namespace Lucene { /// Utility class for executing code that needs to do something with the current segments file. 
class FindSegmentsFile : public LuceneObject { public: FindSegmentsFile(const SegmentInfosPtr& infos, const DirectoryPtr& directory); virtual ~FindSegmentsFile(); LUCENE_CLASS(FindSegmentsFile); protected: SegmentInfosWeakPtr _segmentInfos; DirectoryPtr directory; public: void doRun(const IndexCommitPtr& commit = IndexCommitPtr()); virtual void runBody(const String& segmentFileName) = 0; }; template class FindSegmentsFileT : public FindSegmentsFile { public: FindSegmentsFileT(const SegmentInfosPtr& infos, const DirectoryPtr& directory) : FindSegmentsFile(infos, directory) {} virtual ~FindSegmentsFileT() {} protected: TYPE result; public: virtual TYPE run(const IndexCommitPtr& commit = IndexCommitPtr()) { doRun(commit); return result; } virtual void runBody(const String& segmentFileName) { result = doBody(segmentFileName); } virtual TYPE doBody(const String& segmentFileName) = 0; }; /// Utility class for executing code that needs to do something with the current segments file. This is necessary with /// lock-less commits because from the time you locate the current segments file name, until you actually open it, read /// its contents, or check modified time, etc., it could have been deleted due to a writer commit finishing. class FindSegmentsRead : public FindSegmentsFileT { public: FindSegmentsRead(const SegmentInfosPtr& infos, const DirectoryPtr& directory); virtual ~FindSegmentsRead(); LUCENE_CLASS(FindSegmentsRead); public: virtual int64_t doBody(const String& segmentFileName); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_SegmentReader.h000066400000000000000000000112141456444476200235260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _SEGMENTREADER_H #define _SEGMENTREADER_H #include "CloseableThreadLocal.h" namespace Lucene { /// Holds core readers that are shared (unchanged) when SegmentReader is cloned or reopened class CoreReaders : public LuceneObject { public: CoreReaders(const SegmentReaderPtr& origInstance, const DirectoryPtr& dir, const SegmentInfoPtr& si, int32_t readBufferSize, int32_t termsIndexDivisor); virtual ~CoreReaders(); LUCENE_CLASS(CoreReaders); protected: /// Counts how many other reader share the core objects (freqStream, proxStream, tis, etc.) of this reader; /// when coreRef drops to 0, these core objects may be closed. A given instance of SegmentReader may be /// closed, even those it shares core objects with other SegmentReaders SegmentReaderRefPtr ref; SegmentReaderWeakPtr _origInstance; public: String segment; FieldInfosPtr fieldInfos; IndexInputPtr freqStream; IndexInputPtr proxStream; TermInfosReaderPtr tisNoIndex; DirectoryPtr dir; DirectoryPtr cfsDir; int32_t readBufferSize; int32_t termsIndexDivisor; TermInfosReaderPtr tis; FieldsReaderPtr fieldsReaderOrig; TermVectorsReaderPtr termVectorsReaderOrig; CompoundFileReaderPtr cfsReader; CompoundFileReaderPtr storeCFSReader; public: TermVectorsReaderPtr getTermVectorsReaderOrig(); FieldsReaderPtr getFieldsReaderOrig(); void incRef(); DirectoryPtr getCFSReader(); TermInfosReaderPtr getTermsReader(); bool termsIndexIsLoaded(); /// NOTE: only called from IndexWriter when a near real-time reader is opened, or applyDeletes is run, /// sharing a segment that's still being merged. 
This method is not fully thread safe, and relies on the /// synchronization in IndexWriter void loadTermsIndex(const SegmentInfoPtr& si, int32_t termsIndexDivisor); void openDocStores(const SegmentInfoPtr& si); void decRef(); friend class SegmentReader; }; /// Sets the initial value class LPPAPI FieldsReaderLocal : public CloseableThreadLocal { public: FieldsReaderLocal(const SegmentReaderPtr& reader); protected: SegmentReaderWeakPtr _reader; protected: virtual FieldsReaderPtr initialValue(); }; class LPPAPI SegmentReaderRef : public LuceneObject { public: SegmentReaderRef(); virtual ~SegmentReaderRef(); LUCENE_CLASS(SegmentReaderRef); protected: int32_t _refCount; public: virtual String toString(); int32_t refCount(); int32_t incRef(); int32_t decRef(); friend class SegmentReader; }; /// Byte[] referencing is used because a new norm object needs to be created for each clone, and the byte /// array is all that is needed for sharing between cloned readers. The current norm referencing is for /// sharing between readers whereas the byte[] referencing is for copy on write which is independent of /// reader references (i.e. incRef, decRef). class LPPAPI Norm : public LuceneObject { public: Norm(); Norm(const SegmentReaderPtr& reader, const IndexInputPtr& in, int32_t number, int64_t normSeek); virtual ~Norm(); LUCENE_CLASS(Norm); protected: SegmentReaderWeakPtr _reader; int32_t refCount; /// If this instance is a clone, the originalNorm references the Norm that has a real open IndexInput NormPtr origNorm; SegmentReaderPtr origReader; IndexInputPtr in; int64_t normSeek; SegmentReaderRefPtr _bytesRef; ByteArray _bytes; bool dirty; int32_t number; bool rollbackDirty; public: void incRef(); void decRef(); /// Load bytes but do not cache them if they were not already cached void bytes(uint8_t* bytesOut, int32_t offset, int32_t length); /// Load & cache full bytes array. Returns bytes. 
ByteArray bytes(); /// Only for testing SegmentReaderRefPtr bytesRef(); /// Called if we intend to change a norm value. We make a private copy of bytes if it's shared // with others ByteArray copyOnWrite(); /// Returns a copy of this Norm instance that shares IndexInput & bytes with the original one virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); /// Flush all pending changes to the next generation separate norms file. void reWrite(const SegmentInfoPtr& si); protected: void closeInput(); friend class SegmentReader; }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_Similarity.h000066400000000000000000000015451456444476200231350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _SIMILARITY_H #define _SIMILARITY_H #include "Explanation.h" namespace Lucene { class SimilarityIDFExplanation : public IDFExplanation { public: SimilarityIDFExplanation(int32_t df, int32_t max, double idf); SimilarityIDFExplanation(const String& exp, double idf); virtual ~SimilarityIDFExplanation(); LUCENE_CLASS(SimilarityIDFExplanation); protected: String exp; int32_t df; int32_t max; double idf; public: virtual String explain(); virtual double getIdf(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_SimpleFSDirectory.h000066400000000000000000000051021456444476200243470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _SIMPLEFSDIRECTORY_H #define _SIMPLEFSDIRECTORY_H #include "BufferedIndexInput.h" #include "BufferedIndexOutput.h" namespace Lucene { class LPPAPI InputFile : public LuceneObject { public: InputFile(const String& path); virtual ~InputFile(); LUCENE_CLASS(InputFile); public: static const int32_t FILE_EOF; static const int32_t FILE_ERROR; protected: ifstreamPtr file; int64_t position; int64_t length; public: void setPosition(int64_t position); int64_t getPosition(); int64_t getLength(); int32_t read(uint8_t* b, int32_t offset, int32_t length); void close(); bool isValid(); }; class LPPAPI SimpleFSIndexInput : public BufferedIndexInput { public: SimpleFSIndexInput(); SimpleFSIndexInput(const String& path, int32_t bufferSize, int32_t chunkSize); virtual ~SimpleFSIndexInput(); LUCENE_CLASS(SimpleFSIndexInput); protected: String path; InputFilePtr file; bool isClone; int32_t chunkSize; protected: virtual void readInternal(uint8_t* b, int32_t offset, int32_t length); virtual void seekInternal(int64_t pos); public: virtual int64_t length(); virtual void close(); /// Method used for testing. bool isValid(); /// Returns a clone of this stream. 
virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); }; class OutputFile : public LuceneObject { public: OutputFile(const String& path); virtual ~OutputFile(); LUCENE_CLASS(OutputFile); protected: ofstreamPtr file; String path; public: bool write(const uint8_t* b, int32_t offset, int32_t length); void close(); void setPosition(int64_t position); int64_t getLength(); void setLength(int64_t length); void flush(); bool isValid(); }; class SimpleFSIndexOutput : public BufferedIndexOutput { public: SimpleFSIndexOutput(const String& path); virtual ~SimpleFSIndexOutput(); LUCENE_CLASS(SimpleFSIndexOutput); protected: OutputFilePtr file; bool isOpen; public: virtual void flushBuffer(const uint8_t* b, int32_t offset, int32_t length); virtual void close(); virtual void seek(int64_t pos); virtual int64_t length(); virtual void setLength(int64_t length); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_SimpleFSLockFactory.h000066400000000000000000000022151456444476200246250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _SIMPLEFSLOCKFACTORY_H #define _SIMPLEFSLOCKFACTORY_H #include "Lock.h" namespace Lucene { class SimpleFSLock : public Lock { public: SimpleFSLock(const String& lockDir, const String& lockFileName); virtual ~SimpleFSLock(); LUCENE_CLASS(SimpleFSLock); public: String lockDir; String lockFile; public: /// Attempts to obtain exclusive access and immediately return upon success or failure. /// @return true if exclusive access is obtained. virtual bool obtain(); /// Releases exclusive access. virtual void release(); /// Returns true if the resource is currently locked. 
Note that one must still call {@link #obtain()} /// before using the resource. virtual bool isLocked(); /// Returns derived object name. virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_SingleInstanceLockFactory.h000066400000000000000000000022771456444476200260610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _SINGLEINSTANCELOCKFACTORY_H #define _SINGLEINSTANCELOCKFACTORY_H #include "Lock.h" namespace Lucene { class SingleInstanceLock : public Lock { public: SingleInstanceLock(HashSet locks, const String& lockName); virtual ~SingleInstanceLock(); LUCENE_CLASS(SingleInstanceLock); protected: HashSet locks; String lockName; public: /// Attempts to obtain exclusive access and immediately return /// upon success or failure. /// @return true if exclusive access is obtained. virtual bool obtain(); /// Releases exclusive access. virtual void release(); /// Returns true if the resource is currently locked. Note that /// one must still call {@link #obtain()} before using the resource. virtual bool isLocked(); /// Returns derived object name. virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_SnapshotDeletionPolicy.h000066400000000000000000000031111456444476200254410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _SNAPSHOTDELETIONPOLICY_H #define _SNAPSHOTDELETIONPOLICY_H #include "IndexCommit.h" namespace Lucene { class MyCommitPoint : public IndexCommit { public: MyCommitPoint(const SnapshotDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& cp); virtual ~MyCommitPoint(); LUCENE_CLASS(MyCommitPoint); protected: SnapshotDeletionPolicyWeakPtr _deletionPolicy; public: IndexCommitPtr cp; public: virtual String toString(); /// Get the segments file (segments_N) associated with this commit point. virtual String getSegmentsFileName(); /// Returns all index files referenced by this commit point. virtual HashSet getFileNames(); /// Returns the {@link Directory} for the index. virtual DirectoryPtr getDirectory(); /// Delete this commit point. virtual void deleteCommit(); virtual bool isDeleted(); /// Returns the version for this IndexCommit. virtual int64_t getVersion(); /// Returns the generation (the _N in segments_N) for this IndexCommit. virtual int64_t getGeneration(); /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. virtual MapStringString getUserData(); virtual bool isOptimized(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_SortedVIntList.h000066400000000000000000000016161456444476200237030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _SORTEDVINTLIST_H #define _SORTEDVINTLIST_H #include "DocIdSetIterator.h" namespace Lucene { class SortedDocIdSetIterator : public DocIdSetIterator { public: SortedDocIdSetIterator(const SortedVIntListPtr& list); virtual ~SortedDocIdSetIterator(); LUCENE_CLASS(SortedDocIdSetIterator); public: SortedVIntListWeakPtr _list; int32_t bytePos; int32_t lastInt; int32_t doc; public: virtual int32_t docID(); virtual int32_t nextDoc(); virtual int32_t advance(int32_t target); protected: void advance(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_SpanFirstQuery.h000066400000000000000000000016331456444476200237440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _SPANFIRSTQUERY_H #define _SPANFIRSTQUERY_H #include "Spans.h" namespace Lucene { class FirstSpans : public Spans { public: FirstSpans(const SpanFirstQueryPtr& query, const SpansPtr& spans); virtual ~FirstSpans(); LUCENE_CLASS(FirstSpans); protected: SpanFirstQueryPtr query; SpansPtr spans; public: virtual bool next(); virtual bool skipTo(int32_t target); virtual int32_t doc(); virtual int32_t start(); virtual int32_t end(); virtual Collection getPayload(); virtual bool isPayloadAvailable(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_SpanNotQuery.h000066400000000000000000000020351456444476200234120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _SPANNOTQUERY_H #define _SPANNOTQUERY_H #include "Spans.h" namespace Lucene { class NotSpans : public Spans { public: NotSpans(const SpanNotQueryPtr& query, const SpansPtr& includeSpans, const SpansPtr& excludeSpans); virtual ~NotSpans(); LUCENE_CLASS(NotSpans); protected: SpanNotQueryPtr query; SpansPtr includeSpans; bool moreInclude; SpansPtr excludeSpans; bool moreExclude; public: virtual bool next(); virtual bool skipTo(int32_t target); virtual int32_t doc(); virtual int32_t start(); virtual int32_t end(); virtual Collection getPayload(); virtual bool isPayloadAvailable(); virtual String toString(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_SpanOrQuery.h000066400000000000000000000024251456444476200232350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef _SPANORQUERY_H #define _SPANORQUERY_H #include "PriorityQueue.h" #include "Spans.h" namespace Lucene { class SpanQueue : public PriorityQueue { public: SpanQueue(int32_t size); virtual ~SpanQueue(); LUCENE_CLASS(SpanQueue); protected: virtual bool lessThan(const SpansPtr& first, const SpansPtr& second); }; class OrSpans : public Spans { public: OrSpans(const SpanOrQueryPtr& query, const IndexReaderPtr& reader); virtual ~OrSpans(); LUCENE_CLASS(OrSpans); protected: SpanOrQueryPtr query; IndexReaderPtr reader; SpanQueuePtr queue; public: virtual bool next(); virtual bool skipTo(int32_t target); virtual int32_t doc(); virtual int32_t start(); virtual int32_t end(); virtual Collection getPayload(); virtual bool isPayloadAvailable(); virtual String toString(); protected: bool initSpanQueue(int32_t target); SpansPtr top(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_StandardAnalyzer.h000066400000000000000000000012201456444476200242430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _STANDARDANALYZER_H #define _STANDARDANALYZER_H #include "LuceneObject.h" namespace Lucene { class StandardAnalyzerSavedStreams : public LuceneObject { public: virtual ~StandardAnalyzerSavedStreams(); public: StandardTokenizerPtr tokenStream; TokenStreamPtr filteredTokenStream; }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_StopAnalyzer.h000066400000000000000000000013031456444476200234320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _STOPANALYZER_H #define _STOPANALYZER_H #include "LuceneObject.h" namespace Lucene { /// Filters LowerCaseTokenizer with StopFilter. class StopAnalyzerSavedStreams : public LuceneObject { public: virtual ~StopAnalyzerSavedStreams(); LUCENE_CLASS(StopAnalyzerSavedStreams); public: TokenizerPtr source; TokenStreamPtr result; }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_TermQuery.h000066400000000000000000000021771456444476200227460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _TERMQUERY_H #define _TERMQUERY_H #include "Weight.h" namespace Lucene { class TermWeight : public Weight { public: TermWeight(const TermQueryPtr& query, const SearcherPtr& searcher); virtual ~TermWeight(); LUCENE_CLASS(TermWeight); protected: TermQueryPtr query; SimilarityPtr similarity; double value; double idf; double queryNorm; double queryWeight; IDFExplanationPtr idfExp; public: virtual String toString(); virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_TimeLimitingCollector.h000066400000000000000000000015031456444476200252430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 
2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _TIMELIMITINGCOLLECTOR_H #define _TIMELIMITINGCOLLECTOR_H #include "LuceneThread.h" namespace Lucene { class TimerThread : public LuceneThread { public: TimerThread(); virtual ~TimerThread(); LUCENE_CLASS(TimerThread); protected: int64_t time; bool _stopThread; public: virtual void start(); virtual void run(); /// Get the timer value in milliseconds. int64_t getMilliseconds(); /// Stop timer thread. void stopThread(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_TopFieldCollector.h000066400000000000000000000200221456444476200243530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _TOPFIELDCOLLECTOR_H #define _TOPFIELDCOLLECTOR_H #include "TopDocsCollector.h" namespace Lucene { /// Implements a TopFieldCollector over one SortField criteria, without tracking document scores and maxScore. 
class OneComparatorNonScoringCollector : public TopFieldCollector { public: OneComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); virtual ~OneComparatorNonScoringCollector(); LUCENE_CLASS(OneComparatorNonScoringCollector); public: FieldComparatorPtr comparator; int32_t reverseMul; public: virtual void initialize(); virtual void updateBottom(int32_t doc); virtual void collect(int32_t doc); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); virtual void setScorer(const ScorerPtr& scorer); }; /// Implements a TopFieldCollector over one SortField criteria, without tracking document scores and maxScore, /// and assumes out of orderness in doc Ids collection. class OutOfOrderOneComparatorNonScoringCollector : public OneComparatorNonScoringCollector { public: OutOfOrderOneComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); virtual ~OutOfOrderOneComparatorNonScoringCollector(); LUCENE_CLASS(OutOfOrderOneComparatorNonScoringCollector); public: virtual void collect(int32_t doc); virtual bool acceptsDocsOutOfOrder(); }; /// Implements a TopFieldCollector over one SortField criteria, while tracking document scores but no maxScore. class OneComparatorScoringNoMaxScoreCollector : public OneComparatorNonScoringCollector { public: OneComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); virtual ~OneComparatorScoringNoMaxScoreCollector(); LUCENE_CLASS(OneComparatorScoringNoMaxScoreCollector); public: ScorerPtr scorer; public: virtual void updateBottom(int32_t doc, double score); virtual void collect(int32_t doc); virtual void setScorer(const ScorerPtr& scorer); }; /// Implements a TopFieldCollector over one SortField criteria, while tracking document scores but no maxScore, /// and assumes out of orderness in doc Ids collection. 
class OutOfOrderOneComparatorScoringNoMaxScoreCollector : public OneComparatorScoringNoMaxScoreCollector { public: OutOfOrderOneComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); virtual ~OutOfOrderOneComparatorScoringNoMaxScoreCollector(); LUCENE_CLASS(OutOfOrderOneComparatorScoringNoMaxScoreCollector); public: virtual void collect(int32_t doc); virtual bool acceptsDocsOutOfOrder(); }; /// Implements a TopFieldCollector over one SortField criteria, with tracking document scores and maxScore. class OneComparatorScoringMaxScoreCollector : public OneComparatorNonScoringCollector { public: OneComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); virtual ~OneComparatorScoringMaxScoreCollector(); LUCENE_CLASS(OneComparatorScoringMaxScoreCollector); public: ScorerPtr scorer; public: virtual void updateBottom(int32_t doc, double score); virtual void collect(int32_t doc); virtual void setScorer(const ScorerPtr& scorer); }; /// Implements a TopFieldCollector over one SortField criteria, with tracking document scores and maxScore, /// and assumes out of orderness in doc Ids collection. class OutOfOrderOneComparatorScoringMaxScoreCollector : public OneComparatorScoringMaxScoreCollector { public: OutOfOrderOneComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); virtual ~OutOfOrderOneComparatorScoringMaxScoreCollector(); LUCENE_CLASS(OutOfOrderOneComparatorScoringMaxScoreCollector); public: virtual void collect(int32_t doc); virtual bool acceptsDocsOutOfOrder(); }; /// Implements a TopFieldCollector over multiple SortField criteria, without tracking document scores and maxScore. 
class MultiComparatorNonScoringCollector : public TopFieldCollector { public: MultiComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); virtual ~MultiComparatorNonScoringCollector(); LUCENE_CLASS(MultiComparatorNonScoringCollector); public: Collection comparators; Collection reverseMul; public: virtual void initialize(); virtual void updateBottom(int32_t doc); virtual void collect(int32_t doc); virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); virtual void setScorer(const ScorerPtr& scorer); }; /// Implements a TopFieldCollector over multiple SortField criteria, without tracking document scores and maxScore. class OutOfOrderMultiComparatorNonScoringCollector : public MultiComparatorNonScoringCollector { public: OutOfOrderMultiComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); virtual ~OutOfOrderMultiComparatorNonScoringCollector(); LUCENE_CLASS(OutOfOrderMultiComparatorNonScoringCollector); public: virtual void collect(int32_t doc); virtual bool acceptsDocsOutOfOrder(); }; /// Implements a TopFieldCollector over multiple SortField criteria, with tracking document scores and maxScore. class MultiComparatorScoringMaxScoreCollector : public MultiComparatorNonScoringCollector { public: MultiComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); virtual ~MultiComparatorScoringMaxScoreCollector(); LUCENE_CLASS(MultiComparatorScoringMaxScoreCollector); public: ScorerWeakPtr _scorer; public: virtual void updateBottom(int32_t doc, double score); virtual void collect(int32_t doc); virtual void setScorer(const ScorerPtr& scorer); }; /// Implements a TopFieldCollector over multiple SortField criteria, without tracking document scores and maxScore. 
class OutOfOrderMultiComparatorScoringMaxScoreCollector : public MultiComparatorScoringMaxScoreCollector { public: OutOfOrderMultiComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); virtual ~OutOfOrderMultiComparatorScoringMaxScoreCollector(); LUCENE_CLASS(OutOfOrderMultiComparatorScoringMaxScoreCollector); public: virtual void collect(int32_t doc); virtual bool acceptsDocsOutOfOrder(); }; /// Implements a TopFieldCollector over multiple SortField criteria, with tracking document scores and maxScore. class MultiComparatorScoringNoMaxScoreCollector : public MultiComparatorNonScoringCollector { public: MultiComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); virtual ~MultiComparatorScoringNoMaxScoreCollector(); LUCENE_CLASS(MultiComparatorScoringNoMaxScoreCollector); public: ScorerWeakPtr _scorer; public: virtual void updateBottom(int32_t doc, double score); virtual void collect(int32_t doc); virtual void setScorer(const ScorerPtr& scorer); }; /// Implements a TopFieldCollector over multiple SortField criteria, with tracking document scores and maxScore, /// and assumes out of orderness in doc Ids collection. 
class OutOfOrderMultiComparatorScoringNoMaxScoreCollector : public MultiComparatorScoringNoMaxScoreCollector { public: OutOfOrderMultiComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); virtual ~OutOfOrderMultiComparatorScoringNoMaxScoreCollector(); LUCENE_CLASS(OutOfOrderMultiComparatorScoringNoMaxScoreCollector); public: virtual void collect(int32_t doc); virtual void setScorer(const ScorerPtr& scorer); virtual bool acceptsDocsOutOfOrder(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_TopScoreDocCollector.h000066400000000000000000000022311456444476200250330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _TOPSCOREDOCCOLLECTOR_H #define _TOPSCOREDOCCOLLECTOR_H #include "TopDocsCollector.h" namespace Lucene { /// Assumes docs are scored in order. class InOrderTopScoreDocCollector : public TopScoreDocCollector { public: InOrderTopScoreDocCollector(int32_t numHits); virtual ~InOrderTopScoreDocCollector(); LUCENE_CLASS(InOrderTopScoreDocCollector); public: virtual void collect(int32_t doc); virtual bool acceptsDocsOutOfOrder(); }; /// Assumes docs are scored out of order. 
class OutOfOrderTopScoreDocCollector : public TopScoreDocCollector { public: OutOfOrderTopScoreDocCollector(int32_t numHits); virtual ~OutOfOrderTopScoreDocCollector(); LUCENE_CLASS(OutOfOrderTopScoreDocCollector); public: virtual void collect(int32_t doc); virtual bool acceptsDocsOutOfOrder(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/include/_ValueSourceQuery.h000066400000000000000000000036461456444476200242760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef _VALUESOURCEQUERY_H #define _VALUESOURCEQUERY_H #include "Weight.h" #include "Scorer.h" namespace Lucene { class ValueSourceWeight : public Weight { public: ValueSourceWeight(const ValueSourceQueryPtr& query, const SearcherPtr& searcher); virtual ~ValueSourceWeight(); LUCENE_CLASS(ValueSourceWeight); public: ValueSourceQueryPtr query; SimilarityPtr similarity; double queryNorm; double queryWeight; public: virtual QueryPtr getQuery(); virtual double getValue(); virtual double sumOfSquaredWeights(); virtual void normalize(double norm); virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); }; /// A scorer that (simply) matches all documents, and scores each document with the value of the value /// source in effect. As an example, if the value source is a (cached) field source, then value of that /// field in that document will be used. (assuming field is indexed for this doc, with a single token.) 
class ValueSourceScorer : public Scorer { public: ValueSourceScorer(const SimilarityPtr& similarity, const IndexReaderPtr& reader, const ValueSourceWeightPtr& weight); virtual ~ValueSourceScorer(); LUCENE_CLASS(ValueSourceScorer); public: ValueSourceWeightPtr weight; double qWeight; DocValuesPtr vals; TermDocsPtr termDocs; int32_t doc; public: virtual int32_t nextDoc(); virtual int32_t docID(); virtual int32_t advance(int32_t target); virtual double score(); }; } #endif LucenePlusPlus-rel_3.0.9/src/core/index/000077500000000000000000000000001456444476200201565ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/index/AbstractAllTermDocs.cpp000066400000000000000000000031541456444476200245220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "AbstractAllTermDocs.h" namespace Lucene { AbstractAllTermDocs::AbstractAllTermDocs(int32_t maxDoc) { this->maxDoc = maxDoc; this->_doc = -1; } AbstractAllTermDocs::~AbstractAllTermDocs() { } void AbstractAllTermDocs::seek(const TermPtr& term) { if (!term) { _doc = -1; } else { boost::throw_exception(UnsupportedOperationException()); } } void AbstractAllTermDocs::seek(const TermEnumPtr& termEnum) { boost::throw_exception(UnsupportedOperationException()); } int32_t AbstractAllTermDocs::doc() { return _doc; } int32_t AbstractAllTermDocs::freq() { return 1; } bool AbstractAllTermDocs::next() { return skipTo(_doc + 1); } int32_t AbstractAllTermDocs::read(Collection& docs, Collection& freqs) { int32_t length = docs.size(); int32_t i = 0; while (i < length && _doc < maxDoc) { if (!isDeleted(_doc)) { docs[i] = _doc; freqs[i] = 1; ++i; } ++_doc; } return i; } bool 
AbstractAllTermDocs::skipTo(int32_t target) { _doc = target; while (_doc < maxDoc) { if (!isDeleted(_doc)) { return true; } ++_doc; } return false; } void AbstractAllTermDocs::close() { } } LucenePlusPlus-rel_3.0.9/src/core/index/AllTermDocs.cpp000066400000000000000000000014671456444476200230430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "AllTermDocs.h" #include "SegmentReader.h" #include "BitVector.h" namespace Lucene { AllTermDocs::AllTermDocs(const SegmentReaderPtr& parent) : AbstractAllTermDocs(parent->maxDoc()) { SyncLock parentLock(parent); this->_deletedDocs = parent->deletedDocs; } AllTermDocs::~AllTermDocs() { } bool AllTermDocs::isDeleted(int32_t doc) { BitVectorPtr deletedDocs(_deletedDocs.lock()); return (deletedDocs && deletedDocs->get(_doc)); } } LucenePlusPlus-rel_3.0.9/src/core/index/BufferedDeletes.cpp000066400000000000000000000070241456444476200237150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BufferedDeletes.h" #include "MergeDocIDRemapper.h" namespace Lucene { BufferedDeletes::BufferedDeletes(bool doTermSort) { // doTermSort not used: always use sorted term map terms = MapTermNum::newInstance(); queries = MapQueryInt::newInstance(); docIDs = Collection::newInstance(); numTerms = 0; bytesUsed = 0; } BufferedDeletes::~BufferedDeletes() { } int32_t BufferedDeletes::size() { // We use numTerms not terms.size() intentionally, so that deletes by the same term // multiple times "count", ie if you ask to flush every 1000 deletes then even dup'd // terms are counted towards that 1000 return numTerms + queries.size() + docIDs.size(); } void BufferedDeletes::update(const BufferedDeletesPtr& in) { numTerms += in->numTerms; bytesUsed += in->bytesUsed; terms.putAll(in->terms.begin(), in->terms.end()); queries.putAll(in->queries.begin(), in->queries.end()); docIDs.addAll(in->docIDs.begin(), in->docIDs.end()); in->clear(); } void BufferedDeletes::clear() { terms.clear(); queries.clear(); docIDs.clear(); numTerms = 0; bytesUsed = 0; } void BufferedDeletes::addBytesUsed(int64_t b) { bytesUsed += b; } bool BufferedDeletes::any() { return (!terms.empty() || !docIDs.empty() || !queries.empty()); } void BufferedDeletes::remap(const MergeDocIDRemapperPtr& mapper, const SegmentInfosPtr& infos, Collection< Collection > docMaps, Collection delCounts, const OneMergePtr& merge, int32_t mergedDocCount) { SyncLock syncLock(this); MapTermNum newDeleteTerms; // Remap delete-by-term if (!terms.empty()) { newDeleteTerms = MapTermNum::newInstance(); for (MapTermNum::iterator entry = terms.begin(); entry != terms.end(); ++entry) { newDeleteTerms.put(entry->first, newLucene(mapper->remap(entry->second->getNum()))); } } // Remap delete-by-docID Collection newDeleteDocIDs; if (!docIDs.empty()) { newDeleteDocIDs = Collection::newInstance(); for (Collection::iterator num = 
docIDs.begin(); num != docIDs.end(); ++num) { newDeleteDocIDs.add(mapper->remap(*num)); } } // Remap delete-by-query MapQueryInt newDeleteQueries; if (!queries.empty()) { newDeleteQueries = MapQueryInt::newInstance(); for (MapQueryInt::iterator entry = queries.begin(); entry != queries.end(); ++entry) { newDeleteQueries.put(entry->first, mapper->remap(entry->second)); } } if (newDeleteTerms) { terms = newDeleteTerms; } if (newDeleteDocIDs) { docIDs = newDeleteDocIDs; } if (newDeleteQueries) { queries = newDeleteQueries; } } Num::Num(int32_t num) { this->num = num; } int32_t Num::getNum() { return num; } void Num::setNum(int32_t num) { // Only record the new number if it's greater than the current one. This is important // because if multiple threads are replacing the same doc at nearly the same time, it's // possible that one thread that got a higher docID is scheduled before the other threads. this->num = std::max(this->num, num); } } LucenePlusPlus-rel_3.0.9/src/core/index/ByteBlockPool.cpp000066400000000000000000000073431456444476200234010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ByteBlockPool.h" #include "DocumentsWriter.h" #include "MiscUtils.h" namespace Lucene { // Size of each slice. These arrays should be at most 16 elements (index is encoded with 4 bits). First array // is just a compact way to encode X+1 with a max. Second array is the length of each slice, ie first slice is // 5 bytes, next slice is 14 bytes, etc. 
// Slice-level progression table (index encoded in the low 4 bits of the slice's
// trailing byte) and the byte length of a slice at each level.
const int32_t ByteBlockPool::nextLevelArray[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
const int32_t ByteBlockPool::levelSizeArray[] = {5, 14, 20, 30, 40, 40, 80, 80, 120, 200};

// Constructor: pool starts with no buffer allocated; byteUpto is positioned so
// the first write triggers nextBuffer().
// NOTE(review): Collection::newInstance(10) likely lost a template argument in extraction.
ByteBlockPool::ByteBlockPool(const ByteBlockPoolAllocatorBasePtr& allocator, bool trackAllocations) {
    buffers = Collection::newInstance(10);
    bufferUpto = -1;
    byteUpto = DocumentsWriter::BYTE_BLOCK_SIZE;
    byteOffset = -DocumentsWriter::BYTE_BLOCK_SIZE;
    this->allocator = allocator;
    this->trackAllocations = trackAllocations;
}

ByteBlockPool::~ByteBlockPool() {
}

// Size in bytes of a freshly-created (level 0) slice.
int32_t ByteBlockPool::FIRST_LEVEL_SIZE() {
    return levelSizeArray[0];
}

// Zeroes used buffers, recycles all but the first, and rewinds the pool.
void ByteBlockPool::reset() {
    if (bufferUpto != -1) {
        // We allocated at least one buffer
        for (int32_t i = 0; i < bufferUpto; ++i) {
            // Fully zero fill buffers that we fully used
            MiscUtils::arrayFill(buffers[i].get(), 0, buffers[i].size(), 0);
        }
        // Partial zero fill the final buffer
        MiscUtils::arrayFill(buffers[bufferUpto].get(), 0, byteUpto, 0);
        if (bufferUpto > 0) {
            // Recycle all but the first buffer
            allocator->recycleByteBlocks(buffers, 1, 1 + bufferUpto);
        }
        // Re-use the first buffer
        bufferUpto = 0;
        byteUpto = 0;
        byteOffset = 0;
        buffer = buffers[0];
    }
}

// Advances to a fresh block, growing the buffers collection by 1.5x if full.
void ByteBlockPool::nextBuffer() {
    if (1 + bufferUpto == buffers.size()) {
        buffers.resize((int32_t)((double)buffers.size() * 1.5));
    }
    buffers[1 + bufferUpto] = allocator->getByteBlock(trackAllocations);
    buffer = buffers[1 + bufferUpto];
    ++bufferUpto;
    byteUpto = 0;
    byteOffset += DocumentsWriter::BYTE_BLOCK_SIZE;
}

// Allocates a new slice of the given size; the last byte is set to 16
// (level 0 end marker).  Returns the slice's offset in the current buffer.
int32_t ByteBlockPool::newSlice(int32_t size) {
    if (byteUpto > DocumentsWriter::BYTE_BLOCK_SIZE - size) {
        nextBuffer();
    }
    int32_t upto = byteUpto;
    byteUpto += size;
    buffer[byteUpto - 1] = 16;
    return upto;
}

// Grows a full slice to the next level: allocates the larger slice, copies the
// last 3 bytes forward, and overwrites them with a 4-byte forwarding address.
// Returns the position in the new slice where writing should continue.
int32_t ByteBlockPool::allocSlice(ByteArray slice, int32_t upto) {
    int32_t level = slice[upto] & 15;
    int32_t newLevel = nextLevelArray[level];
    int32_t newSize = levelSizeArray[newLevel];
    // Maybe allocate another block
    if (byteUpto > DocumentsWriter::BYTE_BLOCK_SIZE - newSize) {
        nextBuffer();
    }
    int32_t newUpto = byteUpto;
    int32_t offset = newUpto + byteOffset;
    byteUpto += newSize;
    // Copy forward the past 3 bytes (which we are about to overwrite with the forwarding address)
    buffer[newUpto] = slice[upto - 3];
    buffer[newUpto + 1] = slice[upto - 2];
    buffer[newUpto + 2] = slice[upto - 1];
    // Write forwarding address at end of last slice
    slice[upto - 3] = (uint8_t)MiscUtils::unsignedShift(offset, 24);
    slice[upto - 2] = (uint8_t)MiscUtils::unsignedShift(offset, 16);
    slice[upto - 1] = (uint8_t)MiscUtils::unsignedShift(offset, 8);
    slice[upto] = (uint8_t)offset;
    // Write new level
    buffer[byteUpto - 1] = (uint8_t)(16 | newLevel);
    return (newUpto + 3);
}

ByteBlockPoolAllocatorBase::~ByteBlockPoolAllocatorBase() {
}

}
LucenePlusPlus-rel_3.0.9/src/core/index/ByteSliceReader.cpp000066400000000000000000000102111456444476200236650ustar00rootroot00000000000000/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#include "LuceneInc.h"
#include "ByteSliceReader.h"
#include "DocumentsWriter.h"
#include "IndexOutput.h"
#include "MiscUtils.h"

namespace Lucene {

// ByteSliceReader reads a chain of slices written by ByteSliceWriter into a
// ByteBlockPool, following 4-byte forwarding addresses between slices.

ByteSliceReader::ByteSliceReader() {
    bufferUpto = 0;
    upto = 0;
    limit = 0;
    level = 0;
    bufferOffset = 0;
    endIndex = 0;
}

ByteSliceReader::~ByteSliceReader() {
}

// Positions the reader on the first slice: [startIndex, endIndex) are absolute
// offsets into the pool.
void ByteSliceReader::init(const ByteBlockPoolPtr& pool, int32_t startIndex, int32_t endIndex) {
    BOOST_ASSERT(endIndex - startIndex >= 0);
    BOOST_ASSERT(startIndex >= 0);
    BOOST_ASSERT(endIndex >= 0);
    this->pool = pool;
    this->endIndex = endIndex;
    level = 0;
    bufferUpto = startIndex / DocumentsWriter::BYTE_BLOCK_SIZE;
    bufferOffset = bufferUpto * DocumentsWriter::BYTE_BLOCK_SIZE;
    buffer = pool->buffers[bufferUpto];
    upto = startIndex & DocumentsWriter::BYTE_BLOCK_MASK;
    int32_t firstSize = ByteBlockPool::levelSizeArray[0];
    if (startIndex + firstSize >= endIndex) {
        // There is only this one slice to read
        limit = endIndex & DocumentsWriter::BYTE_BLOCK_MASK;
    } else {
        // Reserve the trailing 4 bytes for the forwarding address
        limit = upto + firstSize - 4;
    }
}

// True once the absolute read position reaches endIndex.
bool ByteSliceReader::eof() {
    BOOST_ASSERT(upto + bufferOffset <= endIndex);
    return (upto + bufferOffset == endIndex);
}

// Reads one byte, advancing to the next slice when the current one is exhausted.
uint8_t ByteSliceReader::readByte() {
    BOOST_ASSERT(!eof());
    BOOST_ASSERT(upto <= limit);
    if (upto == limit) {
        nextSlice();
    }
    return buffer[upto++];
}

// Copies all remaining bytes to 'out'; returns the number of bytes written.
int64_t ByteSliceReader::writeTo(const IndexOutputPtr& out) {
    int64_t size = 0;
    while (true) {
        if (limit + bufferOffset == endIndex) {
            // Final slice
            BOOST_ASSERT(endIndex - bufferOffset >= upto);
            out->writeBytes(buffer.get(), upto, limit - upto);
            size += limit - upto;
            break;
        } else {
            out->writeBytes(buffer.get(), upto, limit - upto);
            size += limit - upto;
            nextSlice();
        }
    }
    return size;
}

// Follows the 4-byte big-endian forwarding address at 'limit' into the next slice.
void ByteSliceReader::nextSlice() {
    // Skip to our next slice
    int32_t nextIndex = ((buffer[limit] & 0xff) << 24) + ((buffer[1 + limit] & 0xff) << 16) + ((buffer[2 + limit] & 0xff) << 8) + (buffer[3 + limit] & 0xff);
    level = ByteBlockPool::nextLevelArray[level];
    int32_t newSize = ByteBlockPool::levelSizeArray[level];
    bufferUpto = nextIndex / DocumentsWriter::BYTE_BLOCK_SIZE;
    bufferOffset = bufferUpto * DocumentsWriter::BYTE_BLOCK_SIZE;
    this->buffer = pool->buffers[bufferUpto];
    upto = nextIndex & DocumentsWriter::BYTE_BLOCK_MASK;
    if (nextIndex + newSize >= endIndex) {
        // We are advancing to the final slice
        BOOST_ASSERT(endIndex - nextIndex > 0);
        limit = endIndex - bufferOffset;
    } else {
        // This is not the final slice (subtract 4 for the forwarding address at the end of this new slice)
        limit = upto + newSize - 4;
    }
}

// Bulk read of 'length' bytes into b+offset, crossing slices as needed.
void ByteSliceReader::readBytes(uint8_t* b, int32_t offset, int32_t length) {
    while (length > 0) {
        int32_t numLeft = limit - upto;
        if (numLeft < length) {
            // Read entire slice
            MiscUtils::arrayCopy(buffer.get(), upto, b, offset, numLeft);
            offset += numLeft;
            length -= numLeft;
            nextSlice();
        } else {
            // This slice is the last one
            MiscUtils::arrayCopy(buffer.get(), upto, b, offset, length);
            upto += length;
            break;
        }
    }
}

// The IndexInput positioning API is not supported by this reader.
int64_t ByteSliceReader::getFilePointer() {
    boost::throw_exception(RuntimeException(L"not implemented"));
    return 0;
}

int64_t ByteSliceReader::length() {
    boost::throw_exception(RuntimeException(L"not implemented"));
    return 0;
}

void ByteSliceReader::seek(int64_t pos) {
    boost::throw_exception(RuntimeException(L"not implemented"));
}

void ByteSliceReader::close() {
    boost::throw_exception(RuntimeException(L"not implemented"));
}

}
LucenePlusPlus-rel_3.0.9/src/core/index/ByteSliceWriter.cpp000066400000000000000000000036541456444476200237500ustar00rootroot00000000000000/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ByteSliceWriter.h" #include "DocumentsWriter.h" #include "MiscUtils.h" namespace Lucene { ByteSliceWriter::ByteSliceWriter(const ByteBlockPoolPtr& pool) { this->pool = pool; upto = 0; offset0 = 0; } ByteSliceWriter::~ByteSliceWriter() { } void ByteSliceWriter::init(int32_t address) { slice = pool->buffers[address >> DocumentsWriter::BYTE_BLOCK_SHIFT]; BOOST_ASSERT(slice); upto = (address & DocumentsWriter::BYTE_BLOCK_MASK); offset0 = address; BOOST_ASSERT(upto < slice.size()); } void ByteSliceWriter::writeByte(uint8_t b) { BOOST_ASSERT(slice); if (slice[upto] != 0) { upto = pool->allocSlice(slice, upto); slice = pool->buffer; offset0 = pool->byteOffset; BOOST_ASSERT(slice); } slice[upto++] = b; BOOST_ASSERT(upto != slice.size()); } void ByteSliceWriter::writeBytes(const uint8_t* b, int32_t offset, int32_t length) { int32_t offsetEnd = offset + length; while (offset < offsetEnd) { if (slice[upto] != 0) { // End marker upto = pool->allocSlice(slice, upto); slice = pool->buffer; offset0 = pool->byteOffset; } slice[upto++] = b[offset++]; BOOST_ASSERT(upto != slice.size()); } } int32_t ByteSliceWriter::getAddress() { return upto + (offset0 & DocumentsWriter::BYTE_BLOCK_NOT_MASK); } void ByteSliceWriter::writeVInt(int32_t i) { while ((i & ~0x7f) != 0) { writeByte((uint8_t)((i & 0x7f) | 0x80)); i = MiscUtils::unsignedShift(i, 7); } writeByte((uint8_t)i); } } LucenePlusPlus-rel_3.0.9/src/core/index/CharBlockPool.cpp000066400000000000000000000025331456444476200233470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CharBlockPool.h" #include "DocumentsWriter.h" namespace Lucene { CharBlockPool::CharBlockPool(const DocumentsWriterPtr& docWriter) { numBuffer = 0; bufferUpto = -1; charUpto = DocumentsWriter::CHAR_BLOCK_SIZE; charOffset = -DocumentsWriter::CHAR_BLOCK_SIZE; buffers = Collection::newInstance(10); this->_docWriter = docWriter; } CharBlockPool::~CharBlockPool() { } void CharBlockPool::reset() { DocumentsWriterPtr(_docWriter)->recycleCharBlocks(buffers, 1 + bufferUpto); bufferUpto = -1; charUpto = DocumentsWriter::CHAR_BLOCK_SIZE; charOffset = -DocumentsWriter::CHAR_BLOCK_SIZE; } void CharBlockPool::nextBuffer() { if (1 + bufferUpto == buffers.size()) { buffers.resize((int32_t)((double)buffers.size() * 1.5)); } buffers[1 + bufferUpto] = DocumentsWriterPtr(_docWriter)->getCharBlock(); buffer = buffers[1 + bufferUpto]; ++bufferUpto; charUpto = 0; charOffset += DocumentsWriter::CHAR_BLOCK_SIZE; } } LucenePlusPlus-rel_3.0.9/src/core/index/CheckIndex.cpp000066400000000000000000000635121456444476200226760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
/////////////////////////////////////////////////////////////////////////////

#include "LuceneInc.h"
#include // NOTE(review): header name lost in extraction — presumably <iostream>; verify against upstream
#include "CheckIndex.h"
#include "_CheckIndex.h"
#include "SegmentInfos.h"
#include "SegmentInfo.h"
#include "SegmentReader.h"
#include "Directory.h"
#include "IndexInput.h"
#include "BitVector.h"
#include "Term.h"
#include "TermEnum.h"
#include "TermPositions.h"
#include "Document.h"
#include "FSDirectory.h"
#include "InfoStream.h"
#include "StringUtils.h"

namespace Lucene {

// CheckIndex validates an index: it opens every segment, checks deletions,
// field norms, the term index, stored fields and term vectors, and can
// optionally rewrite the segments file to drop broken segments (fixIndex).
// NOTE(review): template arguments appear stripped throughout this text
// (newLucene(), Collection(), boost::dynamic_pointer_cast()); verify against
// the upstream LucenePlusPlus 3.0.9 sources before building.

bool CheckIndex::_assertsOn = false;

CheckIndex::CheckIndex(const DirectoryPtr& dir) {
    this->dir = dir;
}

CheckIndex::~CheckIndex() {
}

// Sets the stream that receives progress/diagnostic messages (may be null).
void CheckIndex::setInfoStream(const InfoStreamPtr& out) {
    infoStream = out;
}

// Emits one diagnostic line if an info stream is installed.
void CheckIndex::msg(const String& msg) {
    if (infoStream) {
        *infoStream << msg << L"\n";
    }
}

// Checks every segment of the index.
IndexStatusPtr CheckIndex::checkIndex() {
    return checkIndex(Collection());
}

// Checks the index, optionally restricted to the named segments.  Never
// throws for index corruption — problems are recorded in the returned status.
IndexStatusPtr CheckIndex::checkIndex(Collection onlySegments) {
    SegmentInfosPtr sis(newLucene());
    IndexStatusPtr result(newLucene());
    result->dir = dir;
    try {
        sis->read(dir);
    } catch (...) {
        msg(L"ERROR: could not read any segments file in directory");
        result->missingSegments = true;
        return result;
    }
    int32_t numSegments = sis->size();
    String segmentsFileName(sis->getCurrentSegmentFileName());
    IndexInputPtr input;
    try {
        input = dir->openInput(segmentsFileName);
    } catch (...) {
        msg(L"ERROR: could not open segments file in directory");
        result->cantOpenSegments = true;
        return result;
    }
    int32_t format = 0;
    try {
        format = input->readInt();
    } catch (...) {
        msg(L"ERROR: could not read segment file version in directory");
        result->missingSegmentVersion = true;
        if (input) {
            input->close();
        }
        return result;
    }
    if (input) {
        input->close();
    }
    // Translate the on-disk format number into a human-readable label.
    String sFormat;
    bool skip = false;
    // NOTE(review): first test is a plain 'if' while the rest form an
    // if/else-if chain — so a FORMAT match can be overwritten below; kept as-is.
    if (format == SegmentInfos::FORMAT) {
        sFormat = L"FORMAT [Lucene Pre-2.1]";
    }
    if (format == SegmentInfos::FORMAT_LOCKLESS) {
        sFormat = L"FORMAT_LOCKLESS [Lucene 2.1]";
    } else if (format == SegmentInfos::FORMAT_SINGLE_NORM_FILE) {
        sFormat = L"FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
    } else if (format == SegmentInfos::FORMAT_SHARED_DOC_STORE) {
        sFormat = L"FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
    } else {
        if (format == SegmentInfos::FORMAT_CHECKSUM) {
            sFormat = L"FORMAT_CHECKSUM [Lucene 2.4]";
        } else if (format == SegmentInfos::FORMAT_DEL_COUNT) {
            sFormat = L"FORMAT_DEL_COUNT [Lucene 2.4]";
        } else if (format == SegmentInfos::FORMAT_HAS_PROX) {
            sFormat = L"FORMAT_HAS_PROX [Lucene 2.4]";
        } else if (format == SegmentInfos::FORMAT_USER_DATA) {
            sFormat = L"FORMAT_USER_DATA [Lucene 2.9]";
        } else if (format == SegmentInfos::FORMAT_DIAGNOSTICS) {
            sFormat = L"FORMAT_DIAGNOSTICS [Lucene 2.9]";
        } else if (format < SegmentInfos::CURRENT_FORMAT) {
            sFormat = L"int=" + StringUtils::toString(format) + L" [newer version of Lucene than this tool]";
            skip = true;
        } else {
            sFormat = StringUtils::toString(format) + L" [Lucene 1.3 or prior]";
        }
    }
    result->segmentsFileName = segmentsFileName;
    result->numSegments = numSegments;
    result->segmentFormat = sFormat;
    result->userData = sis->getUserData();
    String userDataString;
    if (!sis->getUserData().empty()) {
        userDataString = L" userData(size)=" + StringUtils::toString(sis->getUserData().size());
    }
    msg(L"Segments file=" + segmentsFileName + L" numSegments=" + StringUtils::toString(numSegments) + L" version=" + sFormat + userDataString);
    if (onlySegments) {
        result->partial = true;
        msg(L"\nChecking only these segments:");
        for (Collection::iterator s = onlySegments.begin(); s != onlySegments.end(); ++s) {
            msg(L" " + *s);
        }
        result->segmentsChecked.addAll(onlySegments.begin(), onlySegments.end());
        msg(L":");
    }
    if (skip) {
        msg(L"\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on;" \
            L" please re-compile this tool on the matching version of Lucene; exiting");
        result->toolOutOfDate = true;
        return result;
    }
    result->newSegments = boost::dynamic_pointer_cast(sis->clone());
    result->newSegments->clear();
    // Check each segment in turn; a failure in one segment is caught and
    // recorded so the remaining segments are still checked.
    for (int32_t i = 0; i < numSegments; ++i) {
        SegmentInfoPtr info(sis->info(i));
        if (onlySegments && !onlySegments.contains(info->name)) {
            continue;
        }
        SegmentInfoStatusPtr segInfoStat(newLucene());
        result->segmentInfos.add(segInfoStat);
        msg(L" name=" + info->name + L" docCount=" + StringUtils::toString(info->docCount));
        segInfoStat->name = info->name;
        segInfoStat->docCount = info->docCount;
        int32_t toLoseDocCount = info->docCount;
        SegmentReaderPtr reader;
        try {
            msg(L" compound=" + StringUtils::toString(info->getUseCompoundFile()));
            segInfoStat->compound = info->getUseCompoundFile();
            msg(L" hasProx=" + StringUtils::toString(info->getHasProx()));
            segInfoStat->hasProx = info->getHasProx();
            msg(L" numFiles=" + StringUtils::toString(info->files().size()));
            segInfoStat->numFiles = info->files().size();
            msg(L" size (MB)=" + StringUtils::toString((double)info->sizeInBytes() / (double)(1024 * 1024)));
            segInfoStat->sizeMB = (double)info->sizeInBytes() / (double)(1024 * 1024);
            MapStringString diagnostics(info->getDiagnostics());
            segInfoStat->diagnostics = diagnostics;
            if (!diagnostics.empty()) {
                msg(L" diagnostics (size)= " + StringUtils::toString(diagnostics.size()));
            }
            int32_t docStoreOffset = info->getDocStoreOffset();
            if (docStoreOffset != -1) {
                msg(L" docStoreOffset=" + StringUtils::toString(docStoreOffset));
                segInfoStat->docStoreOffset = docStoreOffset;
                msg(L" docStoreSegment=" + info->getDocStoreSegment());
                segInfoStat->docStoreSegment = info->getDocStoreSegment();
                msg(L" docStoreIsCompoundFile=" + StringUtils::toString(info->getDocStoreIsCompoundFile()));
                segInfoStat->docStoreCompoundFile = info->getDocStoreIsCompoundFile();
            }
            String delFileName(info->getDelFileName());
            if (delFileName.empty()) {
                msg(L" no deletions");
                segInfoStat->hasDeletions = false;
            } else {
                msg(L" has deletions [delFileName=" + delFileName + L"]");
                segInfoStat->hasDeletions = true;
                segInfoStat->deletionsFileName = delFileName;
            }
            msg(L" test: open reader.........");
            reader = SegmentReader::get(true, info, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR);
            segInfoStat->openReaderPassed = true;
            int32_t numDocs = reader->numDocs();
            toLoseDocCount = numDocs;
            // Cross-check the deletion bookkeeping three ways.
            if (reader->hasDeletions()) {
                if (reader->deletedDocs->count() != info->getDelCount()) {
                    boost::throw_exception(RuntimeException(L"delete count mismatch: info=" + StringUtils::toString(info->getDelCount()) + L" vs deletedDocs.count()=" + StringUtils::toString(reader->deletedDocs->count())));
                }
                if (reader->deletedDocs->count() > reader->maxDoc()) {
                    boost::throw_exception(RuntimeException(L"too many deleted docs: maxDoc()=" + StringUtils::toString(reader->maxDoc()) + L" vs deletedDocs.count()=" + StringUtils::toString(reader->deletedDocs->count())));
                }
                if (info->docCount - numDocs != info->getDelCount()) {
                    boost::throw_exception(RuntimeException(L"delete count mismatch: info=" + StringUtils::toString(info->getDelCount()) + L" vs reader=" + StringUtils::toString((info->docCount - numDocs))));
                }
                segInfoStat->numDeleted = info->docCount - numDocs;
                msg(L"OK [" + StringUtils::toString(segInfoStat->numDeleted) + L" deleted docs]");
            } else {
                if (info->getDelCount() != 0) {
                    boost::throw_exception(RuntimeException(L"delete count mismatch: info=" + StringUtils::toString(info->getDelCount()) + L" vs reader=" + StringUtils::toString(info->docCount - numDocs)));
                }
                msg(L"OK");
            }
            if (reader->maxDoc() != info->docCount) {
                boost::throw_exception(RuntimeException(L"SegmentReader.maxDoc() " + StringUtils::toString(reader->maxDoc()) + L" != SegmentInfos.docCount " + StringUtils::toString(info->docCount)));
            }
            msg(L" test: fields..............");
            HashSet fieldNames(reader->getFieldNames(IndexReader::FIELD_OPTION_ALL));
            msg(L"OK [" + StringUtils::toString(fieldNames.size()) + L" fields]");
            segInfoStat->numFields = fieldNames.size();
            // Test Field Norms
            segInfoStat->fieldNormStatus = testFieldNorms(Collection::newInstance(fieldNames.begin(), fieldNames.end()), reader);
            // Test the Term Index
            segInfoStat->termIndexStatus = testTermIndex(info, reader);
            // Test Stored Fields
            segInfoStat->storedFieldStatus = testStoredFields(info, reader);
            // Test Term Vectors
            segInfoStat->termVectorStatus = testTermVectors(info, reader);
            // Rethrow the first exception we encountered. This will cause stats for failed segments to be incremented properly
            if (!segInfoStat->fieldNormStatus->error.isNull()) {
                boost::throw_exception(RuntimeException(L"Field Norm test failed"));
            } else if (!segInfoStat->termIndexStatus->error.isNull()) {
                boost::throw_exception(RuntimeException(L"Term Index test failed"));
            } else if (!segInfoStat->storedFieldStatus->error.isNull()) {
                boost::throw_exception(RuntimeException(L"Stored Field test failed"));
            } else if (!segInfoStat->termVectorStatus->error.isNull()) {
                boost::throw_exception(RuntimeException(L"Term Vector test failed"));
            }
            msg(L"");
        } catch (...) {
            // Segment is broken: count its docs as lost and move on.
            msg(L"FAILED");
            String comment(L"fixIndex() would remove reference to this segment");
            msg(L" WARNING: " + comment + L"; full exception:");
            msg(L"");
            result->totLoseDocCount += toLoseDocCount;
            ++result->numBadSegments;
            if (reader) {
                reader->close();
            }
            continue;
        }
        if (reader) {
            reader->close();
        }
        // Keeper
        result->newSegments->add(boost::dynamic_pointer_cast(info->clone()));
    }
    if (result->numBadSegments == 0) {
        result->clean = true;
        msg(L"No problems were detected with this index.\n");
    } else {
        msg(L"WARNING: " + StringUtils::toString(result->numBadSegments) + L" broken segments (containing " + StringUtils::toString(result->totLoseDocCount) + L" documents) detected");
    }
    return result;
}

// Reads the norms array for every field that has norms; any exception is
// recorded in the returned status rather than propagated.
FieldNormStatusPtr CheckIndex::testFieldNorms(Collection fieldNames, const SegmentReaderPtr& reader) {
    FieldNormStatusPtr status(newLucene());
    try {
        // Test Field Norms
        msg(L" test: field norms.........");
        ByteArray b(ByteArray::newInstance(reader->maxDoc()));
        for (Collection::iterator fieldName = fieldNames.begin(); fieldName != fieldNames.end(); ++fieldName) {
            if (reader->hasNorms(*fieldName)) {
                reader->norms(*fieldName, b, 0);
                ++status->totFields;
            }
        }
        msg(L"OK [" + StringUtils::toString(status->totFields) + L" fields]");
    } catch (LuceneException& e) {
        msg(L"ERROR [" + e.getError() + L"]");
        status->error = e;
    }
    return status;
}

// Walks every term and all its postings/positions, verifying docIDs are
// ascending and in range, freqs are positive, positions are non-decreasing,
// and docFreq matches the observed (live + deleted) document count.
TermIndexStatusPtr CheckIndex::testTermIndex(const SegmentInfoPtr& info, const SegmentReaderPtr& reader) {
    TermIndexStatusPtr status(newLucene());
    try {
        msg(L" test: terms, freq, prox...");
        TermEnumPtr termEnum(reader->terms());
        TermPositionsPtr termPositions(reader->termPositions());
        // Used only to count up # deleted docs for this term
        MySegmentTermDocsPtr myTermDocs(newLucene(reader));
        int32_t maxDoc = reader->maxDoc();
        while (termEnum->next()) {
            ++status->termCount;
            TermPtr term(termEnum->term());
            int32_t docFreq = termEnum->docFreq();
            termPositions->seek(term);
            int32_t lastDoc = -1;
            int32_t freq0 = 0;
            status->totFreq += docFreq;
            while (termPositions->next()) {
                ++freq0;
                int32_t doc = termPositions->doc();
                int32_t freq = termPositions->freq();
                if (doc <= lastDoc) {
                    boost::throw_exception(RuntimeException(L"term " + term->toString() + L": doc " + StringUtils::toString(doc) + L" <= lastDoc " + StringUtils::toString(lastDoc)));
                }
                if (doc >= maxDoc) {
                    boost::throw_exception(RuntimeException(L"term " + term->toString() + L": doc " + StringUtils::toString(doc) + L" >= maxDoc " + StringUtils::toString(maxDoc)));
                }
                lastDoc = doc;
                if (freq <= 0) {
                    boost::throw_exception(RuntimeException(L"term " + term->toString() + L": doc " + StringUtils::toString(doc) + L": freq " + StringUtils::toString(freq) + L" is out of bounds"));
                }
                int32_t lastPos = -1;
                status->totPos += freq;
                for (int32_t j = 0; j < freq; ++j) {
                    int32_t pos = termPositions->nextPosition();
                    if (pos < -1) {
                        boost::throw_exception(RuntimeException(L"term " + term->toString() + L": doc " + StringUtils::toString(doc) + L": pos " + StringUtils::toString(pos) + L" is out of bounds"));
                    }
                    if (pos < lastPos) {
                        boost::throw_exception(RuntimeException(L"term " + term->toString() + L": doc " + StringUtils::toString(doc) + L": pos " + StringUtils::toString(pos) + L" < lastPos " + StringUtils::toString(lastPos)));
                    }
                    lastPos = pos;
                }
            }
            // Now count how many deleted docs occurred in this term
            int32_t delCount;
            if (reader->hasDeletions()) {
                myTermDocs->seek(term);
                while (myTermDocs->next()) {
                }
                delCount = myTermDocs->delCount;
            } else {
                delCount = 0;
            }
            if (freq0 + delCount != docFreq) {
                // NOTE(review): message lacks a space before "docFreq="; kept byte-identical
                boost::throw_exception(RuntimeException(L"term " + term->toString() + L"docFreq=" + StringUtils::toString(docFreq) + L" != num docs seen " + StringUtils::toString(freq0) + L" + num docs deleted " + StringUtils::toString(delCount)));
            }
        }
        msg(L"OK [" + StringUtils::toString(status->termCount) + L" terms; " + StringUtils::toString(status->totFreq) + L" terms/docs pairs; " + StringUtils::toString(status->totPos) + L" tokens]");
    } catch (LuceneException& e) {
        msg(L"ERROR [" + e.getError() + L"]");
        status->error = e;
    }
    return status;
}

// Loads every undeleted document's stored fields and checks the live-doc count.
StoredFieldStatusPtr CheckIndex::testStoredFields(const SegmentInfoPtr& info, const SegmentReaderPtr& reader) {
    StoredFieldStatusPtr status(newLucene());
    try {
        msg(L" test: stored fields.......");
        // Scan stored fields for all documents
        for (int32_t j = 0; j < info->docCount; ++j) {
            if (!reader->isDeleted(j)) {
                ++status->docCount;
                DocumentPtr doc(reader->document(j, FieldSelectorPtr()));
                status->totFields += doc->getFields().size();
            }
        }
        // Validate docCount
        if (status->docCount != reader->numDocs()) {
            // NOTE(review): message prints status->docCount twice; the second
            // value presumably should be reader->numDocs().  Kept as-is.
            boost::throw_exception(RuntimeException(L"docCount=" + StringUtils::toString(status->docCount) + L" but saw " + StringUtils::toString(status->docCount) + L" undeleted docs"));
        }
        msg(L"OK [" + StringUtils::toString(status->totFields) + L" total field count; avg " + StringUtils::toString((double)status->totFields / (double)status->docCount) + L" fields per doc]");
    } catch (LuceneException& e) {
        msg(L"ERROR [" + e.getError() + L"]");
        status->error = e;
    }
    return status;
}

// Loads term frequency vectors for every undeleted document.
TermVectorStatusPtr CheckIndex::testTermVectors(const SegmentInfoPtr& info, const SegmentReaderPtr& reader) {
    TermVectorStatusPtr status(newLucene());
    try {
        msg(L" test: term vectors........");
        for (int32_t j = 0; j < info->docCount; ++j) {
            if (!reader->isDeleted(j)) {
                ++status->docCount;
                Collection tfv(reader->getTermFreqVectors(j));
                if (tfv) {
                    status->totVectors += tfv.size();
                }
            }
        }
        msg(L"OK [" + StringUtils::toString(status->totVectors) + L" total vector count; avg " + StringUtils::toString((double)status->totVectors / (double)status->docCount) + L" term/freq vector fields per doc]");
    } catch (LuceneException& e) {
        msg(L"ERROR [" + e.getError() + L"]");
        status->error = e;
    }
    return status;
}

// Rewrites the segments file to exclude the broken segments found by
// checkIndex.  Refuses a partial check — documents would be silently lost.
void CheckIndex::fixIndex(const IndexStatusPtr& result) {
    if (result->partial) {
        boost::throw_exception(IllegalArgumentException(L"can only fix an index that was fully checked (this status checked a subset of segments)"));
    }
    result->newSegments->commit(result->dir);
}

// Side-effecting probe used by assertsOn(): runs only when asserts are enabled.
bool CheckIndex::testAsserts() {
    _assertsOn = true;
    return true;
}

// True if BOOST_ASSERT is active in this build.
bool CheckIndex::assertsOn() {
    BOOST_ASSERT(testAsserts());
    return _assertsOn;
}

// Command-line entry point: parses [-fix] [-segment X]... pathToIndex,
// runs the check and optionally fixes the index.  Returns 0 on a clean index.
int CheckIndex::main(Collection args) {
    bool doFix = false;
    Collection onlySegments(Collection::newInstance());
    String indexPath;
    for (Collection::iterator arg = args.begin(); arg != args.end(); ++arg) {
        if (*arg == L"-fix") {
            doFix = true;
        } else if (*arg == L"-segment") {
            if (arg + 1 == args.end()) {
                std::wcout << L"ERROR: missing name for -segment option\n";
                return 1;
            }
            ++arg;
            onlySegments.add(*arg);
        } else {
            if (!indexPath.empty()) {
                std::wcout << L"ERROR: unexpected extra argument '" << *arg << L"'\n";
                return 1;
            }
            indexPath = *arg;
        }
    }
    if (indexPath.empty()) {
        std::wcout << L"\nERROR: index path not specified\n";
        std::wcout << L"Usage: CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n";
        std::wcout << L"\n";
        std::wcout << L" -fix: actually write a new segments_N file, removing any problematic segments\n";
        std::wcout << L" -segment X: only check the specified segments. This can be specified multiple\n";
        std::wcout << L" times, to check more than one segment, eg '-segment _2 -segment _a'.\n";
        std::wcout << L" You can't use this with the -fix option\n";
        std::wcout << L"\n";
        std::wcout << L"**WARNING**: -fix should only be used on an emergency basis as it will cause\n";
        std::wcout << L"documents (perhaps many) to be permanently removed from the index. Always make\n";
        std::wcout << L"a backup copy of your index before running this! Do not run this tool on an index\n";
        std::wcout << L"that is actively being written to. You have been warned!\n";
        std::wcout << L"\n";
        std::wcout << L"Run without -fix, this tool will open the index, report version information\n";
        std::wcout << L"and report any exceptions it hits and what action it would take if -fix were\n";
        std::wcout << L"specified. With -fix, this tool will remove any segments that have issues and\n";
        std::wcout << L"write a new segments_N file. This means all documents contained in the affected\n";
        std::wcout << L"segments will be removed.\n";
        std::wcout << L"\n";
        std::wcout << L"This tool exits with exit code 1 if the index cannot be opened or has any\n";
        std::wcout << L"corruption, else 0.\n\n";
        return 1;
    }
    if (!assertsOn()) {
        std::wcout << L"\nNOTE: testing will be more thorough if you run with '-ea', so assertions are enabled\n";
    }
    if (onlySegments.empty()) {
        onlySegments.reset();
    } else if (doFix) {
        std::wcout << L"ERROR: cannot specify both -fix and -segment\n";
        return 1;
    }
    std::wcout << L"\nOpening index @ " << indexPath << L"\n\n";
    DirectoryPtr dir;
    try {
        dir = FSDirectory::open(indexPath);
    } catch (...) {
        std::wcout << L"ERROR: could not open directory \"" << indexPath << L"\"; exiting\n";
        return 1;
    }
    CheckIndexPtr checker(newLucene(dir));
    checker->setInfoStream(newLucene());
    IndexStatusPtr result(checker->checkIndex(onlySegments));
    if (result->missingSegments) {
        return 1;
    }
    if (!result->clean) {
        if (!doFix) {
            std::wcout << L"WARNING: would write new segments file, and " << result->totLoseDocCount << L" documents would be lost, if -fix were specified\n\n";
        } else {
            std::wcout << L"WARNING: " << result->totLoseDocCount << L" documents will be lost\n";
            std::wcout << L"NOTE: will write new segments file in 5 seconds; this will remove " << result->totLoseDocCount;
            std::wcout << L" docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!\n";
            // 5-second countdown before destructive write
            for (int32_t sec = 0; sec < 5; ++sec) {
                LuceneThread::threadSleep(1000);
                std::wcout << L" " << (5 - sec) << L"...\n";
            }
            std::wcout << L"Writing...\n";
            checker->fixIndex(result);
            std::wcout << L"OK\n";
            std::wcout << L"Wrote new segments file \"" << result->newSegments->getCurrentSegmentFileName() << L"\"\n";
        }
    }
    std::wcout << L"\n";
    return ((result && result->clean) ? 0 : 1);
}

// Status object constructors: zero/false every field.

IndexStatus::IndexStatus() {
    clean = false;
    missingSegments = false;
    cantOpenSegments = false;
    missingSegmentVersion = false;
    numSegments = false; // NOTE(review): int field assigned 'false' — presumably meant 0; kept as-is
    segmentInfos = Collection::newInstance();
    segmentsChecked = Collection::newInstance();
    toolOutOfDate = false;
    totLoseDocCount = 0;
    numBadSegments = 0;
    partial = false;
}

IndexStatus::~IndexStatus() {
}

SegmentInfoStatus::SegmentInfoStatus() {
    docCount = 0;
    compound = false;
    numFiles = 0;
    sizeMB = 0;
    docStoreOffset = -1;
    docStoreCompoundFile = false;
    hasDeletions = false;
    numDeleted = 0;
    openReaderPassed = false;
    numFields = 0;
    hasProx = false;
}

SegmentInfoStatus::~SegmentInfoStatus() {
}

FieldNormStatus::FieldNormStatus() {
    totFields = 0;
}

FieldNormStatus::~FieldNormStatus() {
}

TermIndexStatus::TermIndexStatus() {
    termCount = 0;
    totFreq = 0;
    totPos = 0;
}

TermIndexStatus::~TermIndexStatus() {
}

StoredFieldStatus::StoredFieldStatus() {
    docCount = 0;
    totFields = 0;
}

StoredFieldStatus::~StoredFieldStatus() {
}

TermVectorStatus::TermVectorStatus() {
    docCount = 0;
    totVectors = 0;
}

TermVectorStatus::~TermVectorStatus() {
}

// SegmentTermDocs subclass that counts skipped (deleted) docs while iterating.
MySegmentTermDocs::MySegmentTermDocs(const SegmentReaderPtr& p) : SegmentTermDocs(p) {
    delCount = 0;
}

MySegmentTermDocs::~MySegmentTermDocs() {
}

void MySegmentTermDocs::seek(const TermPtr& term) {
    SegmentTermDocs::seek(term);
    delCount = 0;
}

void MySegmentTermDocs::skippingDoc() {
    ++delCount;
}

}
LucenePlusPlus-rel_3.0.9/src/core/index/CompoundFileReader.cpp000066400000000000000000000140241456444476200243720ustar00rootroot00000000000000/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CompoundFileReader.h" namespace Lucene { CompoundFileReader::CompoundFileReader(const DirectoryPtr& dir, const String& name) { ConstructReader(dir, name, BufferedIndexInput::BUFFER_SIZE); } CompoundFileReader::CompoundFileReader(const DirectoryPtr& dir, const String& name, int32_t readBufferSize) { ConstructReader(dir, name, readBufferSize); } CompoundFileReader::~CompoundFileReader() { } void CompoundFileReader::ConstructReader(const DirectoryPtr& dir, const String& name, int32_t readBufferSize) { directory = dir; fileName = name; this->readBufferSize = readBufferSize; this->entries = MapStringFileEntryPtr::newInstance(); bool success = false; LuceneException finally; try { stream = dir->openInput(name, readBufferSize); // read the directory and init files int32_t count = stream->readVInt(); FileEntryPtr entry; for (int32_t i = 0; i < count; ++i) { int64_t offset = stream->readLong(); String id(stream->readString()); if (entry) { // set length of the previous entry entry->length = offset - entry->offset; } entry = newInstance(); entry->offset = offset; entries.put(id, entry); } // set the length of the final entry if (entry) { entry->length = stream->length() - entry->offset; } success = true; } catch (LuceneException& e) { finally = e; } if (!success && stream) { try { stream->close(); } catch (...) 
{ } } finally.throwException(); } DirectoryPtr CompoundFileReader::getDirectory() { return directory; } String CompoundFileReader::getName() { return fileName; } void CompoundFileReader::close() { SyncLock syncLock(this); if (!stream) { boost::throw_exception(IOException(L"Already closed")); } entries.clear(); stream->close(); stream.reset(); } IndexInputPtr CompoundFileReader::openInput(const String& name) { SyncLock syncLock(this); // Default to readBufferSize passed in when we were opened return openInput(name, readBufferSize); } IndexInputPtr CompoundFileReader::openInput(const String& name, int32_t bufferSize) { SyncLock syncLock(this); if (!stream) { boost::throw_exception(IOException(L"Stream closed")); } MapStringFileEntryPtr::iterator entry = entries.find(name); if (entry == entries.end()) { boost::throw_exception(IOException(L"No sub-file with id " + name + L" found")); } return newLucene(stream, entry->second->offset, entry->second->length, readBufferSize); } HashSet CompoundFileReader::listAll() { HashSet res(HashSet::newInstance()); for (MapStringFileEntryPtr::iterator entry = entries.begin(); entry != entries.end(); ++entry) { res.add(entry->first); } return res; } bool CompoundFileReader::fileExists(const String& name) { return entries.contains(name); } uint64_t CompoundFileReader::fileModified(const String& name) { return directory->fileModified(fileName); } void CompoundFileReader::touchFile(const String& name) { directory->touchFile(fileName); } void CompoundFileReader::deleteFile(const String& name) { boost::throw_exception(UnsupportedOperationException()); } void CompoundFileReader::renameFile(const String& from, const String& to) { boost::throw_exception(UnsupportedOperationException()); } int64_t CompoundFileReader::fileLength(const String& name) { MapStringFileEntryPtr::iterator entry = entries.find(name); if (entry == entries.end()) { boost::throw_exception(IOException(L"File " + name + L" does not exist")); } return entry->second->length; } 
IndexOutputPtr CompoundFileReader::createOutput(const String& name) { boost::throw_exception(UnsupportedOperationException()); return IndexOutputPtr(); } LockPtr CompoundFileReader::makeLock(const String& name) { boost::throw_exception(UnsupportedOperationException()); return LockPtr(); } CSIndexInput::CSIndexInput() { fileOffset = 0; _length = 0; } CSIndexInput::CSIndexInput(const IndexInputPtr& base, int64_t fileOffset, int64_t length) : BufferedIndexInput(BufferedIndexInput::BUFFER_SIZE) { this->base = boost::dynamic_pointer_cast(base->clone()); this->fileOffset = fileOffset; this->_length = length; } CSIndexInput::CSIndexInput(const IndexInputPtr& base, int64_t fileOffset, int64_t length, int32_t readBufferSize) : BufferedIndexInput(readBufferSize) { this->base = boost::dynamic_pointer_cast(base->clone()); this->fileOffset = fileOffset; this->_length = length; } CSIndexInput::~CSIndexInput() { } void CSIndexInput::readInternal(uint8_t* b, int32_t offset, int32_t length) { int64_t start = getFilePointer(); if (start + length > _length) { boost::throw_exception(IOException(L"read past EOF")); } base->seek(fileOffset + start); base->readBytes(b, offset, length, false); } void CSIndexInput::seekInternal(int64_t pos) { } void CSIndexInput::close() { base->close(); } int64_t CSIndexInput::length() { return _length; } LuceneObjectPtr CSIndexInput::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? other : newLucene(); CSIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(BufferedIndexInput::clone(clone))); cloneIndexInput->base = boost::dynamic_pointer_cast(this->base->clone()); cloneIndexInput->fileOffset = fileOffset; cloneIndexInput->_length = _length; return cloneIndexInput; } } LucenePlusPlus-rel_3.0.9/src/core/index/CompoundFileWriter.cpp000066400000000000000000000141011456444476200244400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CompoundFileWriter.h" #include "SegmentMerger.h" #include "Directory.h" #include "IndexInput.h" #include "IndexOutput.h" #include "StringUtils.h" namespace Lucene { CompoundFileWriter::CompoundFileWriter(const DirectoryPtr& dir, const String& name, const CheckAbortPtr& checkAbort) { if (!dir) { boost::throw_exception(IllegalArgumentException(L"directory cannot be empty")); } if (name.empty()) { boost::throw_exception(IllegalArgumentException(L"name cannot be empty")); } this->checkAbort = checkAbort; _directory = dir; fileName = name; ids = HashSet::newInstance(); entries = Collection::newInstance(); merged = false; } CompoundFileWriter::~CompoundFileWriter() { } DirectoryPtr CompoundFileWriter::getDirectory() { return DirectoryPtr(_directory); } String CompoundFileWriter::getName() { return fileName; } void CompoundFileWriter::addFile(const String& file) { if (merged) { boost::throw_exception(IllegalStateException(L"Can't add extensions after merge has been called")); } if (file.empty()) { boost::throw_exception(IllegalArgumentException(L"file cannot be empty")); } if (!ids.add(file)) { boost::throw_exception(IllegalArgumentException(L"File " + file + L" already added")); } FileEntry entry; entry.file = file; entries.add(entry); } void CompoundFileWriter::close() { if (merged) { boost::throw_exception(IllegalStateException(L"Merge already performed")); } if (entries.empty()) { boost::throw_exception(IllegalStateException(L"No entries to merge have been defined")); } merged = true; DirectoryPtr directory(_directory); // open the compound stream IndexOutputPtr os; LuceneException finally; try { os = directory->createOutput(fileName); // Write the number of entries os->writeVInt(entries.size()); // Write the directory 
with all offsets at 0. Remember the positions of directory entries so that we // can adjust the offsets later int64_t totalSize = 0; for (Collection::iterator fe = entries.begin(); fe != entries.end(); ++fe) { fe->directoryOffset = os->getFilePointer(); os->writeLong(0); // for now os->writeString(fe->file); totalSize += directory->fileLength(fe->file); } // Pre-allocate size of file as optimization - this can potentially help IO performance as we write the // file and also later during searching. It also uncovers a disk-full situation earlier and hopefully // without actually filling disk to 100% int64_t finalLength = totalSize + os->getFilePointer(); os->setLength(finalLength); // Open the files and copy their data into the stream. Remember the locations of each file's data section. ByteArray buffer(ByteArray::newInstance(16384)); for (Collection::iterator fe = entries.begin(); fe != entries.end(); ++fe) { fe->dataOffset = os->getFilePointer(); copyFile(*fe, os, buffer); } // Write the data offsets into the directory of the compound stream for (Collection::iterator fe = entries.begin(); fe != entries.end(); ++fe) { os->seek(fe->directoryOffset); os->writeLong(fe->dataOffset); } BOOST_ASSERT(finalLength == os->length()); // Close the output stream. Set the os to null before trying to close so that if an exception occurs during // the close, the finally clause below will not attempt to close the stream the second time. 
IndexOutputPtr tmp(os); os.reset(); tmp->close(); } catch (LuceneException& e) { finally = e; } if (os) { try { os->close(); } catch (LuceneException&) { } } finally.throwException(); } void CompoundFileWriter::copyFile(const FileEntry& source, const IndexOutputPtr& os, ByteArray buffer) { IndexInputPtr is; DirectoryPtr directory(_directory); LuceneException finally; try { int64_t startPtr = os->getFilePointer(); is = directory->openInput(source.file); int64_t length = is->length(); int64_t remainder = length; int64_t chunk = buffer.size(); while (remainder > 0) { int32_t len = (int32_t)std::min(chunk, remainder); is->readBytes(buffer.get(), 0, len, false); os->writeBytes(buffer.get(), len); remainder -= len; if (checkAbort) { // Roughly every 2 MB we will check if it's time to abort checkAbort->work(80); } } // Verify that remainder is 0 if (remainder != 0) { boost::throw_exception(IOException(L"Non-zero remainder length after copying: " + StringUtils::toString(remainder) + L" (id: " + source.file + L", length: " + StringUtils::toString(length) + L", buffer size: " + StringUtils::toString(chunk) + L")")); } // Verify that the output length diff is equal to original file int64_t endPtr = os->getFilePointer(); int64_t diff = endPtr - startPtr; if (diff != length) { boost::throw_exception(IOException(L"Difference in the output file offsets " + StringUtils::toString(diff) + L" does not match the original file length " + StringUtils::toString(length))); } } catch (LuceneException& e) { finally = e; } if (is) { is->close(); } finally.throwException(); } } LucenePlusPlus-rel_3.0.9/src/core/index/ConcurrentMergeScheduler.cpp000066400000000000000000000233111456444476200256230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ConcurrentMergeScheduler.h" #include "_ConcurrentMergeScheduler.h" #include "IndexWriter.h" #include "TestPoint.h" #include "StringUtils.h" namespace Lucene { Collection ConcurrentMergeScheduler::allInstances; bool ConcurrentMergeScheduler::anyExceptions = false; ConcurrentMergeScheduler::ConcurrentMergeScheduler() { mergeThreadPriority = -1; mergeThreads = SetMergeThread::newInstance(); maxThreadCount = 1; suppressExceptions = false; closed = false; } ConcurrentMergeScheduler::~ConcurrentMergeScheduler() { } void ConcurrentMergeScheduler::initialize() { // Only for testing if (allInstances) { addMyself(); } } void ConcurrentMergeScheduler::setMaxThreadCount(int32_t count) { if (count < 1) { boost::throw_exception(IllegalArgumentException(L"count should be at least 1")); } maxThreadCount = count; } int32_t ConcurrentMergeScheduler::getMaxThreadCount() { return maxThreadCount; } int32_t ConcurrentMergeScheduler::getMergeThreadPriority() { SyncLock syncLock(this); initMergeThreadPriority(); return mergeThreadPriority; } void ConcurrentMergeScheduler::setMergeThreadPriority(int32_t pri) { SyncLock syncLock(this); if (pri > LuceneThread::MAX_THREAD_PRIORITY || pri < LuceneThread::MIN_THREAD_PRIORITY) { boost::throw_exception(IllegalArgumentException(L"priority must be in range " + StringUtils::toString(LuceneThread::MIN_THREAD_PRIORITY) + L" .. 
" + StringUtils::toString(LuceneThread::MAX_THREAD_PRIORITY) + L" inclusive")); } mergeThreadPriority = pri; for (SetMergeThread::iterator merge = mergeThreads.begin(); merge != mergeThreads.end(); ++merge) { (*merge)->setThreadPriority(pri); } } bool ConcurrentMergeScheduler::verbose() { return (!_writer.expired() && IndexWriterPtr(_writer)->verbose()); } void ConcurrentMergeScheduler::message(const String& message) { if (verbose() && !_writer.expired()) { IndexWriterPtr(_writer)->message(L"CMS: " + message); } } void ConcurrentMergeScheduler::initMergeThreadPriority() { SyncLock syncLock(this); if (mergeThreadPriority == -1) { // Default to slightly higher priority than our calling thread mergeThreadPriority = std::min(LuceneThread::NORM_THREAD_PRIORITY + 1, LuceneThread::MAX_THREAD_PRIORITY); } } void ConcurrentMergeScheduler::close() { sync(); closed = true; } void ConcurrentMergeScheduler::sync() { SyncLock syncLock(this); while (mergeThreadCount() > 0) { message(L"now wait for threads; currently " + StringUtils::toString(mergeThreads.size()) + L" still running"); wait(1000); } mergeThreads.clear(); } int32_t ConcurrentMergeScheduler::mergeThreadCount() { SyncLock syncLock(this); int32_t count = 0; for (SetMergeThread::iterator merge = mergeThreads.begin(); merge != mergeThreads.end(); ++merge) { if ((*merge)->isAlive()) { ++count; } } return count; } void ConcurrentMergeScheduler::merge(const IndexWriterPtr& writer) { BOOST_ASSERT(!writer->holdsLock()); this->_writer = writer; initMergeThreadPriority(); dir = writer->getDirectory(); // First, quickly run through the newly proposed merges and add any orthogonal merges (ie a merge not // involving segments already pending to be merged) to the queue. If we are way behind on merging, // many of these newly proposed merges will likely already be registered. 
message(L"now merge"); message(L" index: " + writer->segString()); // Iterate, pulling from the IndexWriter's queue of pending merges, until it's empty while (true) { OneMergePtr merge(writer->getNextMerge()); if (!merge) { message(L" no more merges pending; now return"); return; } // We do this with the primary thread to keep deterministic assignment of segment names writer->mergeInit(merge); bool success = false; LuceneException finally; try { SyncLock syncLock(this); MergeThreadPtr merger; while (mergeThreadCount() >= maxThreadCount) { message(L" too many merge threads running; stalling..."); wait(1000); } message(L" consider merge " + merge->segString(dir)); BOOST_ASSERT(mergeThreadCount() < maxThreadCount); // OK to spawn a new merge thread to handle this merge merger = getMergeThread(writer, merge); mergeThreads.add(merger); message(L" launch new thread"); merger->start(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { writer->mergeFinish(merge); } finally.throwException(); } } void ConcurrentMergeScheduler::doMerge(const OneMergePtr& merge) { TestScope testScope(L"ConcurrentMergeScheduler", L"doMerge"); IndexWriterPtr(_writer)->merge(merge); } MergeThreadPtr ConcurrentMergeScheduler::getMergeThread(const IndexWriterPtr& writer, const OneMergePtr& merge) { SyncLock syncLock(this); MergeThreadPtr thread(newLucene(shared_from_this(), writer, merge)); thread->setThreadPriority(mergeThreadPriority); return thread; } void ConcurrentMergeScheduler::handleMergeException(const LuceneException& exc) { // When an exception is hit during merge, IndexWriter removes any partial files and then // allows another merge to run. 
If whatever caused the error is not transient then the // exception will keep happening, so, we sleep here to avoid saturating CPU in such cases LuceneThread::threadSleep(250); // pause 250 msec boost::throw_exception(MergeException()); } bool ConcurrentMergeScheduler::anyUnhandledExceptions() { if (!allInstances) { boost::throw_exception(RuntimeException(L"setTestMode() was not called")); } SyncLock instancesLock(&allInstances); for (Collection::iterator instance = allInstances.begin(); instance != allInstances.end(); ++instance) { (*instance)->sync(); } bool v = anyExceptions; anyExceptions = false; return v; } void ConcurrentMergeScheduler::clearUnhandledExceptions() { SyncLock instancesLock(&allInstances); anyExceptions = false; } void ConcurrentMergeScheduler::addMyself() { SyncLock instancesLock(&allInstances); int32_t size = allInstances.size(); int32_t upto = 0; for (int32_t i = 0; i < size; ++i) { ConcurrentMergeSchedulerPtr other(allInstances[i]); if (!(other->closed && other->mergeThreadCount() == 0)) { // Keep this one for now: it still has threads or may spawn new threads allInstances[upto++] = other; } allInstances.remove(allInstances.begin() + upto, allInstances.end()); allInstances.add(shared_from_this()); } } void ConcurrentMergeScheduler::setSuppressExceptions() { suppressExceptions = true; } void ConcurrentMergeScheduler::clearSuppressExceptions() { suppressExceptions = false; } void ConcurrentMergeScheduler::setTestMode() { allInstances = Collection::newInstance(); } MergeThread::MergeThread(const ConcurrentMergeSchedulerPtr& merger, const IndexWriterPtr& writer, const OneMergePtr& startMerge) { this->_merger = merger; this->_writer = writer; this->startMerge = startMerge; } MergeThread::~MergeThread() { } void MergeThread::setRunningMerge(const OneMergePtr& merge) { ConcurrentMergeSchedulerPtr merger(_merger); SyncLock syncLock(merger); runningMerge = merge; } OneMergePtr MergeThread::getRunningMerge() { ConcurrentMergeSchedulerPtr 
merger(_merger); SyncLock syncLock(merger); return runningMerge; } void MergeThread::setThreadPriority(int32_t pri) { try { setPriority(pri); } catch (...) { } } void MergeThread::run() { // First time through the while loop we do the merge that we were started with OneMergePtr merge(this->startMerge); ConcurrentMergeSchedulerPtr merger(_merger); LuceneException finally; try { merger->message(L" merge thread: start"); IndexWriterPtr writer(_writer); while (true) { setRunningMerge(merge); merger->doMerge(merge); // Subsequent times through the loop we do any new merge that writer says is necessary merge = writer->getNextMerge(); if (merge) { writer->mergeInit(merge); merger->message(L" merge thread: do another merge " + merge->segString(merger->dir)); } else { break; } } merger->message(L" merge thread: done"); } catch (MergeAbortedException&) { // Ignore the exception if it was due to abort } catch (LuceneException& e) { if (!merger->suppressExceptions) { // suppressExceptions is normally only set during testing. merger->anyExceptions = true; merger->handleMergeException(e); } else { finally = e; } } { SyncLock syncLock(merger); merger->notifyAll(); bool removed = merger->mergeThreads.remove(shared_from_this()); BOOST_ASSERT(removed); } finally.throwException(); } } LucenePlusPlus-rel_3.0.9/src/core/index/DefaultSkipListReader.cpp000066400000000000000000000063031456444476200250560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DefaultSkipListReader.h" #include "MiscUtils.h" namespace Lucene { DefaultSkipListReader::DefaultSkipListReader(const IndexInputPtr& skipStream, int32_t maxSkipLevels, int32_t skipInterval) : MultiLevelSkipListReader(skipStream, maxSkipLevels, skipInterval) { currentFieldStoresPayloads = false; lastFreqPointer = 0; lastProxPointer = 0; lastPayloadLength = 0; freqPointer = Collection::newInstance(maxSkipLevels); proxPointer = Collection::newInstance(maxSkipLevels); payloadLength = Collection::newInstance(maxSkipLevels); MiscUtils::arrayFill(freqPointer.begin(), 0, freqPointer.size(), 0); MiscUtils::arrayFill(proxPointer.begin(), 0, proxPointer.size(), 0); MiscUtils::arrayFill(payloadLength.begin(), 0, payloadLength.size(), 0); } DefaultSkipListReader::~DefaultSkipListReader() { } void DefaultSkipListReader::init(int64_t skipPointer, int64_t freqBasePointer, int64_t proxBasePointer, int32_t df, bool storesPayloads) { MultiLevelSkipListReader::init(skipPointer, df); this->currentFieldStoresPayloads = storesPayloads; lastFreqPointer = freqBasePointer; lastProxPointer = proxBasePointer; MiscUtils::arrayFill(freqPointer.begin(), 0, freqPointer.size(), freqBasePointer); MiscUtils::arrayFill(proxPointer.begin(), 0, proxPointer.size(), proxBasePointer); MiscUtils::arrayFill(payloadLength.begin(), 0, payloadLength.size(), 0); } int64_t DefaultSkipListReader::getFreqPointer() { return lastFreqPointer; } int64_t DefaultSkipListReader::getProxPointer() { return lastProxPointer; } int32_t DefaultSkipListReader::getPayloadLength() { return lastPayloadLength; } void DefaultSkipListReader::seekChild(int32_t level) { MultiLevelSkipListReader::seekChild(level); freqPointer[level] = lastFreqPointer; proxPointer[level] = lastProxPointer; payloadLength[level] = lastPayloadLength; } void DefaultSkipListReader::setLastSkipData(int32_t level) { 
MultiLevelSkipListReader::setLastSkipData(level); lastFreqPointer = freqPointer[level]; lastProxPointer = proxPointer[level]; lastPayloadLength = payloadLength[level]; } int32_t DefaultSkipListReader::readSkipData(int32_t level, const IndexInputPtr& skipStream) { int32_t delta; if (currentFieldStoresPayloads) { // The current field stores payloads. If the doc delta is odd then we have to read the current // payload length because it differs from the length of the previous payload delta = skipStream->readVInt(); if ((delta & 1) != 0) { payloadLength[level] = skipStream->readVInt(); } delta = MiscUtils::unsignedShift(delta, 1); } else { delta = skipStream->readVInt(); } freqPointer[level] += skipStream->readVInt(); proxPointer[level] += skipStream->readVInt(); return delta; } } LucenePlusPlus-rel_3.0.9/src/core/index/DefaultSkipListWriter.cpp000066400000000000000000000117051456444476200251320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DefaultSkipListWriter.h" #include "IndexOutput.h" #include "MiscUtils.h" namespace Lucene { DefaultSkipListWriter::DefaultSkipListWriter(int32_t skipInterval, int32_t numberOfSkipLevels, int32_t docCount, const IndexOutputPtr& freqOutput, const IndexOutputPtr& proxOutput) : MultiLevelSkipListWriter(skipInterval, numberOfSkipLevels, docCount) { curDoc = 0; curStorePayloads = false; curPayloadLength = 0; curFreqPointer = 0; curProxPointer = 0; this->freqOutput = freqOutput; this->proxOutput = proxOutput; lastSkipDoc = Collection::newInstance(numberOfSkipLevels); lastSkipPayloadLength = Collection::newInstance(numberOfSkipLevels); lastSkipFreqPointer = Collection::newInstance(numberOfSkipLevels); lastSkipProxPointer = Collection::newInstance(numberOfSkipLevels); } DefaultSkipListWriter::~DefaultSkipListWriter() { } void DefaultSkipListWriter::setFreqOutput(const IndexOutputPtr& freqOutput) { this->freqOutput = freqOutput; } void DefaultSkipListWriter::setProxOutput(const IndexOutputPtr& proxOutput) { this->proxOutput = proxOutput; } void DefaultSkipListWriter::setSkipData(int32_t doc, bool storePayloads, int32_t payloadLength) { this->curDoc = doc; this->curStorePayloads = storePayloads; this->curPayloadLength = payloadLength; this->curFreqPointer = freqOutput->getFilePointer(); if (proxOutput) { this->curProxPointer = proxOutput->getFilePointer(); } } void DefaultSkipListWriter::resetSkip() { MultiLevelSkipListWriter::resetSkip(); MiscUtils::arrayFill(lastSkipDoc.begin(), 0, lastSkipDoc.size(), 0); MiscUtils::arrayFill(lastSkipPayloadLength.begin(), 0, lastSkipPayloadLength.size(), -1); // we don't have to write the first length in the skip list MiscUtils::arrayFill(lastSkipFreqPointer.begin(), 0, lastSkipFreqPointer.size(), freqOutput->getFilePointer()); if (proxOutput) { MiscUtils::arrayFill(lastSkipProxPointer.begin(), 0, lastSkipProxPointer.size(), 
proxOutput->getFilePointer()); } } void DefaultSkipListWriter::writeSkipData(int32_t level, const IndexOutputPtr& skipBuffer) { // To efficiently store payloads in the posting lists we do not store the length of // every payload. Instead we omit the length for a payload if the previous payload had // the same length. // However, in order to support skipping the payload length at every skip point must be known. // So we use the same length encoding that we use for the posting lists for the skip data as well: // Case 1: current field does not store payloads // SkipDatum --> DocSkip, FreqSkip, ProxSkip // DocSkip,FreqSkip,ProxSkip --> VInt // DocSkip records the document number before every SkipInterval th document in TermFreqs. // Document numbers are represented as differences from the previous value in the sequence. // Case 2: current field stores payloads // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip // DocSkip,FreqSkip,ProxSkip --> VInt // PayloadLength --> VInt // In this case DocSkip/2 is the difference between // the current and the previous value. If DocSkip // is odd, then a PayloadLength encoded as VInt follows, // if DocSkip is even, then it is assumed that the // current payload length equals the length at the previous // skip point if (curStorePayloads) { int32_t delta = curDoc - lastSkipDoc[level]; if (curPayloadLength == lastSkipPayloadLength[level]) { // the current payload length equals the length at the previous skip point, so we don't store // the length again skipBuffer->writeVInt(delta * 2); } else { // the payload length is different from the previous one. We shift the DocSkip, set the lowest // bit and store the current payload length as VInt. 
skipBuffer->writeVInt(delta * 2 + 1); skipBuffer->writeVInt(curPayloadLength); lastSkipPayloadLength[level] = curPayloadLength; } } else { // current field does not store payloads skipBuffer->writeVInt(curDoc - lastSkipDoc[level]); } skipBuffer->writeVInt((int32_t)(curFreqPointer - lastSkipFreqPointer[level])); skipBuffer->writeVInt((int32_t)(curProxPointer - lastSkipProxPointer[level])); lastSkipDoc[level] = curDoc; lastSkipFreqPointer[level] = curFreqPointer; lastSkipProxPointer[level] = curProxPointer; } } LucenePlusPlus-rel_3.0.9/src/core/index/DirectoryReader.cpp000066400000000000000000001171101456444476200237520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "DirectoryReader.h" #include "_DirectoryReader.h" #include "Directory.h" #include "ReadOnlyDirectoryReader.h" #include "IndexWriter.h" #include "_IndexWriter.h" #include "IndexCommit.h" #include "IndexDeletionPolicy.h" #include "IndexFileDeleter.h" #include "IndexFileNames.h" #include "KeepOnlyLastCommitDeletionPolicy.h" #include "SegmentInfos.h" #include "SegmentInfo.h" #include "SegmentReader.h" #include "DefaultSimilarity.h" #include "ReadOnlySegmentReader.h" #include "SegmentMergeInfo.h" #include "Lock.h" #include "FieldCache.h" #include "MiscUtils.h" namespace Lucene { DirectoryReader::DirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& sis, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor) { normsCache = MapStringByteArray::newInstance(); _maxDoc = 0; _numDocs = -1; _hasDeletions = false; synced = HashSet::newInstance(); stale = false; rollbackHasChanges = false; this->_directory = 
directory; this->readOnly = readOnly; this->segmentInfos = sis; this->deletionPolicy = deletionPolicy; this->termInfosIndexDivisor = termInfosIndexDivisor; if (!readOnly) { // We assume that this segments_N was previously properly sync'd HashSet files(sis->files(directory, true)); synced.addAll(files.begin(), files.end()); } // To reduce the chance of hitting FileNotFound (and having to retry), we open segments in // reverse because IndexWriter merges & deletes the newest segments first. Collection readers(Collection::newInstance(sis->size())); for (int32_t i = sis->size() - 1; i >= 0; --i) { bool success = false; LuceneException finally; try { readers[i] = SegmentReader::get(readOnly, sis->info(i), termInfosIndexDivisor); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { // Close all readers we had opened for (Collection::iterator closeReader = readers.begin(); closeReader != readers.end(); ++closeReader) { try { if (*closeReader) { (*closeReader)->close(); } } catch (...) 
{ // keep going - we want to clean up as much as possible } } } finally.throwException(); } _initialize(readers); } DirectoryReader::DirectoryReader(const IndexWriterPtr& writer, const SegmentInfosPtr& infos, int32_t termInfosIndexDivisor) { normsCache = MapStringByteArray::newInstance(); _maxDoc = 0; _numDocs = -1; _hasDeletions = false; synced = HashSet::newInstance(); stale = false; rollbackHasChanges = false; this->_directory = writer->getDirectory(); this->readOnly = true; this->segmentInfos = infos; this->segmentInfosStart = boost::dynamic_pointer_cast(infos->clone()); this->termInfosIndexDivisor = termInfosIndexDivisor; if (!readOnly) { // We assume that this segments_N was previously properly sync'd HashSet files(infos->files(_directory, true)); synced.addAll(files.begin(), files.end()); } // IndexWriter synchronizes externally before calling us, which ensures infos will not change; so there's // no need to process segments in reverse order int32_t numSegments = infos->size(); Collection readers(Collection::newInstance(numSegments)); DirectoryPtr dir(writer->getDirectory()); int32_t upto = 0; for (int32_t i = 0; i < numSegments; ++i) { bool success = false; LuceneException finally; try { SegmentInfoPtr info(infos->info(i)); if (info->dir == dir) { readers[upto++] = boost::dynamic_pointer_cast(writer->readerPool->getReadOnlyClone(info, true, termInfosIndexDivisor)); } success = true; } catch (LuceneException& e) { finally = e; } if (!success) { // Close all readers we had opened for (--upto; upto >= 0; --upto) { try { if (readers[upto]) { readers[upto]->close(); } } catch (...) 
{ // keep going - we want to clean up as much as possible } } } finally.throwException(); } this->_writer = writer; if (upto < readers.size()) { // This means some segments were in a foreign Directory readers.resize(upto); } _initialize(readers); } DirectoryReader::DirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& infos, Collection oldReaders, Collection oldStarts, MapStringByteArray oldNormsCache, bool readOnly, bool doClone, int32_t termInfosIndexDivisor) { normsCache = MapStringByteArray::newInstance(); _maxDoc = 0; _numDocs = -1; _hasDeletions = false; synced = HashSet::newInstance(); stale = false; rollbackHasChanges = false; this->_directory = directory; this->readOnly = readOnly; this->segmentInfos = infos; this->termInfosIndexDivisor = termInfosIndexDivisor; if (!readOnly) { // We assume that this segments_N was previously properly sync'd HashSet files(infos->files(directory, true)); synced.addAll(files.begin(), files.end()); } // we put the old SegmentReaders in a map, that allows us to lookup a reader using its segment name MapStringInt segmentReaders(MapStringInt::newInstance()); if (oldReaders) { int32_t segReader = 0; // create a Map SegmentName->SegmentReader for (Collection::iterator reader = oldReaders.begin(); reader != oldReaders.end(); ++reader) { segmentReaders.put((*reader)->getSegmentName(), segReader++); } } Collection newReaders(Collection::newInstance(infos->size())); // remember which readers are shared between the old and the re-opened DirectoryReader - we have to incRef those readers Collection readerShared(Collection::newInstance(infos->size())); for (int32_t i = infos->size() - 1; i >= 0; --i) { // find SegmentReader for this segment MapStringInt::iterator oldReaderIndex = segmentReaders.find(infos->info(i)->name); if (oldReaderIndex == segmentReaders.end()) { // this is a new segment, no old SegmentReader can be reused newReaders[i].reset(); } else { // there is an old reader for this segment - we'll try to reopen 
it newReaders[i] = oldReaders[oldReaderIndex->second]; } bool success = false; LuceneException finally; try { SegmentReaderPtr newReader; if (!newReaders[i] || infos->info(i)->getUseCompoundFile() != newReaders[i]->getSegmentInfo()->getUseCompoundFile()) { // We should never see a totally new segment during cloning BOOST_ASSERT(!doClone); // this is a new reader; in case we hit an exception we can close it safely newReader = SegmentReader::get(readOnly, infos->info(i), termInfosIndexDivisor); } else { newReader = newReaders[i]->reopenSegment(infos->info(i), doClone, readOnly); } if (newReader == newReaders[i]) { // this reader will be shared between the old and the new one, so we must incRef it readerShared[i] = true; newReader->incRef(); } else { readerShared[i] = false; newReaders[i] = newReader; } success = true; } catch (LuceneException& e) { finally = e; } if (!success) { for (++i; i < infos->size(); ++i) { if (newReaders[i]) { try { if (!readerShared[i]) { // this is a new subReader that is not used by the old one, we can close it newReaders[i]->close(); } else { // this subReader is also used by the old reader, so instead closing we must decRef it newReaders[i]->decRef(); } } catch (...) 
{ // keep going - we want to clean up as much as possible } } } } finally.throwException(); } // initialize the readers to calculate maxDoc before we try to reuse the old normsCache _initialize(newReaders); // try to copy unchanged norms from the old normsCache to the new one if (oldNormsCache) { for (MapStringByteArray::iterator entry = oldNormsCache.begin(); entry != oldNormsCache.end(); ++entry) { if (!hasNorms(entry->first)) { continue; } ByteArray bytes(ByteArray::newInstance(maxDoc())); for (int32_t i = 0; i < subReaders.size(); ++i) { MapStringInt::iterator oldReaderIndex = segmentReaders.find(subReaders[i]->getSegmentName()); // this SegmentReader was not re-opened, we can copy all of its norms if (oldReaderIndex != segmentReaders.end() && (oldReaders[oldReaderIndex->second] == subReaders[i] || oldReaders[oldReaderIndex->second]->_norms.get(entry->first) == subReaders[i]->_norms.get(entry->first))) { // we don't have to synchronize here: either this constructor is called from a SegmentReader, in which // case no old norms cache is present, or it is called from MultiReader.reopen(), which is synchronized MiscUtils::arrayCopy(entry->second.get(), oldStarts[oldReaderIndex->second], bytes.get(), starts[i], starts[i + 1] - starts[i]); } else { subReaders[i]->norms(entry->first, bytes, starts[i]); } } normsCache.put(entry->first, bytes); // update cache } } } DirectoryReader::~DirectoryReader() { } void DirectoryReader::_initialize(Collection subReaders) { this->subReaders = subReaders; starts = Collection::newInstance(subReaders.size() + 1); for (int32_t i = 0; i < subReaders.size(); ++i) { starts[i] = _maxDoc; _maxDoc += subReaders[i]->maxDoc(); // compute maxDocs if (subReaders[i]->hasDeletions()) { _hasDeletions = true; } } starts[subReaders.size()] = _maxDoc; if (!readOnly) { maxIndexVersion = SegmentInfos::readCurrentVersion(_directory); } } IndexReaderPtr DirectoryReader::open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, 
const IndexCommitPtr& commit, bool readOnly, int32_t termInfosIndexDivisor) { return newLucene(readOnly, deletionPolicy, termInfosIndexDivisor, newLucene(), directory)->run(commit); } LuceneObjectPtr DirectoryReader::clone(const LuceneObjectPtr& other) { try { return DirectoryReader::clone(readOnly, other); // Preserve current readOnly } catch (LuceneException& e) { boost::throw_exception(RuntimeException(e.getError())); } return DirectoryReaderPtr(); } LuceneObjectPtr DirectoryReader::clone(bool openReadOnly, const LuceneObjectPtr& other) { SyncLock syncLock(this); DirectoryReaderPtr newReader(doReopen(boost::dynamic_pointer_cast(segmentInfos->clone()), true, openReadOnly)); if (shared_from_this() != newReader) { newReader->deletionPolicy = deletionPolicy; } newReader->_writer = _writer; // If we're cloning a non-readOnly reader, move the writeLock (if there is one) to the new reader if (!openReadOnly && writeLock) { // In near real-time search, reader is always readonly BOOST_ASSERT(_writer.expired()); newReader->writeLock = writeLock; newReader->_hasChanges = _hasChanges; newReader->_hasDeletions = _hasDeletions; writeLock.reset(); _hasChanges = false; } return newReader; } IndexReaderPtr DirectoryReader::reopen() { // Preserve current readOnly return doReopen(readOnly, IndexCommitPtr()); } IndexReaderPtr DirectoryReader::reopen(bool openReadOnly) { return doReopen(openReadOnly, IndexCommitPtr()); } IndexReaderPtr DirectoryReader::reopen(const IndexCommitPtr& commit) { return doReopen(true, commit); } IndexReaderPtr DirectoryReader::doReopenFromWriter(bool openReadOnly, const IndexCommitPtr& commit) { BOOST_ASSERT(readOnly); if (!openReadOnly) { boost::throw_exception(IllegalArgumentException(L"a reader obtained from IndexWriter.getReader() can only be reopened with openReadOnly=true (got false)")); } if (commit) { boost::throw_exception(IllegalArgumentException(L"a reader obtained from IndexWriter.getReader() cannot currently accept a commit")); } return 
IndexWriterPtr(_writer)->getReader(); } IndexReaderPtr DirectoryReader::doReopen(bool openReadOnly, const IndexCommitPtr& commit) { ensureOpen(); BOOST_ASSERT(!commit || openReadOnly); IndexWriterPtr writer(_writer.lock()); // If we were obtained by writer.getReader(), re-ask the writer to get a new reader. if (writer) { return doReopenFromWriter(openReadOnly, commit); } else { return doReopenNoWriter(openReadOnly, commit); } } IndexReaderPtr DirectoryReader::doReopenNoWriter(bool openReadOnly, const IndexCommitPtr& commit) { SyncLock syncLock(this); if (!commit) { if (_hasChanges) { // We have changes, which means we are not readOnly BOOST_ASSERT(!readOnly); // and we hold the write lock BOOST_ASSERT(writeLock); // so no other writer holds the write lock, which means no changes could have been done to the index BOOST_ASSERT(isCurrent()); if (openReadOnly) { return boost::dynamic_pointer_cast(clone(openReadOnly)); } else { return shared_from_this(); } } else if (isCurrent()) { if (openReadOnly != readOnly) { // Just fallback to clone return boost::dynamic_pointer_cast(clone(openReadOnly)); } else { return shared_from_this(); } } } else { if (_directory != commit->getDirectory()) { boost::throw_exception(IOException(L"the specified commit does not match the specified Directory")); } if (segmentInfos && commit->getSegmentsFileName() == segmentInfos->getCurrentSegmentFileName()) { if (readOnly != openReadOnly) { // Just fallback to clone return boost::dynamic_pointer_cast(clone(openReadOnly)); } else { return shared_from_this(); } } } return newLucene(shared_from_this(), openReadOnly, newLucene(), _directory)->run(commit); } DirectoryReaderPtr DirectoryReader::doReopen(const SegmentInfosPtr& infos, bool doClone, bool openReadOnly) { SyncLock syncLock(this); if (openReadOnly) { return newLucene(_directory, infos, subReaders, starts, normsCache, doClone, termInfosIndexDivisor); } else { return newLucene(_directory, infos, subReaders, starts, normsCache, false, doClone, 
termInfosIndexDivisor); } } int64_t DirectoryReader::getVersion() { ensureOpen(); return segmentInfos->getVersion(); } Collection DirectoryReader::getTermFreqVectors(int32_t docNumber) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num return subReaders[i]->getTermFreqVectors(docNumber - starts[i]); // dispatch to segment } TermFreqVectorPtr DirectoryReader::getTermFreqVector(int32_t docNumber, const String& field) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num return subReaders[i]->getTermFreqVector(docNumber - starts[i], field); } void DirectoryReader::getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num subReaders[i]->getTermFreqVector(docNumber - starts[i], field, mapper); } void DirectoryReader::getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num subReaders[i]->getTermFreqVector(docNumber - starts[i], mapper); } bool DirectoryReader::isOptimized() { ensureOpen(); return (segmentInfos->size() == 1 && !hasDeletions()); } int32_t DirectoryReader::numDocs() { // Don't call ensureOpen() here (it could affect performance) // NOTE: multiple threads may wind up init'ing numDocs... 
but that's harmless if (_numDocs == -1) { // check cache int32_t n = 0; // cache miss - recompute for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { n += (*reader)->numDocs(); // sum from readers } _numDocs = n; } return _numDocs; } int32_t DirectoryReader::maxDoc() { // Don't call ensureOpen() here (it could affect performance) return _maxDoc; } DocumentPtr DirectoryReader::document(int32_t n, const FieldSelectorPtr& fieldSelector) { ensureOpen(); int32_t i = readerIndex(n); // find segment num return subReaders[i]->document(n - starts[i], fieldSelector); // dispatch to segment reader } bool DirectoryReader::isDeleted(int32_t n) { // Don't call ensureOpen() here (it could affect performance) int32_t i = readerIndex(n); // find segment num return subReaders[i]->isDeleted(n - starts[i]); // dispatch to segment reader } bool DirectoryReader::hasDeletions() { // Don't call ensureOpen() here (it could affect performance) return _hasDeletions; } void DirectoryReader::doDelete(int32_t docNum) { _numDocs = -1; // invalidate cache int32_t i = readerIndex(docNum); // find segment num subReaders[i]->deleteDocument(docNum - starts[i]); // dispatch to segment reader _hasDeletions = true; } void DirectoryReader::doUndeleteAll() { for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { (*reader)->undeleteAll(); } _hasDeletions = false; _numDocs = -1; // invalidate cache } int32_t DirectoryReader::readerIndex(int32_t n) { return readerIndex(n, this->starts, this->subReaders.size()); } int32_t DirectoryReader::readerIndex(int32_t n, Collection starts, int32_t numSubReaders) { // Binary search to locate reader Collection::iterator reader = std::upper_bound(starts.begin(), starts.begin() + numSubReaders, n); return (int32_t)(std::distance(starts.begin(), reader) - 1); } bool DirectoryReader::hasNorms(const String& field) { ensureOpen(); for (Collection::iterator reader = subReaders.begin(); reader != 
subReaders.end(); ++reader) { if ((*reader)->hasNorms(field)) { return true; } } return false; } ByteArray DirectoryReader::norms(const String& field) { SyncLock syncLock(this); ensureOpen(); ByteArray bytes(normsCache.get(field)); if (bytes) { return bytes; // cache hit } if (!hasNorms(field)) { return ByteArray(); } bytes = ByteArray::newInstance(maxDoc()); for (int32_t i = 0; i < subReaders.size(); ++i) { subReaders[i]->norms(field, bytes, starts[i]); } normsCache.put(field, bytes); // update cache return bytes; } void DirectoryReader::norms(const String& field, ByteArray norms, int32_t offset) { SyncLock syncLock(this); ensureOpen(); ByteArray bytes(normsCache.get(field)); if (!bytes && !hasNorms(field)) { MiscUtils::arrayFill(norms.get(), offset, norms.size(), DefaultSimilarity::encodeNorm(1.0)); } else if (bytes) { // cache hit MiscUtils::arrayCopy(bytes.get(), 0, norms.get(), offset, maxDoc()); } else { for (int32_t i = 0; i < subReaders.size(); ++i) { // read from segments subReaders[i]->norms(field, norms, offset + starts[i]); } } } void DirectoryReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { { SyncLock normsLock(&normsCache); normsCache.remove(field); // clear cache } int32_t i = readerIndex(doc); // find segment num subReaders[i]->setNorm(doc - starts[i], field, value); // dispatch } TermEnumPtr DirectoryReader::terms() { ensureOpen(); return newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts, TermPtr()); } TermEnumPtr DirectoryReader::terms(const TermPtr& t) { ensureOpen(); return newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts, t); } int32_t DirectoryReader::docFreq(const TermPtr& t) { ensureOpen(); int32_t total = 0; // sum freqs in segments for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { total += (*reader)->docFreq(t); } return total; } TermDocsPtr DirectoryReader::termDocs() { 
ensureOpen(); return newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts); } TermPositionsPtr DirectoryReader::termPositions() { ensureOpen(); return newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts); } void DirectoryReader::acquireWriteLock() { if (readOnly) { // NOTE: we should not reach this code with the core IndexReader classes; // however, an external subclass of IndexReader could reach this. ReadOnlySegmentReader::noWrite(); } if (segmentInfos) { ensureOpen(); if (stale) { boost::throw_exception(StaleReaderException(L"IndexReader out of date and no longer valid for delete, undelete, or setNorm operations")); } if (!writeLock) { LockPtr writeLock(_directory->makeLock(IndexWriter::WRITE_LOCK_NAME)); if (!writeLock->obtain((int32_t)IndexWriter::WRITE_LOCK_TIMEOUT)) { // obtain write lock boost::throw_exception(LockObtainFailedException(L"Index locked for write: " + writeLock->toString())); } this->writeLock = writeLock; // we have to check whether index has changed since this reader was opened. // if so, this reader is no longer valid for deletion if (SegmentInfos::readCurrentVersion(_directory) > maxIndexVersion) { stale = true; this->writeLock->release(); this->writeLock.reset(); boost::throw_exception(StaleReaderException(L"IndexReader out of date and no longer valid for delete, undelete, or setNorm operations")); } } } } void DirectoryReader::doCommit(MapStringString commitUserData) { if (_hasChanges) { segmentInfos->setUserData(commitUserData); // Default deleter (for backwards compatibility) is KeepOnlyLastCommitDeleter IndexFileDeleterPtr deleter(newLucene(_directory, deletionPolicy ? 
deletionPolicy : newLucene(), segmentInfos, InfoStreamPtr(), DocumentsWriterPtr(), synced)); segmentInfos->updateGeneration(deleter->getLastSegmentInfos()); // Checkpoint the state we are about to change, in case we have to roll back startCommit(); bool success = false; LuceneException finally; try { for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { (*reader)->commit(); } // Sync all files we just wrote HashSet files(segmentInfos->files(_directory, false)); for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { if (!synced.contains(*fileName)) { BOOST_ASSERT(_directory->fileExists(*fileName)); _directory->sync(*fileName); synced.add(*fileName); } } segmentInfos->commit(_directory); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { // Rollback changes that were made to SegmentInfos but failed to get [fully] // committed. This way this reader instance remains consistent (matched to what's // actually in the index) rollbackCommit(); // Recompute deletable files & remove them (so partially written .del files, etc, // are removed) deleter->refresh(); } finally.throwException(); // Have the deleter remove any now unreferenced files due to this commit deleter->checkpoint(segmentInfos, true); deleter->close(); maxIndexVersion = segmentInfos->getVersion(); if (writeLock) { writeLock->release(); // release write lock writeLock.reset(); } } _hasChanges = false; } void DirectoryReader::startCommit() { rollbackHasChanges = _hasChanges; for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { (*reader)->startCommit(); } } void DirectoryReader::rollbackCommit() { _hasChanges = rollbackHasChanges; for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { (*reader)->rollbackCommit(); } } MapStringString DirectoryReader::getCommitUserData() { ensureOpen(); return segmentInfos->getUserData(); } bool 
DirectoryReader::isCurrent() { ensureOpen(); IndexWriterPtr writer(_writer.lock()); if (!writer || writer->isClosed()) { // we loaded SegmentInfos from the directory return (SegmentInfos::readCurrentVersion(_directory) == segmentInfos->getVersion()); } else { return writer->nrtIsCurrent(segmentInfosStart); } } void DirectoryReader::doClose() { SyncLock syncLock(this); LuceneException ioe; normsCache.reset(); for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { // try to close each reader, even if an exception is thrown try { (*reader)->decRef(); } catch (LuceneException& e) { if (ioe.isNull()) { ioe = e; } } } // NOTE: only needed in case someone had asked for FieldCache for top-level reader (which is // generally not a good idea): FieldCache::DEFAULT()->purge(shared_from_this()); // throw the first exception ioe.throwException(); } HashSet DirectoryReader::getFieldNames(FieldOption fieldOption) { ensureOpen(); return getFieldNames(fieldOption, Collection::newInstance(subReaders.begin(), subReaders.end())); } HashSet DirectoryReader::getFieldNames(FieldOption fieldOption, Collection subReaders) { // maintain a unique set of field names HashSet fieldSet(HashSet::newInstance()); for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { HashSet names((*reader)->getFieldNames(fieldOption)); fieldSet.addAll(names.begin(), names.end()); } return fieldSet; } Collection DirectoryReader::getSequentialSubReaders() { return Collection::newInstance(subReaders.begin(), subReaders.end()); } DirectoryPtr DirectoryReader::directory() { // Don't ensureOpen here -- in certain cases, when a cloned/reopened reader needs to commit, it may call // this method on the closed original reader return _directory; } int32_t DirectoryReader::getTermInfosIndexDivisor() { return termInfosIndexDivisor; } IndexCommitPtr DirectoryReader::getIndexCommit() { return newLucene(segmentInfos, _directory); } Collection 
DirectoryReader::listCommits(const DirectoryPtr& dir) { HashSet files(dir->listAll()); Collection commits(Collection::newInstance()); SegmentInfosPtr latest(newLucene()); latest->read(dir); int64_t currentGen = latest->getGeneration(); commits.add(newLucene(latest, dir)); for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { if (boost::starts_with(*fileName, IndexFileNames::SEGMENTS()) && *fileName != IndexFileNames::SEGMENTS_GEN() && SegmentInfos::generationFromSegmentsFileName(*fileName) < currentGen) { SegmentInfosPtr sis(newLucene()); try { // IOException allowed to throw there, in case segments_N is corrupt sis->read(dir, *fileName); } catch (FileNotFoundException&) { sis.reset(); } if (sis) { commits.add(newLucene(sis, dir)); } } } return commits; } FindSegmentsOpen::FindSegmentsOpen(bool readOnly, const IndexDeletionPolicyPtr& deletionPolicy, int32_t termInfosIndexDivisor, const SegmentInfosPtr& infos, const DirectoryPtr& directory) : FindSegmentsFileT(infos, directory) { this->readOnly = readOnly; this->deletionPolicy = deletionPolicy; this->termInfosIndexDivisor = termInfosIndexDivisor; } FindSegmentsOpen::~FindSegmentsOpen() { } IndexReaderPtr FindSegmentsOpen::doBody(const String& segmentFileName) { SegmentInfosPtr segmentInfos(_segmentInfos); segmentInfos->read(directory, segmentFileName); if (readOnly) { return newLucene(directory, segmentInfos, deletionPolicy, termInfosIndexDivisor); } else { return newLucene(directory, segmentInfos, deletionPolicy, false, termInfosIndexDivisor); } } FindSegmentsReopen::FindSegmentsReopen(const DirectoryReaderPtr& reader, bool openReadOnly, const SegmentInfosPtr& infos, const DirectoryPtr& directory) : FindSegmentsFileT(infos, directory) { this->_reader = reader; this->openReadOnly = openReadOnly; } FindSegmentsReopen::~FindSegmentsReopen() { } DirectoryReaderPtr FindSegmentsReopen::doBody(const String& segmentFileName) { SegmentInfosPtr segmentInfos(_segmentInfos); 
segmentInfos->read(directory, segmentFileName); return DirectoryReaderPtr(_reader)->doReopen(segmentInfos, false, openReadOnly); } MultiTermEnum::MultiTermEnum(const IndexReaderPtr& topReader, Collection readers, Collection starts, const TermPtr& t) { _docFreq = 0; this->_topReader = topReader; queue = newLucene(readers.size()); matchingSegments = Collection::newInstance(readers.size() + 1); for (int32_t i = 0; i < readers.size(); ++i) { IndexReaderPtr reader(readers[i]); TermEnumPtr termEnum; if (t) { termEnum = reader->terms(t); } else { termEnum = reader->terms(); } SegmentMergeInfoPtr smi(newLucene(starts[i], termEnum, reader)); smi->ord = i; if (t.get() != NULL ? termEnum->term().get() != NULL : smi->next()) { queue->add(smi); // initialize queue } else { smi->close(); } } if (t && !queue->empty()) { next(); } } MultiTermEnum::~MultiTermEnum() { } bool MultiTermEnum::next() { for (Collection::iterator smi = matchingSegments.begin(); smi != matchingSegments.end(); ++smi) { if (!(*smi)) { break; } if ((*smi)->next()) { queue->add(*smi); } else { (*smi)->close(); // done with segment } } int32_t numMatchingSegments = 0; matchingSegments[0].reset(); SegmentMergeInfoPtr top(queue->top()); if (!top) { _term.reset(); return false; } _term = top->term; _docFreq = 0; while (top && _term->compareTo(top->term) == 0) { matchingSegments[numMatchingSegments++] = top; queue->pop(); _docFreq += top->termEnum->docFreq(); // increment freq top = queue->top(); } matchingSegments[numMatchingSegments].reset(); return true; } TermPtr MultiTermEnum::term() { return _term; } int32_t MultiTermEnum::docFreq() { return _docFreq; } void MultiTermEnum::close() { queue->close(); } MultiTermDocs::MultiTermDocs(const IndexReaderPtr& topReader, Collection r, Collection s) { this->_topReader = topReader; readers = r; starts = s; base = 0; pointer = 0; readerTermDocs = Collection::newInstance(r.size()); } MultiTermDocs::~MultiTermDocs() { } int32_t MultiTermDocs::doc() { return base + 
current->doc(); } int32_t MultiTermDocs::freq() { return current->freq(); } void MultiTermDocs::seek(const TermPtr& term) { this->term = term; this->base = 0; this->pointer = 0; this->current.reset(); this->tenum.reset(); this->smi.reset(); this->matchingSegmentPos = 0; } void MultiTermDocs::seek(const TermEnumPtr& termEnum) { seek(termEnum->term()); MultiTermEnumPtr multiTermEnum(boost::dynamic_pointer_cast(termEnum)); if (multiTermEnum) { tenum = multiTermEnum; if (IndexReaderPtr(_topReader) != IndexReaderPtr(tenum->_topReader)) { tenum.reset(); } } } bool MultiTermDocs::next() { while (true) { if (current && current->next()) { return true; } else if (pointer < readers.size()) { if (tenum) { smi = tenum->matchingSegments[matchingSegmentPos++]; if (!smi) { pointer = readers.size(); return false; } pointer = smi->ord; } base = starts[pointer]; current = termDocs(pointer++); } else { return false; } } } int32_t MultiTermDocs::read(Collection& docs, Collection& freqs) { while (true) { while (!current) { if (pointer < readers.size()) { // try next segment if (tenum) { smi = tenum->matchingSegments[matchingSegmentPos++]; if (!smi) { pointer = readers.size(); return 0; } pointer = smi->ord; } base = starts[pointer]; current = termDocs(pointer++); } else { return 0; } } int32_t end = current->read(docs, freqs); if (end == 0) { // none left in segment current.reset(); } else { // got some for (int32_t i = 0; i < end; ++i) { // adjust doc numbers docs[i] += base; } return end; } } } bool MultiTermDocs::skipTo(int32_t target) { while (true) { if (current && current->skipTo(target - base)) { return true; } else if (pointer < readers.size()) { if (tenum) { smi = tenum->matchingSegments[matchingSegmentPos++]; if (!smi) { pointer = readers.size(); return false; } pointer = smi->ord; } base = starts[pointer]; current = termDocs(pointer++); } else { return false; } } } TermDocsPtr MultiTermDocs::termDocs(int32_t i) { TermDocsPtr result(readerTermDocs[i]); if (!result) { 
readerTermDocs[i] = termDocs(readers[i]); result = readerTermDocs[i]; } if (smi) { BOOST_ASSERT(smi->ord == i); BOOST_ASSERT(smi->termEnum->term()->equals(term)); result->seek(smi->termEnum); } else { result->seek(term); } return result; } TermDocsPtr MultiTermDocs::termDocs(const IndexReaderPtr& reader) { return term ? reader->termDocs() : reader->termDocs(TermPtr()); } void MultiTermDocs::close() { for (Collection::iterator termDoc = readerTermDocs.begin(); termDoc != readerTermDocs.end(); ++termDoc) { if (*termDoc) { (*termDoc)->close(); } } } MultiTermPositions::MultiTermPositions(const IndexReaderPtr& topReader, Collection r, Collection s) : MultiTermDocs(topReader, r, s) { } MultiTermPositions::~MultiTermPositions() { } TermDocsPtr MultiTermPositions::termDocs(const IndexReaderPtr& reader) { return reader->termPositions(); } int32_t MultiTermPositions::nextPosition() { return boost::static_pointer_cast(current)->nextPosition(); } int32_t MultiTermPositions::getPayloadLength() { return boost::static_pointer_cast(current)->getPayloadLength(); } ByteArray MultiTermPositions::getPayload(ByteArray data, int32_t offset) { return boost::static_pointer_cast(current)->getPayload(data, offset); } bool MultiTermPositions::isPayloadAvailable() { return boost::static_pointer_cast(current)->isPayloadAvailable(); } ReaderCommit::ReaderCommit(const SegmentInfosPtr& infos, const DirectoryPtr& dir) { segmentsFileName = infos->getCurrentSegmentFileName(); this->dir = dir; userData = infos->getUserData(); HashSet files(infos->files(dir, true)); this->files = HashSet::newInstance(files.begin(), files.end()); version = infos->getVersion(); generation = infos->getGeneration(); _isOptimized = infos->size() == 1 && !infos->info(0)->hasDeletions(); } ReaderCommit::~ReaderCommit() { } String ReaderCommit::toString() { return L"DirectoryReader::ReaderCommit(" + segmentsFileName + L")"; } bool ReaderCommit::isOptimized() { return _isOptimized; } String ReaderCommit::getSegmentsFileName() 
{ return segmentsFileName; } HashSet ReaderCommit::getFileNames() { return files; } DirectoryPtr ReaderCommit::getDirectory() { return dir; } int64_t ReaderCommit::getVersion() { return version; } int64_t ReaderCommit::getGeneration() { return generation; } bool ReaderCommit::isDeleted() { return false; } MapStringString ReaderCommit::getUserData() { return userData; } void ReaderCommit::deleteCommit() { boost::throw_exception(UnsupportedOperationException(L"This IndexCommit does not support deletions.")); } } LucenePlusPlus-rel_3.0.9/src/core/index/DocConsumer.cpp000066400000000000000000000006731456444476200231110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocConsumer.h" namespace Lucene { DocConsumer::~DocConsumer() { } } LucenePlusPlus-rel_3.0.9/src/core/index/DocConsumerPerThread.cpp000066400000000000000000000007261456444476200247070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocConsumerPerThread.h" namespace Lucene { DocConsumerPerThread::~DocConsumerPerThread() { } } LucenePlusPlus-rel_3.0.9/src/core/index/DocFieldConsumer.cpp000066400000000000000000000010701456444476200240450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldConsumer.h" namespace Lucene { DocFieldConsumer::~DocFieldConsumer() { } void DocFieldConsumer::setFieldInfos(const FieldInfosPtr& fieldInfos) { this->fieldInfos = fieldInfos; } } LucenePlusPlus-rel_3.0.9/src/core/index/DocFieldConsumerPerField.cpp000066400000000000000000000007421456444476200254650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldConsumerPerField.h" namespace Lucene { DocFieldConsumerPerField::~DocFieldConsumerPerField() { } } LucenePlusPlus-rel_3.0.9/src/core/index/DocFieldConsumerPerThread.cpp000066400000000000000000000007451456444476200256540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldConsumerPerThread.h" namespace Lucene { DocFieldConsumerPerThread::~DocFieldConsumerPerThread() { } } LucenePlusPlus-rel_3.0.9/src/core/index/DocFieldConsumers.cpp000066400000000000000000000120561456444476200242360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldConsumers.h" #include "DocFieldConsumersPerField.h" #include "DocFieldConsumersPerThread.h" #include "MiscUtils.h" namespace Lucene { DocFieldConsumers::DocFieldConsumers(const DocFieldConsumerPtr& one, const DocFieldConsumerPtr& two) { freeCount = 0; allocCount = 0; docFreeList = Collection::newInstance(1); this->one = one; this->two = two; } DocFieldConsumers::~DocFieldConsumers() { } void DocFieldConsumers::setFieldInfos(const FieldInfosPtr& fieldInfos) { DocFieldConsumer::setFieldInfos(fieldInfos); one->setFieldInfos(fieldInfos); two->setFieldInfos(fieldInfos); } void DocFieldConsumers::flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) { MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField oneThreadsAndFields(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::newInstance()); MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField twoThreadsAndFields(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::newInstance()); for (MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { Collection oneFields(Collection::newInstance()); Collection twoFields(Collection::newInstance()); for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) { oneFields.add(boost::static_pointer_cast(*perField)->one); twoFields.add(boost::static_pointer_cast(*perField)->two); } oneThreadsAndFields.put(boost::static_pointer_cast(entry->first)->one, oneFields); twoThreadsAndFields.put(boost::static_pointer_cast(entry->first)->two, oneFields); } one->flush(oneThreadsAndFields, state); 
two->flush(twoThreadsAndFields, state); } void DocFieldConsumers::closeDocStore(const SegmentWriteStatePtr& state) { LuceneException finally; try { one->closeDocStore(state); } catch (LuceneException& e) { finally = e; } try { two->closeDocStore(state); } catch (LuceneException& e) { finally = e; } finally.throwException(); } bool DocFieldConsumers::freeRAM() { return (one->freeRAM() || two->freeRAM()); } DocFieldConsumerPerThreadPtr DocFieldConsumers::addThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread) { return newLucene(docFieldProcessorPerThread, shared_from_this(), one->addThread(docFieldProcessorPerThread), two->addThread(docFieldProcessorPerThread)); } DocFieldConsumersPerDocPtr DocFieldConsumers::getPerDoc() { SyncLock syncLock(this); if (freeCount == 0) { ++allocCount; if (allocCount > docFreeList.size()) { // Grow our free list up front to make sure we have enough space to recycle all outstanding // PerDoc instances BOOST_ASSERT(allocCount == 1 + docFreeList.size()); docFreeList.resize(MiscUtils::getNextSize(allocCount)); } return newLucene(shared_from_this()); } else { return docFreeList[--freeCount]; } } void DocFieldConsumers::freePerDoc(const DocFieldConsumersPerDocPtr& perDoc) { SyncLock syncLock(this); BOOST_ASSERT(freeCount < docFreeList.size()); docFreeList[freeCount++] = perDoc; } DocFieldConsumersPerDoc::DocFieldConsumersPerDoc(const DocFieldConsumersPtr& fieldConsumers) { this->_fieldConsumers = fieldConsumers; } DocFieldConsumersPerDoc::~DocFieldConsumersPerDoc() { } int64_t DocFieldConsumersPerDoc::sizeInBytes() { return one->sizeInBytes() + two->sizeInBytes(); } void DocFieldConsumersPerDoc::finish() { LuceneException finally; try { one->finish(); } catch (LuceneException& e) { finally = e; } try { two->finish(); } catch (LuceneException& e) { finally = e; } DocFieldConsumersPtr(_fieldConsumers)->freePerDoc(shared_from_this()); finally.throwException(); } void DocFieldConsumersPerDoc::abort() { LuceneException finally; 
try { one->abort(); } catch (LuceneException& e) { finally = e; } try { two->abort(); } catch (LuceneException& e) { finally = e; } DocFieldConsumersPtr(_fieldConsumers)->freePerDoc(shared_from_this()); finally.throwException(); } } LucenePlusPlus-rel_3.0.9/src/core/index/DocFieldConsumersPerField.cpp000066400000000000000000000023001456444476200256400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldConsumersPerField.h" namespace Lucene { DocFieldConsumersPerField::DocFieldConsumersPerField(const DocFieldConsumersPerThreadPtr& perThread, const DocFieldConsumerPerFieldPtr& one, const DocFieldConsumerPerFieldPtr& two) { this->_perThread = perThread; this->one = one; this->two = two; } DocFieldConsumersPerField::~DocFieldConsumersPerField() { } void DocFieldConsumersPerField::processFields(Collection fields, int32_t count) { one->processFields(fields, count); two->processFields(fields, count); } void DocFieldConsumersPerField::abort() { LuceneException finally; try { one->abort(); } catch (LuceneException& e) { finally = e; } try { two->abort(); } catch (LuceneException& e) { finally = e; } finally.throwException(); } } LucenePlusPlus-rel_3.0.9/src/core/index/DocFieldConsumersPerThread.cpp000066400000000000000000000041621456444476200260340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldConsumersPerThread.h" #include "DocFieldProcessorPerThread.h" #include "DocFieldConsumers.h" #include "DocFieldConsumersPerField.h" namespace Lucene { DocFieldConsumersPerThread::DocFieldConsumersPerThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread, const DocFieldConsumersPtr& parent, const DocFieldConsumerPerThreadPtr& one, const DocFieldConsumerPerThreadPtr& two) { this->_parent = parent; this->one = one; this->two = two; docState = docFieldProcessorPerThread->docState; } DocFieldConsumersPerThread::~DocFieldConsumersPerThread() { } void DocFieldConsumersPerThread::startDocument() { one->startDocument(); two->startDocument(); } void DocFieldConsumersPerThread::abort() { LuceneException finally; try { one->abort(); } catch (LuceneException& e) { finally = e; } try { two->abort(); } catch (LuceneException& e) { finally = e; } finally.throwException(); } DocWriterPtr DocFieldConsumersPerThread::finishDocument() { DocWriterPtr oneDoc(one->finishDocument()); DocWriterPtr twoDoc(two->finishDocument()); if (!oneDoc) { return twoDoc; } else if (!twoDoc) { return oneDoc; } else { DocFieldConsumersPerDocPtr both(DocFieldConsumersPtr(_parent)->getPerDoc()); both->docID = docState->docID; BOOST_ASSERT(oneDoc->docID == docState->docID); BOOST_ASSERT(twoDoc->docID == docState->docID); both->one = oneDoc; both->two = twoDoc; return both; } } DocFieldConsumerPerFieldPtr DocFieldConsumersPerThread::addField(const FieldInfoPtr& fi) { return newLucene(shared_from_this(), one->addField(fi), two->addField(fi)); } } LucenePlusPlus-rel_3.0.9/src/core/index/DocFieldProcessor.cpp000066400000000000000000000051711456444476200242370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldProcessor.h" #include "DocFieldProcessorPerThread.h" #include "DocFieldConsumerPerThread.h" #include "DocFieldConsumer.h" #include "StoredFieldsWriter.h" #include "SegmentWriteState.h" #include "IndexFileNames.h" #include "FieldInfos.h" #include "TestPoint.h" namespace Lucene { DocFieldProcessor::DocFieldProcessor(const DocumentsWriterPtr& docWriter, const DocFieldConsumerPtr& consumer) { this->fieldInfos = newLucene(); this->_docWriter = docWriter; this->consumer = consumer; consumer->setFieldInfos(fieldInfos); fieldsWriter = newLucene(docWriter, fieldInfos); } DocFieldProcessor::~DocFieldProcessor() { } void DocFieldProcessor::closeDocStore(const SegmentWriteStatePtr& state) { consumer->closeDocStore(state); fieldsWriter->closeDocStore(state); } void DocFieldProcessor::flush(Collection threads, const SegmentWriteStatePtr& state) { TestScope testScope(L"DocFieldProcessor", L"flush"); MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField childThreadsAndFields(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::newInstance()); for (Collection::iterator thread = threads.begin(); thread != threads.end(); ++thread) { DocFieldProcessorPerThreadPtr perThread(boost::static_pointer_cast(*thread)); childThreadsAndFields.put(perThread->consumer, perThread->fields()); perThread->trimFields(state); } fieldsWriter->flush(state); consumer->flush(childThreadsAndFields, state); // Important to save after asking consumer to flush so consumer can alter the FieldInfo* if necessary. // eg FreqProxTermsWriter does this with FieldInfo.storePayload. 
String fileName(state->segmentFileName(IndexFileNames::FIELD_INFOS_EXTENSION())); fieldInfos->write(state->directory, fileName); state->flushedFiles.add(fileName); } void DocFieldProcessor::abort() { fieldsWriter->abort(); consumer->abort(); } bool DocFieldProcessor::freeRAM() { return consumer->freeRAM(); } DocConsumerPerThreadPtr DocFieldProcessor::addThread(const DocumentsWriterThreadStatePtr& perThread) { return newLucene(perThread, shared_from_this()); } } LucenePlusPlus-rel_3.0.9/src/core/index/DocFieldProcessorPerField.cpp000066400000000000000000000017441456444476200256540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldProcessorPerField.h" #include "DocFieldProcessorPerThread.h" #include "DocFieldConsumerPerThread.h" #include "DocFieldConsumerPerField.h" namespace Lucene { DocFieldProcessorPerField::DocFieldProcessorPerField(const DocFieldProcessorPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo) { lastGen = -1; fieldCount = 0; fields = Collection::newInstance(1); this->consumer = perThread->consumer->addField(fieldInfo); this->fieldInfo = fieldInfo; } DocFieldProcessorPerField::~DocFieldProcessorPerField() { } void DocFieldProcessorPerField::abort() { consumer->abort(); } } LucenePlusPlus-rel_3.0.9/src/core/index/DocFieldProcessorPerThread.cpp000066400000000000000000000262721456444476200260430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocFieldProcessorPerThread.h" #include "DocFieldProcessorPerField.h" #include "DocFieldProcessor.h" #include "DocFieldConsumer.h" #include "DocFieldConsumerPerThread.h" #include "DocFieldConsumerPerField.h" #include "DocumentsWriterThreadState.h" #include "DocumentsWriter.h" #include "StoredFieldsWriter.h" #include "StoredFieldsWriterPerThread.h" #include "SegmentWriteState.h" #include "FieldInfo.h" #include "FieldInfos.h" #include "Fieldable.h" #include "IndexWriter.h" #include "Document.h" #include "InfoStream.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { DocFieldProcessorPerThread::DocFieldProcessorPerThread(const DocumentsWriterThreadStatePtr& threadState, const DocFieldProcessorPtr& docFieldProcessor) { _fields = Collection::newInstance(1); fieldHash = Collection::newInstance(2); hashMask = 1; fieldGen = 0; fieldCount = 0; totalFieldCount = 0; this->docState = threadState->docState; this->_docFieldProcessor = docFieldProcessor; this->fieldInfos = docFieldProcessor->fieldInfos; docFreeList = Collection::newInstance(1); freeCount = 0; allocCount = 0; } DocFieldProcessorPerThread::~DocFieldProcessorPerThread() { } void DocFieldProcessorPerThread::initialize() { DocFieldProcessorPtr docFieldProcessor(_docFieldProcessor); consumer = docFieldProcessor->consumer->addThread(shared_from_this()); fieldsWriter = docFieldProcessor->fieldsWriter->addThread(docState); } void DocFieldProcessorPerThread::abort() { for (Collection::iterator field = fieldHash.begin(); field != fieldHash.end(); ++field) { DocFieldProcessorPerFieldPtr current(*field); while (current) { DocFieldProcessorPerFieldPtr next(current->next); current->abort(); current = next; } } fieldsWriter->abort(); consumer->abort(); } Collection DocFieldProcessorPerThread::fields() { Collection fields(Collection::newInstance()); for (Collection::iterator field = fieldHash.begin(); 
field != fieldHash.end(); ++field) { DocFieldProcessorPerFieldPtr current(*field); while (current) { fields.add(current->consumer); current = current->next; } } BOOST_ASSERT(fields.size() == totalFieldCount); return fields; } void DocFieldProcessorPerThread::trimFields(const SegmentWriteStatePtr& state) { for (Collection::iterator perField = fieldHash.begin(); perField != fieldHash.end(); ++perField) { DocFieldProcessorPerFieldPtr current(*perField); DocFieldProcessorPerFieldPtr lastPerField; while (current) { if (current->lastGen == -1) { // This field was not seen since the previous flush, so, free up its resources now // Unhash if (!lastPerField) { *perField = current->next; } else { lastPerField->next = current->next; } DocumentsWriterPtr docWriter(state->_docWriter); if (docWriter->infoStream) { *(docWriter->infoStream) << L" purge field=" << current->fieldInfo->name << L"\n"; } --totalFieldCount; } else { // Reset current->lastGen = -1; lastPerField = current; } current = current->next; } } } void DocFieldProcessorPerThread::rehash() { int32_t newHashSize = (fieldHash.size() * 2); BOOST_ASSERT(newHashSize > fieldHash.size()); Collection newHashArray(Collection::newInstance(newHashSize)); // Rehash int32_t newHashMask = newHashSize - 1; for (Collection::iterator fp0 = fieldHash.begin(); fp0 != fieldHash.end(); ++fp0) { DocFieldProcessorPerFieldPtr current(*fp0); while (current) { int32_t hashPos2 = StringUtils::hashCode(current->fieldInfo->name) & newHashMask; DocFieldProcessorPerFieldPtr nextFP0(current->next); current->next = newHashArray[hashPos2]; newHashArray[hashPos2] = current; current = nextFP0; } } fieldHash = newHashArray; hashMask = newHashMask; } struct lessFieldInfoName { inline bool operator()(const DocFieldProcessorPerFieldPtr& first, const DocFieldProcessorPerFieldPtr& second) const { return (first->fieldInfo->name < second->fieldInfo->name); } }; DocWriterPtr DocFieldProcessorPerThread::processDocument() { consumer->startDocument(); 
fieldsWriter->startDocument(); DocumentPtr doc(docState->doc); DocFieldProcessorPtr docFieldProcessor(_docFieldProcessor); DocumentsWriterPtr docWriter(docFieldProcessor->_docWriter); bool testPoint = IndexWriterPtr(docWriter->_writer)->testPoint(L"DocumentsWriter.ThreadState.init start"); BOOST_ASSERT(testPoint); fieldCount = 0; int32_t thisFieldGen = fieldGen++; Collection docFields(doc->getFields()); // Absorb any new fields first seen in this document. // Also absorb any changes to fields we had already seen before (eg suddenly turning on norms or // vectors, etc.) for (Collection::iterator field = docFields.begin(); field != docFields.end(); ++field) { String fieldName((*field)->name()); // Make sure we have a PerField allocated int32_t hashPos = StringUtils::hashCode(fieldName) & hashMask; DocFieldProcessorPerFieldPtr fp(fieldHash[hashPos]); while (fp && fp->fieldInfo->name != fieldName) { fp = fp->next; } if (!fp) { FieldInfoPtr fi(fieldInfos->add(fieldName, (*field)->isIndexed(), (*field)->isTermVectorStored(), (*field)->isStorePositionWithTermVector(), (*field)->isStoreOffsetWithTermVector(), (*field)->getOmitNorms(), false, (*field)->getOmitTermFreqAndPositions())); fp = newLucene(shared_from_this(), fi); fp->next = fieldHash[hashPos]; fieldHash[hashPos] = fp; ++totalFieldCount; if (totalFieldCount >= fieldHash.size() / 2) { rehash(); } } else { fp->fieldInfo->update((*field)->isIndexed(), (*field)->isTermVectorStored(), (*field)->isStorePositionWithTermVector(), (*field)->isStoreOffsetWithTermVector(), (*field)->getOmitNorms(), false, (*field)->getOmitTermFreqAndPositions()); } if (thisFieldGen != fp->lastGen) { // First time we're seeing this field for this doc fp->fieldCount = 0; if (fieldCount == _fields.size()) { _fields.resize(_fields.size() * 2); } _fields[fieldCount++] = fp; fp->lastGen = thisFieldGen; } if (fp->fieldCount == fp->fields.size()) { fp->fields.resize(fp->fields.size() * 2); } fp->fields[fp->fieldCount++] = *field; if 
((*field)->isStored()) { fieldsWriter->addField(*field, fp->fieldInfo); } } // If we are writing vectors then we must visit fields in sorted order so they are written in sorted order. std::sort(_fields.begin(), _fields.begin() + fieldCount, lessFieldInfoName()); for (int32_t i = 0; i < fieldCount; ++i) { _fields[i]->consumer->processFields(_fields[i]->fields, _fields[i]->fieldCount); } if (!docState->maxTermPrefix.empty() && docState->infoStream) { *(docState->infoStream) << L"WARNING: document contains at least one immense term (longer than the max length " << StringUtils::toString(DocumentsWriter::MAX_TERM_LENGTH) << L"), all of which were skipped. " << L"Please correct the analyzer to not produce such terms. The prefix of the first immense " << L"term is: '" << StringUtils::toString(docState->maxTermPrefix) << L"...'\n"; docState->maxTermPrefix.clear(); } DocWriterPtr one(fieldsWriter->finishDocument()); DocWriterPtr two(consumer->finishDocument()); if (!one) { return two; } else if (!two) { return one; } else { DocFieldProcessorPerThreadPerDocPtr both(getPerDoc()); both->docID = docState->docID; BOOST_ASSERT(one->docID == docState->docID); BOOST_ASSERT(two->docID == docState->docID); both->one = one; both->two = two; return both; } } DocFieldProcessorPerThreadPerDocPtr DocFieldProcessorPerThread::getPerDoc() { SyncLock syncLock(this); if (freeCount == 0) { ++allocCount; if (allocCount > docFreeList.size()) { // Grow our free list up front to make sure we have enough space to recycle all // outstanding PerDoc instances BOOST_ASSERT(allocCount == docFreeList.size() + 1); docFreeList.resize(MiscUtils::getNextSize(allocCount)); } return newLucene(shared_from_this()); } else { return docFreeList[--freeCount]; } } void DocFieldProcessorPerThread::freePerDoc(const DocFieldProcessorPerThreadPerDocPtr& perDoc) { SyncLock syncLock(this); BOOST_ASSERT(freeCount < docFreeList.size()); docFreeList[freeCount++] = perDoc; } 
DocFieldProcessorPerThreadPerDoc::DocFieldProcessorPerThreadPerDoc(const DocFieldProcessorPerThreadPtr& docProcessor) { this->_docProcessor = docProcessor; } DocFieldProcessorPerThreadPerDoc::~DocFieldProcessorPerThreadPerDoc() { } int64_t DocFieldProcessorPerThreadPerDoc::sizeInBytes() { return one->sizeInBytes() + two->sizeInBytes(); } void DocFieldProcessorPerThreadPerDoc::finish() { LuceneException finally; try { try { one->finish(); } catch (LuceneException& e) { finally = e; } two->finish(); } catch (LuceneException& e) { finally = e; } DocFieldProcessorPerThreadPtr(_docProcessor)->freePerDoc(shared_from_this()); finally.throwException(); } void DocFieldProcessorPerThreadPerDoc::abort() { LuceneException finally; try { try { one->abort(); } catch (LuceneException& e) { finally = e; } two->abort(); } catch (LuceneException& e) { finally = e; } DocFieldProcessorPerThreadPtr(_docProcessor)->freePerDoc(shared_from_this()); finally.throwException(); } } LucenePlusPlus-rel_3.0.9/src/core/index/DocInverter.cpp000066400000000000000000000063471456444476200231200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocInverter.h" #include "InvertedDocConsumer.h" #include "InvertedDocEndConsumer.h" #include "InvertedDocConsumerPerThread.h" #include "InvertedDocEndConsumerPerThread.h" #include "DocFieldConsumerPerThread.h" #include "DocFieldConsumerPerField.h" #include "DocInverterPerField.h" #include "DocInverterPerThread.h" namespace Lucene { DocInverter::DocInverter(const InvertedDocConsumerPtr& consumer, const InvertedDocEndConsumerPtr& endConsumer) { this->consumer = consumer; this->endConsumer = endConsumer; } DocInverter::~DocInverter() { } void DocInverter::setFieldInfos(const FieldInfosPtr& fieldInfos) { DocFieldConsumer::setFieldInfos(fieldInfos); consumer->setFieldInfos(fieldInfos); endConsumer->setFieldInfos(fieldInfos); } void DocInverter::flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) { MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField childThreadsAndFields(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField::newInstance()); MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField endChildThreadsAndFields(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField::newInstance()); for (MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { Collection childFields(Collection::newInstance()); Collection endChildFields(Collection::newInstance()); for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) { childFields.add(boost::static_pointer_cast(*perField)->consumer); endChildFields.add(boost::static_pointer_cast(*perField)->endConsumer); } childThreadsAndFields.put(boost::static_pointer_cast(entry->first)->consumer, childFields); 
endChildThreadsAndFields.put(boost::static_pointer_cast(entry->first)->endConsumer, endChildFields); } consumer->flush(childThreadsAndFields, state); endConsumer->flush(endChildThreadsAndFields, state); } void DocInverter::closeDocStore(const SegmentWriteStatePtr& state) { consumer->closeDocStore(state); endConsumer->closeDocStore(state); } void DocInverter::abort() { consumer->abort(); endConsumer->abort(); } bool DocInverter::freeRAM() { return consumer->freeRAM(); } DocFieldConsumerPerThreadPtr DocInverter::addThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread) { return newLucene(docFieldProcessorPerThread, shared_from_this()); } } LucenePlusPlus-rel_3.0.9/src/core/index/DocInverterPerField.cpp000066400000000000000000000174621456444476200245330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocInverterPerField.h" #include "DocInverterPerThread.h" #include "InvertedDocConsumerPerThread.h" #include "InvertedDocEndConsumerPerThread.h" #include "InvertedDocConsumerPerField.h" #include "InvertedDocEndConsumerPerField.h" #include "Fieldable.h" #include "FieldInfo.h" #include "FieldInvertState.h" #include "DocumentsWriter.h" #include "Document.h" #include "Analyzer.h" #include "ReusableStringReader.h" #include "TokenStream.h" #include "PositionIncrementAttribute.h" #include "OffsetAttribute.h" #include "AttributeSource.h" #include "InfoStream.h" #include "StringUtils.h" namespace Lucene { DocInverterPerField::DocInverterPerField(const DocInverterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo) { this->_perThread = perThread; this->fieldInfo = fieldInfo; docState = perThread->docState; fieldState = perThread->fieldState; } DocInverterPerField::~DocInverterPerField() { } void DocInverterPerField::initialize() { DocInverterPerThreadPtr perThread(_perThread); consumer = perThread->consumer->addField(shared_from_this(), fieldInfo); endConsumer = perThread->endConsumer->addField(shared_from_this(), fieldInfo); } void DocInverterPerField::abort() { consumer->abort(); endConsumer->abort(); } void DocInverterPerField::processFields(Collection fields, int32_t count) { fieldState->reset(docState->doc->getBoost()); int32_t maxFieldLength = docState->maxFieldLength; bool doInvert = consumer->start(fields, count); DocumentsWriterPtr docWriter(docState->_docWriter); DocInverterPerThreadPtr perThread(_perThread); for (int32_t i = 0; i < count; ++i) { FieldablePtr field = fields[i]; if (field->isIndexed() && doInvert) { bool anyToken; if (fieldState->length > 0) { fieldState->position += docState->analyzer->getPositionIncrementGap(fieldInfo->name); } if (!field->isTokenized()) { // un-tokenized field String stringValue(field->stringValue()); int32_t 
valueLength = (int32_t)stringValue.length(); perThread->singleToken->reinit(stringValue, 0, valueLength); fieldState->attributeSource = perThread->singleToken; consumer->start(field); bool success = false; LuceneException finally; try { consumer->add(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { docWriter->setAborting(); } finally.throwException(); fieldState->offset += valueLength; ++fieldState->length; ++fieldState->position; anyToken = (valueLength > 0); } else { // tokenized field TokenStreamPtr stream; TokenStreamPtr streamValue(field->tokenStreamValue()); if (streamValue) { stream = streamValue; } else { // the field does not have a TokenStream, so we have to obtain one from the analyzer ReaderPtr reader; // find or make Reader ReaderPtr readerValue(field->readerValue()); if (readerValue) { reader = readerValue; } else { String stringValue(field->stringValue()); perThread->stringReader->init(stringValue); reader = perThread->stringReader; } // Tokenize field and add to postingTable stream = docState->analyzer->reusableTokenStream(fieldInfo->name, reader); } // reset the TokenStream to the first token stream->reset(); int32_t startLength = fieldState->length; LuceneException finally; try { int32_t offsetEnd = fieldState->offset - 1; bool hasMoreTokens = stream->incrementToken(); fieldState->attributeSource = stream; OffsetAttributePtr offsetAttribute(fieldState->attributeSource->addAttribute()); PositionIncrementAttributePtr posIncrAttribute(fieldState->attributeSource->addAttribute()); consumer->start(field); while (true) { // If we hit an exception in stream.next below (which is fairly common, eg if analyzer // chokes on a given document), then it's non-aborting and (above) this one document // will be marked as deleted, but still consume a docID if (!hasMoreTokens) { break; } int32_t posIncr = posIncrAttribute->getPositionIncrement(); fieldState->position += posIncr; if (fieldState->position > 0) { --fieldState->position; 
} if (posIncr == 0) { ++fieldState->numOverlap; } bool success = false; try { // If we hit an exception in here, we abort all buffered documents since the last // flush, on the likelihood that the internal state of the consumer is now corrupt // and should not be flushed to a new segment consumer->add(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { docWriter->setAborting(); } finally.throwException(); ++fieldState->position; offsetEnd = fieldState->offset + offsetAttribute->endOffset(); if (++fieldState->length >= maxFieldLength) { if (docState->infoStream) { *docState->infoStream << L"maxFieldLength " << StringUtils::toString(maxFieldLength) << L" reached for field " << fieldInfo->name << L", ignoring following tokens\n"; } break; } hasMoreTokens = stream->incrementToken(); } // trigger streams to perform end-of-stream operations stream->end(); fieldState->offset += offsetAttribute->endOffset(); anyToken = (fieldState->length > startLength); } catch (LuceneException& e) { finally = e; } stream->close(); finally.throwException(); } if (anyToken) { fieldState->offset += docState->analyzer->getOffsetGap(field); } fieldState->boost *= field->getBoost(); } // don't hang onto the field fields[i].reset(); } consumer->finish(); endConsumer->finish(); } } LucenePlusPlus-rel_3.0.9/src/core/index/DocInverterPerThread.cpp000066400000000000000000000051301456444476200247040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocInverterPerThread.h" #include "DocInverterPerField.h" #include "DocInverter.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "DocFieldProcessorPerThread.h" #include "InvertedDocConsumer.h" #include "InvertedDocEndConsumer.h" #include "InvertedDocConsumerPerThread.h" #include "InvertedDocEndConsumerPerThread.h" #include "FieldInvertState.h" #include "ReusableStringReader.h" namespace Lucene { DocInverterPerThread::DocInverterPerThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread, const DocInverterPtr& docInverter) { this->fieldState = newLucene(); this->stringReader = newLucene(); this->singleToken = newLucene(); this->_docInverter = docInverter; this->docState = docFieldProcessorPerThread->docState; } DocInverterPerThread::~DocInverterPerThread() { } void DocInverterPerThread::initialize() { DocInverterPtr docInverter(_docInverter); consumer = docInverter->consumer->addThread(shared_from_this()); endConsumer = docInverter->endConsumer->addThread(shared_from_this()); } void DocInverterPerThread::startDocument() { consumer->startDocument(); endConsumer->startDocument(); } DocWriterPtr DocInverterPerThread::finishDocument() { endConsumer->finishDocument(); return consumer->finishDocument(); } void DocInverterPerThread::abort() { LuceneException finally; try { consumer->abort(); } catch (LuceneException& e) { finally = e; } try { endConsumer->abort(); } catch (LuceneException& e) { finally = e; } finally.throwException(); } DocFieldConsumerPerFieldPtr DocInverterPerThread::addField(const FieldInfoPtr& fi) { return newLucene(shared_from_this(), fi); } SingleTokenAttributeSource::SingleTokenAttributeSource() { termAttribute = addAttribute(); offsetAttribute = addAttribute(); } SingleTokenAttributeSource::~SingleTokenAttributeSource() { } void SingleTokenAttributeSource::reinit(const String& stringValue, int32_t 
startOffset, int32_t endOffset) { termAttribute->setTermBuffer(stringValue); offsetAttribute->setOffset(startOffset, endOffset); } } LucenePlusPlus-rel_3.0.9/src/core/index/DocumentsWriter.cpp000066400000000000000000001401671456444476200240310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocumentsWriter.h" #include "DocumentsWriterThreadState.h" #include "LuceneThread.h" #include "IndexWriter.h" #include "_IndexWriter.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "DocFieldProcessor.h" #include "Term.h" #include "TermDocs.h" #include "TermVectorsTermsWriter.h" #include "FreqProxTermsWriter.h" #include "TermsHashConsumer.h" #include "InvertedDocConsumer.h" #include "TermsHash.h" #include "DocInverter.h" #include "NormsWriter.h" #include "BufferedDeletes.h" #include "FieldInfos.h" #include "InfoStream.h" #include "DocConsumerPerThread.h" #include "SegmentWriteState.h" #include "IndexFileNames.h" #include "CompoundFileWriter.h" #include "MergeDocIDRemapper.h" #include "SegmentReader.h" #include "SegmentInfos.h" #include "SegmentInfo.h" #include "Query.h" #include "Weight.h" #include "Scorer.h" #include "TestPoint.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { /// Max # ThreadState instances; if there are more threads than this they share ThreadStates const int32_t DocumentsWriter::MAX_THREAD_STATE = 5; /// Coarse estimates used to measure RAM usage of buffered deletes const int32_t DocumentsWriter::OBJECT_HEADER_BYTES = 8; #ifdef LPP_BUILD_64 const int32_t DocumentsWriter::POINTER_NUM_BYTE = 8; #else const int32_t DocumentsWriter::POINTER_NUM_BYTE = 4; #endif const int32_t 
DocumentsWriter::INT_NUM_BYTE = 4; #ifdef LPP_UNICODE_CHAR_SIZE_4 const int32_t DocumentsWriter::CHAR_NUM_BYTE = 4; #else const int32_t DocumentsWriter::CHAR_NUM_BYTE = 2; #endif /// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object /// with Term key, BufferedDeletes.Num val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Term is /// object with String field and String text (OBJ_HEADER + 2*POINTER). We don't count Term's field since /// it's interned. Term's text is String (OBJ_HEADER + 4*INT + POINTER + OBJ_HEADER + string.length*CHAR). /// BufferedDeletes.num is OBJ_HEADER + INT. const int32_t DocumentsWriter::BYTES_PER_DEL_TERM = 8 * DocumentsWriter::POINTER_NUM_BYTE + 5 * DocumentsWriter::OBJECT_HEADER_BYTES + 6 * DocumentsWriter::INT_NUM_BYTE; /// Rough logic: del docIDs are List. Say list allocates ~2X size (2*POINTER). Integer is /// OBJ_HEADER + int const int32_t DocumentsWriter::BYTES_PER_DEL_DOCID = 2 * DocumentsWriter::POINTER_NUM_BYTE + DocumentsWriter::OBJECT_HEADER_BYTES + DocumentsWriter::INT_NUM_BYTE; /// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object /// with Query key, Integer val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Query we often undercount /// (say 24 bytes). Integer is OBJ_HEADER + INT. 
const int32_t DocumentsWriter::BYTES_PER_DEL_QUERY = 5 * DocumentsWriter::POINTER_NUM_BYTE + 2 * DocumentsWriter::OBJECT_HEADER_BYTES + 2 * DocumentsWriter::INT_NUM_BYTE + 24; /// Initial chunks size of the shared byte[] blocks used to store postings data const int32_t DocumentsWriter::BYTE_BLOCK_SHIFT = 15; const int32_t DocumentsWriter::BYTE_BLOCK_SIZE = 1 << DocumentsWriter::BYTE_BLOCK_SHIFT; const int32_t DocumentsWriter::BYTE_BLOCK_MASK = DocumentsWriter::BYTE_BLOCK_SIZE - 1; const int32_t DocumentsWriter::BYTE_BLOCK_NOT_MASK = ~DocumentsWriter::BYTE_BLOCK_MASK; /// Initial chunk size of the shared char[] blocks used to store term text const int32_t DocumentsWriter::CHAR_BLOCK_SHIFT = 14; const int32_t DocumentsWriter::CHAR_BLOCK_SIZE = 1 << DocumentsWriter::CHAR_BLOCK_SHIFT; const int32_t DocumentsWriter::CHAR_BLOCK_MASK = DocumentsWriter::CHAR_BLOCK_SIZE - 1; const int32_t DocumentsWriter::MAX_TERM_LENGTH = DocumentsWriter::CHAR_BLOCK_SIZE - 1; /// Initial chunks size of the shared int[] blocks used to store postings data const int32_t DocumentsWriter::INT_BLOCK_SHIFT = 13; const int32_t DocumentsWriter::INT_BLOCK_SIZE = 1 << DocumentsWriter::INT_BLOCK_SHIFT; const int32_t DocumentsWriter::INT_BLOCK_MASK = DocumentsWriter::INT_BLOCK_SIZE - 1; const int32_t DocumentsWriter::PER_DOC_BLOCK_SIZE = 1024; DocumentsWriter::DocumentsWriter(const DirectoryPtr& directory, const IndexWriterPtr& writer, const IndexingChainPtr& indexingChain) { this->threadStates = Collection::newInstance(); this->threadBindings = MapThreadDocumentsWriterThreadState::newInstance(); this->_openFiles = HashSet::newInstance(); this->_closedFiles = HashSet::newInstance(); this->freeIntBlocks = Collection::newInstance(); this->freeCharBlocks = Collection::newInstance(); this->directory = directory; this->_writer = writer; this->indexingChain = indexingChain; } DocumentsWriter::~DocumentsWriter() { } void DocumentsWriter::initialize() { docStoreOffset = 0; nextDocID = 0; numDocsInRAM = 0; 
numDocsInStore = 0; pauseThreads = 0; flushPending = false; bufferIsFull = false; aborting = false; maxFieldLength = IndexWriter::DEFAULT_MAX_FIELD_LENGTH; deletesInRAM = newLucene(false); deletesFlushed = newLucene(true); maxBufferedDeleteTerms = IndexWriter::DEFAULT_MAX_BUFFERED_DELETE_TERMS; ramBufferSize = (int64_t)(IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024); waitQueuePauseBytes = (int64_t)((double)ramBufferSize * 0.1); waitQueueResumeBytes = (int64_t)((double)ramBufferSize * 0.05); freeTrigger = (int64_t)(IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB * 1024.0 * 1024.0 * 1.05); freeLevel = (int64_t)(IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB * 1024.0 * 1024.0 * 0.95); maxBufferedDocs = IndexWriter::DEFAULT_MAX_BUFFERED_DOCS; flushedDocCount = 0; closed = false; waitQueue = newLucene(shared_from_this()); skipDocWriter = newLucene(); numBytesAlloc = 0; numBytesUsed = 0; byteBlockAllocator = newLucene(shared_from_this(), BYTE_BLOCK_SIZE); perDocAllocator = newLucene(shared_from_this(), PER_DOC_BLOCK_SIZE); IndexWriterPtr writer(_writer); this->similarity = writer->getSimilarity(); flushedDocCount = writer->maxDoc(); consumer = indexingChain->getChain(shared_from_this()); docFieldProcessor = boost::dynamic_pointer_cast(consumer); } PerDocBufferPtr DocumentsWriter::newPerDocBuffer() { return newLucene(shared_from_this()); } IndexingChainPtr DocumentsWriter::getDefaultIndexingChain() { static DefaultIndexingChainPtr defaultIndexingChain; LUCENE_RUN_ONCE( defaultIndexingChain = newLucene(); CycleCheck::addStatic(defaultIndexingChain); ); return defaultIndexingChain; } void DocumentsWriter::updateFlushedDocCount(int32_t n) { SyncLock syncLock(this); flushedDocCount += n; } int32_t DocumentsWriter::getFlushedDocCount() { SyncLock syncLock(this); return flushedDocCount; } void DocumentsWriter::setFlushedDocCount(int32_t n) { SyncLock syncLock(this); flushedDocCount = n; } bool DocumentsWriter::hasProx() { return docFieldProcessor ? 
docFieldProcessor->fieldInfos->hasProx() : true; } void DocumentsWriter::setInfoStream(const InfoStreamPtr& infoStream) { SyncLock syncLock(this); this->infoStream = infoStream; for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { (*threadState)->docState->infoStream = infoStream; } } void DocumentsWriter::setMaxFieldLength(int32_t maxFieldLength) { SyncLock syncLock(this); this->maxFieldLength = maxFieldLength; for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { (*threadState)->docState->maxFieldLength = maxFieldLength; } } void DocumentsWriter::setSimilarity(const SimilarityPtr& similarity) { SyncLock syncLock(this); this->similarity = similarity; for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { (*threadState)->docState->similarity = similarity; } } void DocumentsWriter::setRAMBufferSizeMB(double mb) { SyncLock syncLock(this); if (mb == IndexWriter::DISABLE_AUTO_FLUSH) { ramBufferSize = IndexWriter::DISABLE_AUTO_FLUSH; waitQueuePauseBytes = 4 * 1024 * 1024; waitQueueResumeBytes = 2 * 1024 * 1024; } else { ramBufferSize = (int64_t)(mb * 1024.0 * 1024.0); waitQueuePauseBytes = (int64_t)((double)ramBufferSize * 0.1); waitQueueResumeBytes = (int64_t)((double)ramBufferSize * 0.05); freeTrigger = (int64_t)(1.05 * (double)ramBufferSize); freeLevel = (int64_t)(0.95 * (double)ramBufferSize); } } double DocumentsWriter::getRAMBufferSizeMB() { SyncLock syncLock(this); if (ramBufferSize == IndexWriter::DISABLE_AUTO_FLUSH) { return (double)ramBufferSize; } else { return (double)ramBufferSize / 1024.0 / 1024.0; } } void DocumentsWriter::setMaxBufferedDocs(int32_t count) { maxBufferedDocs = count; } int32_t DocumentsWriter::getMaxBufferedDocs() { return maxBufferedDocs; } String DocumentsWriter::getSegment() { return segment; } int32_t DocumentsWriter::getNumDocsInRAM() { return numDocsInRAM; 
} String DocumentsWriter::getDocStoreSegment() { SyncLock syncLock(this); return docStoreSegment; } int32_t DocumentsWriter::getDocStoreOffset() { return docStoreOffset; } String DocumentsWriter::closeDocStore() { TestScope testScope(L"DocumentsWriter", L"closeDocStore"); SyncLock syncLock(this); BOOST_ASSERT(allThreadsIdle()); if (infoStream) { message(L"closeDocStore: " + StringUtils::toString(_openFiles.size()) + L" files to flush to segment " + docStoreSegment + L" numDocs=" + StringUtils::toString(numDocsInStore)); } bool success = false; LuceneException finally; String s; try { initFlushState(true); _closedFiles.clear(); consumer->closeDocStore(flushState); BOOST_ASSERT(_openFiles.empty()); s = docStoreSegment; docStoreSegment.clear(); docStoreOffset = 0; numDocsInStore = 0; success = true; } catch (LuceneException& e) { finally = e; } if (!success) { abort(); } finally.throwException(); return s; } HashSet DocumentsWriter::abortedFiles() { return _abortedFiles; } void DocumentsWriter::message(const String& message) { if (infoStream) { *infoStream << L"DW " << message << L"\n"; } } HashSet DocumentsWriter::openFiles() { SyncLock syncLock(this); return HashSet::newInstance(_openFiles.begin(), _openFiles.end()); } HashSet DocumentsWriter::closedFiles() { SyncLock syncLock(this); return HashSet::newInstance(_closedFiles.begin(), _closedFiles.end()); } void DocumentsWriter::addOpenFile(const String& name) { SyncLock syncLock(this); BOOST_ASSERT(!_openFiles.contains(name)); _openFiles.add(name); } void DocumentsWriter::removeOpenFile(const String& name) { SyncLock syncLock(this); BOOST_ASSERT(_openFiles.contains(name)); _openFiles.remove(name); _closedFiles.add(name); } void DocumentsWriter::setAborting() { SyncLock syncLock(this); aborting = true; } void DocumentsWriter::abort() { TestScope testScope(L"DocumentsWriter", L"abort"); SyncLock syncLock(this); LuceneException finally; try { if (infoStream) { message(L"docWriter: now abort"); } // Forcefully remove 
waiting ThreadStates from line waitQueue->abort(); // Wait for all other threads to finish with DocumentsWriter pauseAllThreads(); try { BOOST_ASSERT(waitQueue->numWaiting == 0); waitQueue->waitingBytes = 0; try { _abortedFiles = openFiles(); } catch (...) { _abortedFiles.reset(); } deletesInRAM->clear(); deletesFlushed->clear(); _openFiles.clear(); for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { try { (*threadState)->consumer->abort(); } catch (...) { } } try { consumer->abort(); } catch (...) { } docStoreSegment.clear(); numDocsInStore = 0; docStoreOffset = 0; // Reset all postings data doAfterFlush(); } catch (LuceneException& e) { finally = e; } resumeAllThreads(); } catch (LuceneException& e) { if (finally.isNull()) { finally = e; } } aborting = false; notifyAll(); if (infoStream) { message(L"docWriter: done abort"); } finally.throwException(); } void DocumentsWriter::doAfterFlush() { // All ThreadStates should be idle when we are called BOOST_ASSERT(allThreadsIdle()); threadBindings.clear(); waitQueue->reset(); segment.clear(); numDocsInRAM = 0; nextDocID = 0; bufferIsFull = false; flushPending = false; for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { (*threadState)->doAfterFlush(); } numBytesUsed = 0; } bool DocumentsWriter::pauseAllThreads() { SyncLock syncLock(this); ++pauseThreads; while (!allThreadsIdle()) { wait(1000); } return aborting; } void DocumentsWriter::resumeAllThreads() { SyncLock syncLock(this); --pauseThreads; BOOST_ASSERT(pauseThreads >= 0); if (pauseThreads == 0) { notifyAll(); } } bool DocumentsWriter::allThreadsIdle() { SyncLock syncLock(this); for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { if (!(*threadState)->isIdle) { return false; } } return true; } bool DocumentsWriter::anyChanges() { SyncLock syncLock(this); return (numDocsInRAM != 0 || 
deletesInRAM->numTerms != 0 || !deletesInRAM->docIDs.empty() || !deletesInRAM->queries.empty()); } void DocumentsWriter::initFlushState(bool onlyDocStore) { SyncLock syncLock(this); initSegmentName(onlyDocStore); flushState = newLucene(shared_from_this(), directory, segment, docStoreSegment, numDocsInRAM, numDocsInStore, IndexWriterPtr(_writer)->getTermIndexInterval()); } int32_t DocumentsWriter::flush(bool _closeDocStore) { SyncLock syncLock(this); BOOST_ASSERT(allThreadsIdle()); BOOST_ASSERT(numDocsInRAM > 0); BOOST_ASSERT(nextDocID == numDocsInRAM); BOOST_ASSERT(waitQueue->numWaiting == 0); BOOST_ASSERT(waitQueue->waitingBytes == 0); initFlushState(false); docStoreOffset = numDocsInStore; if (infoStream) { message(L"flush postings as segment " + flushState->segmentName + L" numDocs=" + StringUtils::toString(numDocsInRAM)); } bool success = false; LuceneException finally; try { if (_closeDocStore) { BOOST_ASSERT(!flushState->docStoreSegmentName.empty()); BOOST_ASSERT(flushState->docStoreSegmentName == flushState->segmentName); closeDocStore(); flushState->numDocsInStore = 0; } Collection threads(Collection::newInstance()); for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { threads.add((*threadState)->consumer); } consumer->flush(threads, flushState); if (infoStream) { SegmentInfoPtr si(newLucene(flushState->segmentName, flushState->numDocs, directory)); int64_t newSegmentSize = si->sizeInBytes(); if (infoStream) { message(L" oldRAMSize=" + StringUtils::toString(numBytesUsed) + L" newFlushedSize=" + StringUtils::toString(newSegmentSize) + L" docs/MB=" + StringUtils::toString((double)numDocsInRAM / ((double)newSegmentSize / 1024.0 / 1024.0)) + L" new/old=" + StringUtils::toString(100.0 * (double)newSegmentSize / (double)numBytesUsed) + L"%"); } } flushedDocCount += flushState->numDocs; doAfterFlush(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { abort(); } 
finally.throwException(); BOOST_ASSERT(waitQueue->waitingBytes == 0); return flushState->numDocs; } HashSet DocumentsWriter::getFlushedFiles() { return flushState->flushedFiles; } void DocumentsWriter::createCompoundFile(const String& segment) { CompoundFileWriterPtr cfsWriter(newLucene(directory, segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION())); for (HashSet::iterator flushedFile = flushState->flushedFiles.begin(); flushedFile != flushState->flushedFiles.end(); ++flushedFile) { cfsWriter->addFile(*flushedFile); } // Perform the merge cfsWriter->close(); } bool DocumentsWriter::setFlushPending() { SyncLock syncLock(this); if (flushPending) { return false; } else { flushPending = true; return true; } } void DocumentsWriter::clearFlushPending() { SyncLock syncLock(this); flushPending = false; } void DocumentsWriter::pushDeletes() { SyncLock syncLock(this); deletesFlushed->update(deletesInRAM); } void DocumentsWriter::close() { SyncLock syncLock(this); closed = true; notifyAll(); } void DocumentsWriter::initSegmentName(bool onlyDocStore) { SyncLock syncLock(this); if (segment.empty() && (!onlyDocStore || docStoreSegment.empty())) { segment = IndexWriterPtr(_writer)->newSegmentName(); BOOST_ASSERT(numDocsInRAM == 0); } if (docStoreSegment.empty()) { docStoreSegment = segment; BOOST_ASSERT(numDocsInStore == 0); } } DocumentsWriterThreadStatePtr DocumentsWriter::getThreadState(const DocumentPtr& doc, const TermPtr& delTerm) { SyncLock syncLock(this); // First, find a thread state. If this thread already has affinity to a specific ThreadState, use that one again. DocumentsWriterThreadStatePtr state(threadBindings.get(LuceneThread::currentId())); if (!state) { // First time this thread has called us since last flush. 
Find the least loaded thread state DocumentsWriterThreadStatePtr minThreadState; for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { if (!minThreadState || (*threadState)->numThreads < minThreadState->numThreads) { minThreadState = *threadState; } } if (minThreadState && (minThreadState->numThreads == 0 || threadStates.size() >= MAX_THREAD_STATE)) { state = minThreadState; ++state->numThreads; } else { // Just create a new "private" thread state threadStates.resize(threadStates.size() + 1); state = newLucene(shared_from_this()); threadStates[threadStates.size() - 1] = state; } threadBindings.put(LuceneThread::currentId(), state); } // Next, wait until my thread state is idle (in case it's shared with other threads) and for threads to // not be paused nor a flush pending waitReady(state); // Allocate segment name if this is the first doc since last flush initSegmentName(false); state->isIdle = false; bool success = false; LuceneException finally; try { state->docState->docID = nextDocID; BOOST_ASSERT(IndexWriterPtr(_writer)->testPoint(L"DocumentsWriter.ThreadState.init start")); if (delTerm) { addDeleteTerm(delTerm, state->docState->docID); state->doFlushAfter = timeToFlushDeletes(); } BOOST_ASSERT(IndexWriterPtr(_writer)->testPoint(L"DocumentsWriter.ThreadState.init after delTerm")); ++nextDocID; ++numDocsInRAM; // We must at this point commit to flushing to ensure we always get N docs when we flush by doc // count, even if > 1 thread is adding documents if (!flushPending && maxBufferedDocs != IndexWriter::DISABLE_AUTO_FLUSH && numDocsInRAM >= maxBufferedDocs) { flushPending = true; state->doFlushAfter = true; } success = true; } catch (LuceneException& e) { finally = e; } if (!success) { // Forcefully idle this ThreadState state->isIdle = true; notifyAll(); if (state->doFlushAfter) { state->doFlushAfter = false; flushPending = false; } } finally.throwException(); return state; } bool 
DocumentsWriter::addDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer) { return updateDocument(doc, analyzer, TermPtr()); } bool DocumentsWriter::updateDocument(const TermPtr& t, const DocumentPtr& doc, const AnalyzerPtr& analyzer) { return updateDocument(doc, analyzer, t); } bool DocumentsWriter::updateDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer, const TermPtr& delTerm) { // This call is synchronized but fast DocumentsWriterThreadStatePtr state(getThreadState(doc, delTerm)); DocStatePtr docState(state->docState); docState->doc = doc; docState->analyzer = analyzer; bool success = false; LuceneException finally; try { // This call is not synchronized and does all the work DocWriterPtr perDoc; try { perDoc = state->consumer->processDocument(); } catch (LuceneException& e) { finally = e; } docState->clear(); finally.throwException(); // This call is synchronized but fast finishDocument(state, perDoc); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { SyncLock syncLock(this); if (aborting) { state->isIdle = true; notifyAll(); abort(); } else { skipDocWriter->docID = docState->docID; bool success2 = false; try { waitQueue->add(skipDocWriter); success2 = true; } catch (LuceneException& e) { finally = e; } if (!success2) { state->isIdle = true; notifyAll(); abort(); return false; } state->isIdle = true; notifyAll(); // If this thread state had decided to flush, we must clear it so another thread can flush if (state->doFlushAfter) { state->doFlushAfter = false; flushPending = false; notifyAll(); } // Immediately mark this document as deleted since likely it was partially added. 
This keeps // indexing as "all or none" (atomic) when adding a document addDeleteDocID(state->docState->docID); } } finally.throwException(); return (state->doFlushAfter || timeToFlushDeletes()); } int32_t DocumentsWriter::getNumBufferedDeleteTerms() { SyncLock syncLock(this); return deletesInRAM->numTerms; } MapTermNum DocumentsWriter::getBufferedDeleteTerms() { SyncLock syncLock(this); return deletesInRAM->terms; } void DocumentsWriter::remapDeletes(const SegmentInfosPtr& infos, Collection< Collection > docMaps, Collection delCounts, const OneMergePtr& merge, int32_t mergeDocCount) { SyncLock syncLock(this); if (!docMaps) { // The merged segments had no deletes so docIDs did not change and we have nothing to do return; } MergeDocIDRemapperPtr mapper(newLucene(infos, docMaps, delCounts, merge, mergeDocCount)); deletesInRAM->remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); deletesFlushed->remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); flushedDocCount -= mapper->docShift; } void DocumentsWriter::waitReady(const DocumentsWriterThreadStatePtr& state) { SyncLock syncLock(this); while (!closed && ((state && !state->isIdle) || pauseThreads != 0 || flushPending || aborting)) { wait(1000); } if (closed) { boost::throw_exception(AlreadyClosedException(L"this IndexWriter is closed")); } } bool DocumentsWriter::bufferDeleteTerms(Collection terms) { SyncLock syncLock(this); waitReady(DocumentsWriterThreadStatePtr()); for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { addDeleteTerm(*term, numDocsInRAM); } return timeToFlushDeletes(); } bool DocumentsWriter::bufferDeleteTerm(const TermPtr& term) { SyncLock syncLock(this); waitReady(DocumentsWriterThreadStatePtr()); addDeleteTerm(term, numDocsInRAM); return timeToFlushDeletes(); } bool DocumentsWriter::bufferDeleteQueries(Collection queries) { SyncLock syncLock(this); waitReady(DocumentsWriterThreadStatePtr()); for (Collection::iterator query = queries.begin(); query != 
queries.end(); ++query) { addDeleteQuery(*query, numDocsInRAM); } return timeToFlushDeletes(); } bool DocumentsWriter::bufferDeleteQuery(const QueryPtr& query) { SyncLock syncLock(this); waitReady(DocumentsWriterThreadStatePtr()); addDeleteQuery(query, numDocsInRAM); return timeToFlushDeletes(); } bool DocumentsWriter::deletesFull() { SyncLock syncLock(this); return ((ramBufferSize != IndexWriter::DISABLE_AUTO_FLUSH && (deletesInRAM->bytesUsed + deletesFlushed->bytesUsed + numBytesUsed) >= ramBufferSize) || (maxBufferedDeleteTerms != IndexWriter::DISABLE_AUTO_FLUSH && ((deletesInRAM->size() + deletesFlushed->size()) >= maxBufferedDeleteTerms))); } bool DocumentsWriter::doApplyDeletes() { SyncLock syncLock(this); // Very similar to deletesFull(), except we don't count numBytesAlloc, because we are checking whether // deletes (alone) are consuming too many resources now and thus should be applied. We apply deletes // if RAM usage is > 1/2 of our allowed RAM buffer, to prevent too-frequent flushing of a long tail of // tiny segments when merges (which always apply deletes) are infrequent. 
return ((ramBufferSize != IndexWriter::DISABLE_AUTO_FLUSH && (deletesInRAM->bytesUsed + deletesFlushed->bytesUsed) >= ramBufferSize / 2) || (maxBufferedDeleteTerms != IndexWriter::DISABLE_AUTO_FLUSH && ((deletesInRAM->size() + deletesFlushed->size()) >= maxBufferedDeleteTerms))); } bool DocumentsWriter::timeToFlushDeletes() { SyncLock syncLock(this); return ((bufferIsFull || deletesFull()) && setFlushPending()); } bool DocumentsWriter::checkDeleteTerm(const TermPtr& term) { if (term) { BOOST_ASSERT(!lastDeleteTerm || term->compareTo(lastDeleteTerm) > 0); } lastDeleteTerm = term; return true; } void DocumentsWriter::setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms) { this->maxBufferedDeleteTerms = maxBufferedDeleteTerms; } int32_t DocumentsWriter::getMaxBufferedDeleteTerms() { return maxBufferedDeleteTerms; } bool DocumentsWriter::hasDeletes() { SyncLock syncLock(this); return deletesFlushed->any(); } bool DocumentsWriter::applyDeletes(const SegmentInfosPtr& infos) { SyncLock syncLock(this); if (!hasDeletes()) { return false; } if (infoStream) { message(L"apply " + StringUtils::toString(deletesFlushed->numTerms) + L" buffered deleted terms and " + StringUtils::toString(deletesFlushed->docIDs.size()) + L" deleted docIDs and " + StringUtils::toString(deletesFlushed->queries.size()) + L" deleted queries on " + StringUtils::toString(infos->size()) + L" segments."); } int32_t infosEnd = infos->size(); int32_t docStart = 0; bool any = false; IndexWriterPtr writer(_writer); for (int32_t i = 0; i < infosEnd; ++i) { // Make sure we never attempt to apply deletes to segment in external dir BOOST_ASSERT(infos->info(i)->dir == directory); SegmentReaderPtr reader(writer->readerPool->get(infos->info(i), false)); LuceneException finally; try { if (applyDeletes(reader, docStart)) { any = true; } docStart += reader->maxDoc(); } catch (LuceneException& e) { finally = e; } writer->readerPool->release(reader); finally.throwException(); } deletesFlushed->clear(); return any; } 
bool DocumentsWriter::applyDeletes(const IndexReaderPtr& reader, int32_t docIDStart) { SyncLock syncLock(this); int32_t docEnd = docIDStart + reader->maxDoc(); bool any = false; BOOST_ASSERT(checkDeleteTerm(TermPtr())); // Delete by term TermDocsPtr docs(reader->termDocs()); LuceneException finally; try { for (MapTermNum::iterator entry = deletesFlushed->terms.begin(); entry != deletesFlushed->terms.end(); ++entry) { // we should be iterating a Map here, so terms better be in order BOOST_ASSERT(checkDeleteTerm(entry->first)); docs->seek(entry->first); int32_t limit = entry->second->getNum(); while (docs->next()) { int32_t docID = docs->doc(); if (docIDStart + docID >= limit) { break; } reader->deleteDocument(docID); any = true; } } } catch (LuceneException& e) { finally = e; } docs->close(); finally.throwException(); // Delete by docID for (Collection::iterator docID = deletesFlushed->docIDs.begin(); docID != deletesFlushed->docIDs.end(); ++docID) { if (*docID >= docIDStart && *docID < docEnd) { reader->deleteDocument(*docID - docIDStart); any = true; } } // Delete by query IndexSearcherPtr searcher(newLucene(reader)); for (MapQueryInt::iterator entry = deletesFlushed->queries.begin(); entry != deletesFlushed->queries.end(); ++entry) { WeightPtr weight(entry->first->weight(searcher)); ScorerPtr scorer(weight->scorer(reader, true, false)); if (scorer) { while (true) { int32_t doc = scorer->nextDoc(); if ((int64_t)docIDStart + doc >= entry->second) { break; } reader->deleteDocument(doc); any = true; } } } searcher->close(); return any; } void DocumentsWriter::addDeleteTerm(const TermPtr& term, int32_t docCount) { SyncLock syncLock(this); NumPtr num(deletesInRAM->terms.get(term)); int32_t docIDUpto = flushedDocCount + docCount; if (!num) { deletesInRAM->terms.put(term, newLucene(docIDUpto)); } else { num->setNum(docIDUpto); } ++deletesInRAM->numTerms; deletesInRAM->addBytesUsed(BYTES_PER_DEL_TERM + term->_text.length() * CHAR_NUM_BYTE); } void 
DocumentsWriter::addDeleteDocID(int32_t docID) { SyncLock syncLock(this); deletesInRAM->docIDs.add(flushedDocCount + docID); deletesInRAM->addBytesUsed(BYTES_PER_DEL_DOCID); } void DocumentsWriter::addDeleteQuery(const QueryPtr& query, int32_t docID) { SyncLock syncLock(this); deletesInRAM->queries.put(query, flushedDocCount + docID); deletesInRAM->addBytesUsed(BYTES_PER_DEL_QUERY); } bool DocumentsWriter::doBalanceRAM() { SyncLock syncLock(this); return (ramBufferSize != IndexWriter::DISABLE_AUTO_FLUSH && !bufferIsFull && (numBytesUsed + deletesInRAM->bytesUsed + deletesFlushed->bytesUsed >= ramBufferSize || numBytesAlloc >= freeTrigger)); } void DocumentsWriter::finishDocument(const DocumentsWriterThreadStatePtr& perThread, const DocWriterPtr& docWriter) { if (doBalanceRAM()) { // Must call this without holding synchronized(this) else we'll hit deadlock balanceRAM(); } { SyncLock syncLock(this); BOOST_ASSERT(!docWriter || docWriter->docID == perThread->docState->docID); if (aborting) { // We are currently aborting, and another thread is waiting for me to become idle. We // just forcefully idle this threadState; it will be fully reset by abort() if (docWriter) { try { docWriter->abort(); } catch (...) 
{ } } perThread->isIdle = true; notifyAll(); return; } bool doPause; if (docWriter) { doPause = waitQueue->add(docWriter); } else { skipDocWriter->docID = perThread->docState->docID; doPause = waitQueue->add(skipDocWriter); } if (doPause) { waitForWaitQueue(); } if (bufferIsFull && !flushPending) { flushPending = true; perThread->doFlushAfter = true; } perThread->isIdle = true; notifyAll(); } } void DocumentsWriter::waitForWaitQueue() { SyncLock syncLock(this); do { wait(1000); } while (!waitQueue->doResume()); } int64_t DocumentsWriter::getRAMUsed() { return numBytesUsed + deletesInRAM->bytesUsed + deletesFlushed->bytesUsed; } IntArray DocumentsWriter::getIntBlock(bool trackAllocations) { SyncLock syncLock(this); int32_t size = freeIntBlocks.size(); IntArray b; if (size == 0) { // Always record a block allocated, even if trackAllocations is false. This is necessary because // this block will be shared between things that don't track allocations (term vectors) and things // that do (freq/prox postings). 
numBytesAlloc += INT_BLOCK_SIZE * INT_NUM_BYTE; b = IntArray::newInstance(INT_BLOCK_SIZE); } else { b = freeIntBlocks.removeLast(); } if (trackAllocations) { numBytesUsed += INT_BLOCK_SIZE * INT_NUM_BYTE; } BOOST_ASSERT(numBytesUsed <= numBytesAlloc); return b; } void DocumentsWriter::bytesAllocated(int64_t numBytes) { SyncLock syncLock(this); numBytesAlloc += numBytes; } void DocumentsWriter::bytesUsed(int64_t numBytes) { SyncLock syncLock(this); numBytesUsed += numBytes; BOOST_ASSERT(numBytesUsed <= numBytesAlloc); } void DocumentsWriter::recycleIntBlocks(Collection blocks, int32_t start, int32_t end) { SyncLock syncLock(this); for (int32_t i = start; i < end; ++i) { freeIntBlocks.add(blocks[i]); blocks[i].reset(); } } CharArray DocumentsWriter::getCharBlock() { SyncLock syncLock(this); int32_t size = freeCharBlocks.size(); CharArray c; if (size == 0) { numBytesAlloc += CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; c = CharArray::newInstance(CHAR_BLOCK_SIZE); } else { c = freeCharBlocks.removeLast(); } // We always track allocations of char blocks for now because nothing that skips allocation tracking // (currently only term vectors) uses its own char blocks. 
numBytesUsed += CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; BOOST_ASSERT(numBytesUsed <= numBytesAlloc); return c; } void DocumentsWriter::recycleCharBlocks(Collection blocks, int32_t numBlocks) { SyncLock syncLock(this); for (int32_t i = 0; i < numBlocks; ++i) { freeCharBlocks.add(blocks[i]); blocks[i].reset(); } } String DocumentsWriter::toMB(int64_t v) { return StringUtils::toString((double)v / 1024.0 / 1024.0); } void DocumentsWriter::balanceRAM() { // We flush when we've used our target usage int64_t flushTrigger = ramBufferSize; int64_t deletesRAMUsed = deletesInRAM->bytesUsed + deletesFlushed->bytesUsed; if (numBytesAlloc + deletesRAMUsed > freeTrigger) { if (infoStream) { message(L" RAM: now balance allocations: usedMB=" + toMB(numBytesUsed) + L" vs trigger=" + toMB(flushTrigger) + L" allocMB=" + toMB(numBytesAlloc) + L" deletesMB=" + toMB(deletesRAMUsed) + L" vs trigger=" + toMB(freeTrigger) + L" byteBlockFree=" + toMB(byteBlockAllocator->freeByteBlocks.size() * BYTE_BLOCK_SIZE) + L" perDocFree=" + toMB(perDocAllocator->freeByteBlocks.size() * PER_DOC_BLOCK_SIZE) + L" charBlockFree=" + toMB(freeCharBlocks.size() * CHAR_BLOCK_SIZE * CHAR_NUM_BYTE)); } int64_t startBytesAlloc = numBytesAlloc + deletesRAMUsed; int32_t iter = 0; // We free equally from each pool in 32 KB chunks until we are below our threshold (freeLevel) bool any = true; while (numBytesAlloc + deletesRAMUsed > freeLevel) { { SyncLock syncLock(this); if (perDocAllocator->freeByteBlocks.empty() && byteBlockAllocator->freeByteBlocks.empty() && freeCharBlocks.empty() && freeIntBlocks.empty() && !any) { // Nothing else to free -- must flush now. 
bufferIsFull = (numBytesUsed + deletesRAMUsed > flushTrigger); if (infoStream) { if (bufferIsFull) { message(L" nothing to free; now set bufferIsFull"); } else { message(L" nothing to free"); } } BOOST_ASSERT(numBytesUsed <= numBytesAlloc); break; } if ((iter % 5) == 0 && !byteBlockAllocator->freeByteBlocks.empty()) { byteBlockAllocator->freeByteBlocks.removeLast(); numBytesAlloc -= BYTE_BLOCK_SIZE; } if ((iter % 5) == 1 && !freeCharBlocks.empty()) { freeCharBlocks.removeLast(); numBytesAlloc -= CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; } if ((iter % 5) == 2 && !freeIntBlocks.empty()) { freeIntBlocks.removeLast(); numBytesAlloc -= INT_BLOCK_SIZE * INT_NUM_BYTE; } if ((iter % 5) == 3 && !perDocAllocator->freeByteBlocks.empty()) { // Remove upwards of 32 blocks (each block is 1K) for (int32_t i = 0; i < 32; ++i) { perDocAllocator->freeByteBlocks.removeLast(); numBytesAlloc -= PER_DOC_BLOCK_SIZE; if (perDocAllocator->freeByteBlocks.empty()) { break; } } } } if ((iter % 5) == 4 && any) { // Ask consumer to free any recycled state any = consumer->freeRAM(); } ++iter; } if (infoStream) { message(L" after free: freedMB=" + StringUtils::toString((double)(startBytesAlloc - numBytesAlloc - deletesRAMUsed) / 1024.0 / 1024.0) + L" usedMB=" + StringUtils::toString((double)(numBytesUsed + deletesRAMUsed) / 1024.0 / 1024.0) + L" allocMB=" + StringUtils::toString((double)numBytesAlloc / 1024.0 / 1024.0)); } } else { // If we have not crossed the 100% mark, but have crossed the 95% mark of RAM we are actually // using, go ahead and flush. This prevents over-allocating and then freeing, with every flush. 
SyncLock syncLock(this); if (numBytesUsed + deletesRAMUsed > flushTrigger) { if (infoStream) { message(L" RAM: now flush @ usedMB=" + StringUtils::toString((double)numBytesUsed / 1024.0 / 1024.0) + L" allocMB=" + StringUtils::toString((double)numBytesAlloc / 1024.0 / 1024.0) + L" deletesMB=" + StringUtils::toString((double)deletesRAMUsed / 1024.0 / 1024.0) + L" triggerMB=" + StringUtils::toString((double)flushTrigger / 1024.0 / 1024.0)); } bufferIsFull = true; } } } DocState::DocState() { maxFieldLength = 0; docID = 0; } DocState::~DocState() { } bool DocState::testPoint(const String& name) { return IndexWriterPtr(DocumentsWriterPtr(_docWriter)->_writer)->testPoint(name); } void DocState::clear() { // don't hold onto doc nor analyzer, in case it is large doc.reset(); analyzer.reset(); } PerDocBuffer::PerDocBuffer(const DocumentsWriterPtr& docWriter) { _docWriter = docWriter; } PerDocBuffer::~PerDocBuffer() { } ByteArray PerDocBuffer::newBuffer(int32_t size) { BOOST_ASSERT(size == DocumentsWriter::PER_DOC_BLOCK_SIZE); return DocumentsWriterPtr(_docWriter)->perDocAllocator->getByteBlock(false); } void PerDocBuffer::recycle() { SyncLock syncLock(this); if (!buffers.empty()) { setLength(0); // Recycle the blocks DocumentsWriterPtr(_docWriter)->perDocAllocator->recycleByteBlocks(buffers); buffers.clear(); sizeInBytes = 0; BOOST_ASSERT(numBuffers() == 0); } } DocWriter::DocWriter() { docID = 0; } DocWriter::~DocWriter() { } void DocWriter::setNext(const DocWriterPtr& next) { this->next = next; } IndexingChain::~IndexingChain() { } DefaultIndexingChain::~DefaultIndexingChain() { } DocConsumerPtr DefaultIndexingChain::getChain(const DocumentsWriterPtr& documentsWriter) { TermsHashConsumerPtr termVectorsWriter(newLucene(documentsWriter)); TermsHashConsumerPtr freqProxWriter(newLucene()); InvertedDocConsumerPtr termsHash(newLucene(documentsWriter, true, freqProxWriter, newLucene(documentsWriter, false, termVectorsWriter, TermsHashPtr()))); DocInverterPtr 
docInverter(newLucene(termsHash, newLucene())); return newLucene(documentsWriter, docInverter); } SkipDocWriter::~SkipDocWriter() { } void SkipDocWriter::finish() { } void SkipDocWriter::abort() { } int64_t SkipDocWriter::sizeInBytes() { return 0; } WaitQueue::WaitQueue(const DocumentsWriterPtr& docWriter) { this->_docWriter = docWriter; waiting = Collection::newInstance(10); nextWriteDocID = 0; nextWriteLoc = 0; numWaiting = 0; waitingBytes = 0; } WaitQueue::~WaitQueue() { } void WaitQueue::reset() { SyncLock syncLock(this); // NOTE: nextWriteLoc doesn't need to be reset BOOST_ASSERT(numWaiting == 0); BOOST_ASSERT(waitingBytes == 0); nextWriteDocID = 0; } bool WaitQueue::doResume() { SyncLock syncLock(this); return (waitingBytes <= DocumentsWriterPtr(_docWriter)->waitQueueResumeBytes); } bool WaitQueue::doPause() { SyncLock syncLock(this); return (waitingBytes > DocumentsWriterPtr(_docWriter)->waitQueuePauseBytes); } void WaitQueue::abort() { SyncLock syncLock(this); int32_t count = 0; for (Collection::iterator doc = waiting.begin(); doc != waiting.end(); ++doc) { if (*doc) { (*doc)->abort(); doc->reset(); ++count; } } waitingBytes = 0; BOOST_ASSERT(count == numWaiting); numWaiting = 0; } void WaitQueue::writeDocument(const DocWriterPtr& doc) { DocumentsWriterPtr docWriter(_docWriter); BOOST_ASSERT(doc == DocumentsWriterPtr(docWriter)->skipDocWriter || nextWriteDocID == doc->docID); bool success = false; LuceneException finally; try { doc->finish(); ++nextWriteDocID; ++docWriter->numDocsInStore; ++nextWriteLoc; BOOST_ASSERT(nextWriteLoc <= waiting.size()); if (nextWriteLoc == waiting.size()) { nextWriteLoc = 0; } success = true; } catch (LuceneException& e) { finally = e; } if (!success) { docWriter->setAborting(); } finally.throwException(); } bool WaitQueue::add(const DocWriterPtr& doc) { DocWriterPtr _doc(doc); SyncLock syncLock(this); BOOST_ASSERT(_doc->docID >= nextWriteDocID); if (_doc->docID == nextWriteDocID) { writeDocument(_doc); while (true) { _doc = 
waiting[nextWriteLoc]; if (_doc) { --numWaiting; waiting[nextWriteLoc].reset(); waitingBytes -= _doc->sizeInBytes(); writeDocument(_doc); } else { break; } } } else { // I finished before documents that were added before me. This can easily happen when I am a small doc // and the docs before me were large, or just due to luck in the thread scheduling. Just add myself to // the queue and when that large doc finishes, it will flush me int32_t gap = _doc->docID - nextWriteDocID; if (gap >= waiting.size()) { // Grow queue Collection newArray(Collection::newInstance(MiscUtils::getNextSize(gap))); BOOST_ASSERT(nextWriteLoc >= 0); MiscUtils::arrayCopy(waiting.begin(), nextWriteLoc, newArray.begin(), 0, waiting.size() - nextWriteLoc); MiscUtils::arrayCopy(waiting.begin(), 0, newArray.begin(), waiting.size() - nextWriteLoc, nextWriteLoc); nextWriteLoc = 0; waiting = newArray; gap = _doc->docID - nextWriteDocID; } int32_t loc = nextWriteLoc + gap; if (loc >= waiting.size()) { loc -= waiting.size(); } // We should only wrap one time BOOST_ASSERT(loc < waiting.size()); // Nobody should be in my spot! BOOST_ASSERT(!waiting[loc]); waiting[loc] = _doc; ++numWaiting; waitingBytes += _doc->sizeInBytes(); } return doPause(); } ByteBlockAllocator::ByteBlockAllocator(const DocumentsWriterPtr& docWriter, int32_t blockSize) { this->blockSize = blockSize; this->freeByteBlocks = Collection::newInstance(); this->_docWriter = docWriter; } ByteBlockAllocator::~ByteBlockAllocator() { } ByteArray ByteBlockAllocator::getByteBlock(bool trackAllocations) { DocumentsWriterPtr docWriter(_docWriter); SyncLock syncLock(docWriter); int32_t size = freeByteBlocks.size(); ByteArray b; if (size == 0) { // Always record a block allocated, even if trackAllocations is false. This is necessary because this block will // be shared between things that don't track allocations (term vectors) and things that do (freq/prox postings). 
docWriter->numBytesAlloc += blockSize; b = ByteArray::newInstance(blockSize); MiscUtils::arrayFill(b.get(), 0, b.size(), 0); } else { b = freeByteBlocks.removeLast(); } if (trackAllocations) { docWriter->numBytesUsed += blockSize; } BOOST_ASSERT(docWriter->numBytesUsed <= docWriter->numBytesAlloc); return b; } void ByteBlockAllocator::recycleByteBlocks(Collection blocks, int32_t start, int32_t end) { DocumentsWriterPtr docWriter(_docWriter); SyncLock syncLock(docWriter); for (int32_t i = start; i < end; ++i) { freeByteBlocks.add(blocks[i]); blocks[i].reset(); } } void ByteBlockAllocator::recycleByteBlocks(Collection blocks) { DocumentsWriterPtr docWriter(_docWriter); SyncLock syncLock(docWriter); int32_t size = blocks.size(); for (int32_t i = 0; i < size; ++i) { freeByteBlocks.add(blocks[i]); } } } LucenePlusPlus-rel_3.0.9/src/core/index/DocumentsWriterThreadState.cpp000066400000000000000000000023231456444476200261510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocumentsWriterThreadState.h" #include "DocumentsWriter.h" #include "DocConsumer.h" namespace Lucene { DocumentsWriterThreadState::DocumentsWriterThreadState(const DocumentsWriterPtr& docWriter) { this->_docWriter = docWriter; } DocumentsWriterThreadState::~DocumentsWriterThreadState() { } void DocumentsWriterThreadState::initialize() { isIdle = true; doFlushAfter = false; numThreads = 1; DocumentsWriterPtr docWriter(_docWriter); docState = newLucene(); docState->maxFieldLength = docWriter->maxFieldLength; docState->infoStream = docWriter->infoStream; docState->similarity = docWriter->similarity; docState->_docWriter = docWriter; consumer = docWriter->consumer->addThread(shared_from_this()); } void DocumentsWriterThreadState::doAfterFlush() { numThreads = 0; doFlushAfter = false; } } LucenePlusPlus-rel_3.0.9/src/core/index/FieldInfo.cpp000066400000000000000000000055661456444476200225350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldInfo.h" namespace Lucene { FieldInfo::FieldInfo(const String& name, bool isIndexed, int32_t number, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) { this->name = name; this->isIndexed = isIndexed; this->number = number; // for non-indexed fields, leave defaults this->storeTermVector = isIndexed ? storeTermVector : false; this->storeOffsetWithTermVector = isIndexed ? storeOffsetWithTermVector : false; this->storePositionWithTermVector = isIndexed ? 
storePositionWithTermVector : false; this->storePayloads = isIndexed ? storePayloads : false; this->omitNorms = isIndexed ? omitNorms : true; this->omitTermFreqAndPositions = isIndexed ? omitTermFreqAndPositions : false; } FieldInfo::~FieldInfo() { } LuceneObjectPtr FieldInfo::clone(const LuceneObjectPtr& other) { return newLucene(name, isIndexed, number, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); } void FieldInfo::update(bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) { if (this->isIndexed != isIndexed) { this->isIndexed = true; // once indexed, always index } if (isIndexed) { // if updated field data is not for indexing, leave the updates out if (this->storeTermVector != storeTermVector) { this->storeTermVector = true; // once vector, always vector } if (this->storePositionWithTermVector != storePositionWithTermVector) { this->storePositionWithTermVector = true; // once vector, always vector } if (this->storeOffsetWithTermVector != storeOffsetWithTermVector) { this->storeOffsetWithTermVector = true; // once vector, always vector } if (this->storePayloads != storePayloads) { this->storePayloads = true; } if (this->omitNorms != omitNorms) { this->omitNorms = false; // once norms are stored, always store } if (this->omitTermFreqAndPositions != omitTermFreqAndPositions) { this->omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life } } } } LucenePlusPlus-rel_3.0.9/src/core/index/FieldInfos.cpp000066400000000000000000000227531456444476200227150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "IndexInput.h" #include "IndexOutput.h" #include "Directory.h" #include "Document.h" #include "Fieldable.h" #include "StringUtils.h" namespace Lucene { // Used internally (ie not written to *.fnm files) for pre-2.9 files const int32_t FieldInfos::FORMAT_PRE = -1; // First used in 2.9; prior to 2.9 there was no format header const int32_t FieldInfos::FORMAT_START = -2; const int32_t FieldInfos::CURRENT_FORMAT = FieldInfos::FORMAT_START; const uint8_t FieldInfos::IS_INDEXED = 0x1; const uint8_t FieldInfos::STORE_TERMVECTOR = 0x2; const uint8_t FieldInfos::STORE_POSITIONS_WITH_TERMVECTOR = 0x4; const uint8_t FieldInfos::STORE_OFFSET_WITH_TERMVECTOR = 0x8; const uint8_t FieldInfos::OMIT_NORMS = 0x10; const uint8_t FieldInfos::STORE_PAYLOADS = 0x20; const uint8_t FieldInfos::OMIT_TERM_FREQ_AND_POSITIONS = 0x40; FieldInfos::FieldInfos() { format = 0; byNumber = Collection::newInstance(); byName = MapStringFieldInfo::newInstance(); } FieldInfos::FieldInfos(const DirectoryPtr& d, const String& name) { format = 0; byNumber = Collection::newInstance(); byName = MapStringFieldInfo::newInstance(); IndexInputPtr input(d->openInput(name)); LuceneException finally; try { try { read(input, name); } catch (IOException& e) { if (format == FORMAT_PRE) { input->seek(0); input->setModifiedUTF8StringsMode(); byNumber.clear(); byName.clear(); try { read(input, name); } catch (...) 
{ // Ignore any new exception & throw original IOE finally = e; } } else { finally = e; } } } catch (LuceneException& e) { finally = e; } input->close(); finally.throwException(); } FieldInfos::~FieldInfos() { } LuceneObjectPtr FieldInfos::clone(const LuceneObjectPtr& other) { SyncLock syncLock(this); FieldInfosPtr fis(newLucene()); for (Collection::iterator field = byNumber.begin(); field != byNumber.end(); ++field) { FieldInfoPtr fi(boost::dynamic_pointer_cast((*field)->clone())); fis->byNumber.add(fi); fis->byName.put(fi->name, fi); } return fis; } void FieldInfos::add(const DocumentPtr& doc) { SyncLock syncLock(this); Collection fields(doc->getFields()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { add((*field)->name(), (*field)->isIndexed(), (*field)->isTermVectorStored(), (*field)->isStorePositionWithTermVector(), (*field)->isStoreOffsetWithTermVector(), (*field)->getOmitNorms(), false, (*field)->getOmitTermFreqAndPositions()); } } bool FieldInfos::hasProx() { for (Collection::iterator fi = byNumber.begin(); fi != byNumber.end(); ++fi) { if ((*fi)->isIndexed && !(*fi)->omitTermFreqAndPositions) { return true; } } return false; } void FieldInfos::addIndexed(HashSet names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector) { SyncLock syncLock(this); for (HashSet::iterator name = names.begin(); name != names.end(); ++name) { add(*name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector); } } void FieldInfos::add(HashSet names, bool isIndexed) { SyncLock syncLock(this); for (HashSet::iterator name = names.begin(); name != names.end(); ++name) { add(*name, isIndexed); } } void FieldInfos::add(const String& name, bool isIndexed) { add(name, isIndexed, false, false, false, false); } void FieldInfos::add(const String& name, bool isIndexed, bool storeTermVector) { add(name, isIndexed, storeTermVector, false, false, false); } void FieldInfos::add(const String& name, 
bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector) { add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false); } void FieldInfos::add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms) { add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, false, false); } FieldInfoPtr FieldInfos::add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) { SyncLock syncLock(this); FieldInfoPtr fi(fieldInfo(name)); if (!fi) { return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); } else { fi->update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); } return fi; } FieldInfoPtr FieldInfos::addInternal(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) { FieldInfoPtr fi(newLucene(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions)); byNumber.add(fi); byName.put(name, fi); return fi; } int32_t FieldInfos::fieldNumber(const String& fieldName) { FieldInfoPtr fi(fieldInfo(fieldName)); return fi ? fi->number : -1; } FieldInfoPtr FieldInfos::fieldInfo(const String& fieldName) { return byName.get(fieldName); } String FieldInfos::fieldName(int32_t fieldNumber) { FieldInfoPtr fi(fieldInfo(fieldNumber)); return fi ? 
fi->name : L""; } FieldInfoPtr FieldInfos::fieldInfo(int32_t fieldNumber) { return (fieldNumber >= 0 && fieldNumber < byNumber.size()) ? byNumber[fieldNumber] : FieldInfoPtr(); } int32_t FieldInfos::size() { return byNumber.size(); } bool FieldInfos::hasVectors() { for (Collection::iterator fi = byNumber.begin(); fi != byNumber.end(); ++fi) { if ((*fi)->storeTermVector) { return true; } } return false; } void FieldInfos::write(const DirectoryPtr& d, const String& name) { IndexOutputPtr output(d->createOutput(name)); LuceneException finally; try { write(output); } catch (LuceneException& e) { finally = e; } output->close(); finally.throwException(); } void FieldInfos::write(const IndexOutputPtr& output) { output->writeVInt(CURRENT_FORMAT); output->writeVInt(size()); for (Collection::iterator fi = byNumber.begin(); fi != byNumber.end(); ++fi) { uint8_t bits = 0x0; if ((*fi)->isIndexed) { bits |= IS_INDEXED; } if ((*fi)->storeTermVector) { bits |= STORE_TERMVECTOR; } if ((*fi)->storePositionWithTermVector) { bits |= STORE_POSITIONS_WITH_TERMVECTOR; } if ((*fi)->storeOffsetWithTermVector) { bits |= STORE_OFFSET_WITH_TERMVECTOR; } if ((*fi)->omitNorms) { bits |= OMIT_NORMS; } if ((*fi)->storePayloads) { bits |= STORE_PAYLOADS; } if ((*fi)->omitTermFreqAndPositions) { bits |= OMIT_TERM_FREQ_AND_POSITIONS; } output->writeString((*fi)->name); output->writeByte(bits); } } void FieldInfos::read(const IndexInputPtr& input, const String& fileName) { int32_t firstInt = input->readVInt(); format = firstInt < 0 ? firstInt : FORMAT_PRE; // This is a real format? if (format != FORMAT_PRE && format != FORMAT_START) { boost::throw_exception(CorruptIndexException(L"unrecognized format " + StringUtils::toString(format) + L" in file \"" + fileName + L"\"")); } int32_t size = format == FORMAT_PRE ? 
firstInt : input->readVInt(); // read in the size if required for (int32_t i = 0; i < size; ++i) { String name(input->readString()); uint8_t bits = input->readByte(); addInternal(name, (bits & IS_INDEXED) != 0, (bits & STORE_TERMVECTOR) != 0, (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0, (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0, (bits & OMIT_NORMS) != 0, (bits & STORE_PAYLOADS) != 0, (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0); } if (input->getFilePointer() != input->length()) { boost::throw_exception(CorruptIndexException(L"did not read all bytes from file \"" + fileName + L"\": read " + StringUtils::toString(input->getFilePointer()) + L" vs size " + StringUtils::toString(input->length()))); } } } LucenePlusPlus-rel_3.0.9/src/core/index/FieldInvertState.cpp000066400000000000000000000024351456444476200241020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldInvertState.h" namespace Lucene { FieldInvertState::FieldInvertState(int32_t position, int32_t length, int32_t numOverlap, int32_t offset, double boost) { this->position = position; this->length = length; this->numOverlap = numOverlap; this->offset = offset; this->boost = boost; } FieldInvertState::~FieldInvertState() { } void FieldInvertState::reset(double docBoost) { position = 0; length = 0; numOverlap = 0; offset = 0; boost = docBoost; attributeSource.reset(); } int32_t FieldInvertState::getPosition() { return position; } int32_t FieldInvertState::getLength() { return length; } int32_t FieldInvertState::getNumOverlap() { return numOverlap; } int32_t FieldInvertState::getOffset() { return offset; } double FieldInvertState::getBoost() { return boost; } AttributeSourcePtr FieldInvertState::getAttributeSource() { return attributeSource; } } LucenePlusPlus-rel_3.0.9/src/core/index/FieldSortedTermVectorMapper.cpp000066400000000000000000000037431456444476200262550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldSortedTermVectorMapper.h" #include "TermVectorEntry.h" namespace Lucene { FieldSortedTermVectorMapper::FieldSortedTermVectorMapper(TermVectorEntryComparator comparator) : TermVectorMapper(false, false) { this->fieldToTerms = MapStringCollectionTermVectorEntry::newInstance(); this->comparator = comparator; } FieldSortedTermVectorMapper::FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator) : TermVectorMapper(ignoringPositions, ignoringOffsets) { this->fieldToTerms = MapStringCollectionTermVectorEntry::newInstance(); this->comparator = comparator; } FieldSortedTermVectorMapper::~FieldSortedTermVectorMapper() { } void FieldSortedTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) { TermVectorEntryPtr entry(newLucene(currentField, term, frequency, offsets, positions)); if (!currentSet.contains_if(luceneEqualTo(entry))) { currentSet.insert(std::upper_bound(currentSet.begin(), currentSet.end(), entry, comparator), entry); } } void FieldSortedTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) { currentSet = Collection::newInstance(); currentField = field; fieldToTerms.put(field, currentSet); } MapStringCollectionTermVectorEntry FieldSortedTermVectorMapper::getFieldToTerms() { return fieldToTerms; } TermVectorEntryComparator FieldSortedTermVectorMapper::getComparator() { return comparator; } } LucenePlusPlus-rel_3.0.9/src/core/index/FieldsReader.cpp000066400000000000000000000444311456444476200232210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldsReader.h" #include "BufferedIndexInput.h" #include "IndexFileNames.h" #include "FieldsWriter.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "FieldSelector.h" #include "Directory.h" #include "Document.h" #include "Field.h" #include "CompressionTools.h" #include "MiscUtils.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { FieldsReader::FieldsReader(const FieldInfosPtr& fieldInfos, int32_t numTotalDocs, int32_t size, int32_t format, int32_t formatSize, int32_t docStoreOffset, const IndexInputPtr& cloneableFieldsStream, const IndexInputPtr& cloneableIndexStream) { closed = false; isOriginal = false; this->fieldInfos = fieldInfos; this->numTotalDocs = numTotalDocs; this->_size = size; this->format = format; this->formatSize = formatSize; this->docStoreOffset = docStoreOffset; this->cloneableFieldsStream = cloneableFieldsStream; this->cloneableIndexStream = cloneableIndexStream; fieldsStream = boost::dynamic_pointer_cast(cloneableFieldsStream->clone()); indexStream = boost::dynamic_pointer_cast(cloneableIndexStream->clone()); } FieldsReader::FieldsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn) { ConstructReader(d, segment, fn, BufferedIndexInput::BUFFER_SIZE, -1, 0); } FieldsReader::FieldsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) { ConstructReader(d, segment, fn, readBufferSize, docStoreOffset, size); } FieldsReader::~FieldsReader() { } void FieldsReader::ConstructReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) { bool success = false; isOriginal = true; numTotalDocs = 0; _size = 0; closed = false; format = 0; formatSize = 0; docStoreOffset = docStoreOffset; LuceneException finally; try { fieldInfos = fn; 
cloneableFieldsStream = d->openInput(segment + L"." + IndexFileNames::FIELDS_EXTENSION(), readBufferSize); cloneableIndexStream = d->openInput(segment + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION(), readBufferSize); // First version of fdx did not include a format header, but, the first int will always be 0 in that case format = cloneableIndexStream->readInt(); if (format > FieldsWriter::FORMAT_CURRENT) { boost::throw_exception(CorruptIndexException(L"Incompatible format version: " + StringUtils::toString(format) + L" expected " + StringUtils::toString(FieldsWriter::FORMAT_CURRENT) + L" or lower")); } formatSize = format > FieldsWriter::FORMAT ? 4 : 0; if (format < FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { cloneableFieldsStream->setModifiedUTF8StringsMode(); } fieldsStream = boost::dynamic_pointer_cast(cloneableFieldsStream->clone()); int64_t indexSize = cloneableIndexStream->length() - formatSize; if (docStoreOffset != -1) { // We read only a slice out of this shared fields file this->docStoreOffset = docStoreOffset; this->_size = size; // Verify the file is long enough to hold all of our docs BOOST_ASSERT(((int32_t)((double)indexSize / 8.0)) >= _size + this->docStoreOffset); } else { this->docStoreOffset = 0; this->_size = (int32_t)(indexSize >> 3); } indexStream = boost::dynamic_pointer_cast(cloneableIndexStream->clone()); numTotalDocs = (int32_t)(indexSize >> 3); success = true; } catch (LuceneException& e) { finally = e; } // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception above. 
// In this case, we want to explicitly close any subset of things that were opened if (!success) { close(); } finally.throwException(); } LuceneObjectPtr FieldsReader::clone(const LuceneObjectPtr& other) { ensureOpen(); return newLucene(fieldInfos, numTotalDocs, _size, format, formatSize, docStoreOffset, cloneableFieldsStream, cloneableIndexStream); } void FieldsReader::ensureOpen() { if (closed) { boost::throw_exception(AlreadyClosedException(L"this FieldsReader is closed")); } } void FieldsReader::close() { if (!closed) { if (fieldsStream) { fieldsStream->close(); } if (isOriginal) { if (cloneableFieldsStream) { cloneableFieldsStream->close(); } if (cloneableIndexStream) { cloneableIndexStream->close(); } } if (indexStream) { indexStream->close(); } fieldsStreamTL.close(); closed = true; } } int32_t FieldsReader::size() { return _size; } void FieldsReader::seekIndex(int32_t docID) { indexStream->seek(formatSize + (docID + docStoreOffset) * 8); } bool FieldsReader::canReadRawDocs() { // Disable reading raw docs in 2.x format, because of the removal of compressed fields in 3.0. // We don't want rawDocs() to decode field bits to figure out if a field was compressed, hence // we enforce ordinary (non-raw) stored field merges for <3.0 indexes. return (format >= FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS); } DocumentPtr FieldsReader::doc(int32_t n, const FieldSelectorPtr& fieldSelector) { seekIndex(n); int64_t position = indexStream->readLong(); fieldsStream->seek(position); DocumentPtr doc(newLucene()); int32_t numFields = fieldsStream->readVInt(); for (int32_t i = 0; i < numFields; ++i) { int32_t fieldNumber = fieldsStream->readVInt(); FieldInfoPtr fi = fieldInfos->fieldInfo(fieldNumber); FieldSelector::FieldSelectorResult acceptField = fieldSelector ? 
fieldSelector->accept(fi->name) : FieldSelector::SELECTOR_LOAD; uint8_t bits = fieldsStream->readByte(); BOOST_ASSERT(bits <= FieldsWriter::FIELD_IS_COMPRESSED + FieldsWriter::FIELD_IS_TOKENIZED + FieldsWriter::FIELD_IS_BINARY); bool compressed = ((bits & FieldsWriter::FIELD_IS_COMPRESSED) != 0); // compressed fields are only allowed in indexes of version <= 2.9 BOOST_ASSERT(compressed ? (format < FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS) : true); bool tokenize = ((bits & FieldsWriter::FIELD_IS_TOKENIZED) != 0); bool binary = ((bits & FieldsWriter::FIELD_IS_BINARY) != 0); if (acceptField == FieldSelector::SELECTOR_LOAD) { addField(doc, fi, binary, compressed, tokenize); } else if (acceptField == FieldSelector::SELECTOR_LOAD_AND_BREAK) { addField(doc, fi, binary, compressed, tokenize); break; // Get out of this loop } else if (acceptField == FieldSelector::SELECTOR_LAZY_LOAD) { addFieldLazy(doc, fi, binary, compressed, tokenize); } else if (acceptField == FieldSelector::SELECTOR_SIZE) { skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed)); } else if (acceptField == FieldSelector::SELECTOR_SIZE_AND_BREAK) { addFieldSize(doc, fi, binary, compressed); break; } else { skipField(binary, compressed); } } return doc; } IndexInputPtr FieldsReader::rawDocs(Collection lengths, int32_t startDocID, int32_t numDocs) { seekIndex(startDocID); int64_t startOffset = indexStream->readLong(); int64_t lastOffset = startOffset; int32_t count = 0; while (count < numDocs) { int32_t docID = docStoreOffset + startDocID + count + 1; BOOST_ASSERT(docID <= numTotalDocs); int64_t offset = docID < numTotalDocs ? 
indexStream->readLong() : fieldsStream->length(); lengths[count++] = (int32_t)(offset - lastOffset); lastOffset = offset; } fieldsStream->seek(startOffset); return fieldsStream; } void FieldsReader::skipField(bool binary, bool compressed) { skipField(binary, compressed, fieldsStream->readVInt()); } void FieldsReader::skipField(bool binary, bool compressed, int32_t toRead) { if (format >= FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) { fieldsStream->seek(fieldsStream->getFilePointer() + toRead); } else { // We need to skip chars. This will slow us down, but still better fieldsStream->skipChars(toRead); } } void FieldsReader::addFieldLazy(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed, bool tokenize) { if (binary) { int32_t toRead = fieldsStream->readVInt(); int64_t pointer = fieldsStream->getFilePointer(); doc->add(newLucene(shared_from_this(), fi->name, Field::STORE_YES, toRead, pointer, binary, compressed)); fieldsStream->seek(pointer + toRead); } else { Field::Store store = Field::STORE_YES; Field::Index index = Field::toIndex(fi->isIndexed, tokenize); Field::TermVector termVector = Field::toTermVector(fi->storeTermVector, fi->storeOffsetWithTermVector, fi->storePositionWithTermVector); AbstractFieldPtr f; if (compressed) { int32_t toRead = fieldsStream->readVInt(); int64_t pointer = fieldsStream->getFilePointer(); f = newLucene(shared_from_this(), fi->name, store, toRead, pointer, binary, compressed); // skip over the part that we aren't loading fieldsStream->seek(pointer + toRead); f->setOmitNorms(fi->omitNorms); f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); } else { int32_t length = fieldsStream->readVInt(); int64_t pointer = fieldsStream->getFilePointer(); // skip ahead of where we are by the length of what is stored if (format >= FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { fieldsStream->seek(pointer + length); } else { fieldsStream->skipChars(length); } f = 
newLucene(shared_from_this(), fi->name, store, index, termVector, length, pointer, binary, compressed); f->setOmitNorms(fi->omitNorms); f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); } doc->add(f); } } void FieldsReader::addField(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed, bool tokenize) { // we have a binary stored field, and it may be compressed if (binary) { int32_t toRead = fieldsStream->readVInt(); ByteArray b(ByteArray::newInstance(toRead)); fieldsStream->readBytes(b.get(), 0, b.size()); if (compressed) { doc->add(newLucene(fi->name, uncompress(b), Field::STORE_YES)); } else { doc->add(newLucene(fi->name, b, Field::STORE_YES)); } } else { Field::Store store = Field::STORE_YES; Field::Index index = Field::toIndex(fi->isIndexed, tokenize); Field::TermVector termVector = Field::toTermVector(fi->storeTermVector, fi->storeOffsetWithTermVector, fi->storePositionWithTermVector); AbstractFieldPtr f; if (compressed) { int32_t toRead = fieldsStream->readVInt(); ByteArray b(ByteArray::newInstance(toRead)); fieldsStream->readBytes(b.get(), 0, b.size()); f = newLucene(fi->name, uncompressString(b), store, index, termVector); f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); f->setOmitNorms(fi->omitNorms); } else { f = newLucene(fi->name, fieldsStream->readString(), store, index, termVector); f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); f->setOmitNorms(fi->omitNorms); } doc->add(f); } } int32_t FieldsReader::addFieldSize(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed) { int32_t size = fieldsStream->readVInt(); int32_t bytesize = (binary || compressed) ? 
size : 2 * size; ByteArray sizebytes(ByteArray::newInstance(4)); sizebytes[0] = (uint8_t)MiscUtils::unsignedShift(bytesize, 24); sizebytes[1] = (uint8_t)MiscUtils::unsignedShift(bytesize, 16); sizebytes[2] = (uint8_t)MiscUtils::unsignedShift(bytesize, 8); sizebytes[3] = (uint8_t)(bytesize); doc->add(newLucene(fi->name, sizebytes, Field::STORE_YES)); return size; } ByteArray FieldsReader::uncompress(ByteArray b) { try { return CompressionTools::decompress(b); } catch (LuceneException& e) { boost::throw_exception(CorruptIndexException(L"field data are in wrong format [" + e.getError() + L"]")); } return ByteArray(); } String FieldsReader::uncompressString(ByteArray b) { try { return CompressionTools::decompressString(b); } catch (LuceneException& e) { boost::throw_exception(CorruptIndexException(L"field data are in wrong format [" + e.getError() + L"]")); } return L""; } LazyField::LazyField(const FieldsReaderPtr& reader, const String& name, Field::Store store, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed) : AbstractField(name, store, Field::INDEX_NO, Field::TERM_VECTOR_NO) { this->_reader = reader; this->toRead = toRead; this->pointer = pointer; this->_isBinary = isBinary; if (isBinary) { binaryLength = toRead; } lazy = true; this->isCompressed = isCompressed; } LazyField::LazyField(const FieldsReaderPtr& reader, const String& name, Field::Store store, Field::Index index, Field::TermVector termVector, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed) : AbstractField(name, store, index, termVector) { this->_reader = reader; this->toRead = toRead; this->pointer = pointer; this->_isBinary = isBinary; if (isBinary) { binaryLength = toRead; } lazy = true; this->isCompressed = isCompressed; } LazyField::~LazyField() { } IndexInputPtr LazyField::getFieldStream() { FieldsReaderPtr reader(_reader); IndexInputPtr localFieldsStream = reader->fieldsStreamTL.get(); if (!localFieldsStream) { localFieldsStream = 
boost::static_pointer_cast(reader->cloneableFieldsStream->clone()); reader->fieldsStreamTL.set(localFieldsStream); } return localFieldsStream; } ReaderPtr LazyField::readerValue() { FieldsReaderPtr(_reader)->ensureOpen(); return ReaderPtr(); } TokenStreamPtr LazyField::tokenStreamValue() { FieldsReaderPtr(_reader)->ensureOpen(); return TokenStreamPtr(); } String LazyField::stringValue() { FieldsReaderPtr reader(_reader); reader->ensureOpen(); if (_isBinary) { return L""; } else { if (VariantUtils::isNull(fieldsData)) { IndexInputPtr localFieldsStream(getFieldStream()); try { localFieldsStream->seek(pointer); if (isCompressed) { ByteArray b(ByteArray::newInstance(toRead)); localFieldsStream->readBytes(b.get(), 0, b.size()); fieldsData = reader->uncompressString(b); } else { if (reader->format >= FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { ByteArray bytes(ByteArray::newInstance(toRead)); localFieldsStream->readBytes(bytes.get(), 0, toRead); fieldsData = StringUtils::toUnicode(bytes.get(), toRead); } else { // read in chars because we already know the length we need to read CharArray chars(CharArray::newInstance(toRead)); int32_t length = localFieldsStream->readChars(chars.get(), 0, toRead); fieldsData = String(chars.get(), length); } } } catch (IOException& e) { boost::throw_exception(FieldReaderException(e.getError())); } } return VariantUtils::get(fieldsData); } } int64_t LazyField::getPointer() { FieldsReaderPtr(_reader)->ensureOpen(); return pointer; } void LazyField::setPointer(int64_t pointer) { FieldsReaderPtr(_reader)->ensureOpen(); this->pointer = pointer; } int32_t LazyField::getToRead() { FieldsReaderPtr(_reader)->ensureOpen(); return toRead; } void LazyField::setToRead(int32_t toRead) { FieldsReaderPtr(_reader)->ensureOpen(); this->toRead = toRead; } ByteArray LazyField::getBinaryValue(ByteArray result) { FieldsReaderPtr reader(_reader); reader->ensureOpen(); if (_isBinary) { if (VariantUtils::isNull(fieldsData)) { ByteArray b; // Allocate new 
buffer if result is null or too small if (!result || result.size() < toRead) { b = ByteArray::newInstance(toRead); } else { b = result; } IndexInputPtr localFieldsStream(getFieldStream()); // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a // change for people since they are already handling this exception when getting the document. try { localFieldsStream->seek(pointer); localFieldsStream->readBytes(b.get(), 0, toRead); if (isCompressed) { fieldsData = reader->uncompress(b); } else { fieldsData = b; } } catch (IOException& e) { boost::throw_exception(FieldReaderException(e.getError())); } binaryOffset = 0; binaryLength = toRead; } return VariantUtils::get(fieldsData); } else { return ByteArray(); } } } LucenePlusPlus-rel_3.0.9/src/core/index/FieldsWriter.cpp000066400000000000000000000133171456444476200232720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldsWriter.h" #include "IndexFileNames.h" #include "Directory.h" #include "IndexOutput.h" #include "RAMOutputStream.h" #include "FieldInfo.h" #include "FieldInfos.h" #include "Fieldable.h" #include "Document.h" #include "TestPoint.h" namespace Lucene { const uint8_t FieldsWriter::FIELD_IS_TOKENIZED = 0x1; const uint8_t FieldsWriter::FIELD_IS_BINARY = 0x2; const uint8_t FieldsWriter::FIELD_IS_COMPRESSED = 0x4; const int32_t FieldsWriter::FORMAT = 0; // Original format const int32_t FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1; // Changed strings to UTF8 const int32_t FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2; // Lucene 3.0: Removal of compressed fields // NOTE: if you introduce a new format, make it 1 higher than the current one, and always change this if you // switch to a new format! const int32_t FieldsWriter::FORMAT_CURRENT = FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; FieldsWriter::FieldsWriter(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn) { fieldInfos = fn; bool success = false; String fieldsName(segment + L"." + IndexFileNames::FIELDS_EXTENSION()); LuceneException finally; try { fieldsStream = d->createOutput(fieldsName); fieldsStream->writeInt(FORMAT_CURRENT); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { try { close(); d->deleteFile(fieldsName); } catch (...) { // Suppress so we keep throwing the original exception } } finally.throwException(); success = false; String indexName(segment + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); try { indexStream = d->createOutput(indexName); indexStream->writeInt(FORMAT_CURRENT); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { try { close(); d->deleteFile(fieldsName); d->deleteFile(indexName); } catch (...) 
{ // Suppress so we keep throwing the original exception } } finally.throwException(); doClose = true; } FieldsWriter::FieldsWriter(const IndexOutputPtr& fdx, const IndexOutputPtr& fdt, const FieldInfosPtr& fn) { fieldInfos = fn; fieldsStream = fdt; indexStream = fdx; doClose = false; } FieldsWriter::~FieldsWriter() { } void FieldsWriter::setFieldsStream(const IndexOutputPtr& stream) { this->fieldsStream = stream; } void FieldsWriter::flushDocument(int32_t numStoredFields, const RAMOutputStreamPtr& buffer) { TestScope testScope(L"FieldsWriter", L"flushDocument"); indexStream->writeLong(fieldsStream->getFilePointer()); fieldsStream->writeVInt(numStoredFields); buffer->writeTo(fieldsStream); } void FieldsWriter::skipDocument() { indexStream->writeLong(fieldsStream->getFilePointer()); fieldsStream->writeVInt(0); } void FieldsWriter::flush() { indexStream->flush(); fieldsStream->flush(); } void FieldsWriter::close() { if (doClose) { LuceneException finally; if (fieldsStream) { try { fieldsStream->close(); } catch (LuceneException& e) { finally = e; } fieldsStream.reset(); } if (indexStream) { try { indexStream->close(); } catch (LuceneException& e) { if (finally.isNull()) { // throw first exception hit finally = e; } } indexStream.reset(); } finally.throwException(); } } void FieldsWriter::writeField(const FieldInfoPtr& fi, const FieldablePtr& field) { fieldsStream->writeVInt(fi->number); uint8_t bits = 0; if (field->isTokenized()) { bits |= FIELD_IS_TOKENIZED; } if (field->isBinary()) { bits |= FIELD_IS_BINARY; } fieldsStream->writeByte(bits); if (field->isBinary()) { ByteArray data(field->getBinaryValue()); int32_t len = field->getBinaryLength(); int32_t offset = field->getBinaryOffset(); fieldsStream->writeVInt(len); fieldsStream->writeBytes(data.get(), offset, len); } else { fieldsStream->writeString(field->stringValue()); } } void FieldsWriter::addRawDocuments(const IndexInputPtr& stream, Collection lengths, int32_t numDocs) { int64_t position = 
fieldsStream->getFilePointer(); int64_t start = position; for (int32_t i = 0; i < numDocs; ++i) { indexStream->writeLong(position); position += lengths[i]; } fieldsStream->copyBytes(stream, position - start); BOOST_ASSERT(fieldsStream->getFilePointer() == position); } void FieldsWriter::addDocument(const DocumentPtr& doc) { indexStream->writeLong(fieldsStream->getFilePointer()); int32_t storedCount = 0; Collection fields(doc->getFields()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->isStored()) { ++storedCount; } } fieldsStream->writeVInt(storedCount); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if ((*field)->isStored()) { writeField(fieldInfos->fieldInfo((*field)->name()), *field); } } } } LucenePlusPlus-rel_3.0.9/src/core/index/FilterIndexReader.cpp000066400000000000000000000137131456444476200242270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FilterIndexReader.h" #include "FieldCache.h" namespace Lucene { FilterIndexReader::FilterIndexReader(const IndexReaderPtr& in) { this->in = in; } FilterIndexReader::~FilterIndexReader() { } DirectoryPtr FilterIndexReader::directory() { return in->directory(); } Collection FilterIndexReader::getTermFreqVectors(int32_t docNumber) { ensureOpen(); return in->getTermFreqVectors(docNumber); } TermFreqVectorPtr FilterIndexReader::getTermFreqVector(int32_t docNumber, const String& field) { ensureOpen(); return in->getTermFreqVector(docNumber, field); } void FilterIndexReader::getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) { ensureOpen(); in->getTermFreqVector(docNumber, field, mapper); } void FilterIndexReader::getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) { ensureOpen(); in->getTermFreqVector(docNumber, mapper); } int32_t FilterIndexReader::numDocs() { // Don't call ensureOpen() here (it could affect performance) return in->numDocs(); } int32_t FilterIndexReader::maxDoc() { // Don't call ensureOpen() here (it could affect performance) return in->maxDoc(); } DocumentPtr FilterIndexReader::document(int32_t n, const FieldSelectorPtr& fieldSelector) { ensureOpen(); return in->document(n, fieldSelector); } bool FilterIndexReader::isDeleted(int32_t n) { // Don't call ensureOpen() here (it could affect performance) return in->isDeleted(n); } bool FilterIndexReader::hasDeletions() { // Don't call ensureOpen() here (it could affect performance) return in->hasDeletions(); } void FilterIndexReader::doUndeleteAll() { in->undeleteAll(); } bool FilterIndexReader::hasNorms(const String& field) { ensureOpen(); return in->hasNorms(field); } ByteArray FilterIndexReader::norms(const String& field) { ensureOpen(); return in->norms(field); } void FilterIndexReader::norms(const String& field, ByteArray norms, 
int32_t offset) { ensureOpen(); in->norms(field, norms, offset); } void FilterIndexReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { in->setNorm(doc, field, value); } TermEnumPtr FilterIndexReader::terms() { ensureOpen(); return in->terms(); } TermEnumPtr FilterIndexReader::terms(const TermPtr& t) { ensureOpen(); return in->terms(t); } int32_t FilterIndexReader::docFreq(const TermPtr& t) { ensureOpen(); return in->docFreq(t); } TermDocsPtr FilterIndexReader::termDocs() { ensureOpen(); return in->termDocs(); } TermDocsPtr FilterIndexReader::termDocs(const TermPtr& term) { ensureOpen(); return in->termDocs(term); } TermPositionsPtr FilterIndexReader::termPositions() { ensureOpen(); return in->termPositions(); } void FilterIndexReader::doDelete(int32_t docNum) { in->deleteDocument(docNum); } void FilterIndexReader::doCommit(MapStringString commitUserData) { in->commit(commitUserData); } void FilterIndexReader::doClose() { in->close(); // NOTE: only needed in case someone had asked for FieldCache for top-level reader (which is // generally not a good idea) FieldCache::DEFAULT()->purge(shared_from_this()); } HashSet FilterIndexReader::getFieldNames(FieldOption fieldOption) { ensureOpen(); return in->getFieldNames(fieldOption); } int64_t FilterIndexReader::getVersion() { ensureOpen(); return in->getVersion(); } bool FilterIndexReader::isCurrent() { ensureOpen(); return in->isCurrent(); } bool FilterIndexReader::isOptimized() { ensureOpen(); return in->isOptimized(); } Collection FilterIndexReader::getSequentialSubReaders() { return in->getSequentialSubReaders(); } LuceneObjectPtr FilterIndexReader::getFieldCacheKey() { return in->getFieldCacheKey(); } LuceneObjectPtr FilterIndexReader::getDeletesCacheKey() { return in->getDeletesCacheKey(); } FilterTermDocs::FilterTermDocs(const TermDocsPtr& in) { this->in = in; } FilterTermDocs::~FilterTermDocs() { } void FilterTermDocs::seek(const TermPtr& term) { in->seek(term); } void FilterTermDocs::seek(const 
TermEnumPtr& termEnum) { in->seek(termEnum); } int32_t FilterTermDocs::doc() { return in->doc(); } int32_t FilterTermDocs::freq() { return in->freq(); } bool FilterTermDocs::next() { return in->next(); } int32_t FilterTermDocs::read(Collection& docs, Collection& freqs) { return in->read(docs, freqs); } bool FilterTermDocs::skipTo(int32_t target) { return in->skipTo(target); } void FilterTermDocs::close() { in->close(); } FilterTermPositions::FilterTermPositions(const TermPositionsPtr& in) : FilterTermDocs(in) { } FilterTermPositions::~FilterTermPositions() { } int32_t FilterTermPositions::nextPosition() { return boost::static_pointer_cast(in)->nextPosition(); } int32_t FilterTermPositions::getPayloadLength() { return boost::static_pointer_cast(in)->getPayloadLength(); } ByteArray FilterTermPositions::getPayload(ByteArray data, int32_t offset) { return boost::static_pointer_cast(in)->getPayload(data, offset); } bool FilterTermPositions::isPayloadAvailable() { return boost::static_pointer_cast(in)->isPayloadAvailable(); } FilterTermEnum::FilterTermEnum(const TermEnumPtr& in) { this->in = in; } FilterTermEnum::~FilterTermEnum() { } bool FilterTermEnum::next() { return in->next(); } TermPtr FilterTermEnum::term() { return in->term(); } int32_t FilterTermEnum::docFreq() { return in->docFreq(); } void FilterTermEnum::close() { in->close(); } } LucenePlusPlus-rel_3.0.9/src/core/index/FormatPostingsDocsConsumer.cpp000066400000000000000000000007501456444476200261700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsDocsConsumer.h" namespace Lucene { FormatPostingsDocsConsumer::~FormatPostingsDocsConsumer() { } } LucenePlusPlus-rel_3.0.9/src/core/index/FormatPostingsDocsWriter.cpp000066400000000000000000000070721456444476200256550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsDocsWriter.h" #include "FormatPostingsTermsWriter.h" #include "FormatPostingsFieldsWriter.h" #include "FormatPostingsPositionsWriter.h" #include "IndexFileNames.h" #include "SegmentWriteState.h" #include "Directory.h" #include "TermInfosWriter.h" #include "DefaultSkipListWriter.h" #include "FieldInfo.h" #include "IndexOutput.h" #include "TermInfo.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { FormatPostingsDocsWriter::FormatPostingsDocsWriter(const SegmentWriteStatePtr& state, const FormatPostingsTermsWriterPtr& parent) { this->lastDocID = 0; this->df = 0; this->omitTermFreqAndPositions = false; this->storePayloads = false; this->freqStart = 0; FormatPostingsFieldsWriterPtr parentPostings(parent->_parent); this->_parent = parent; this->state = state; String fileName(IndexFileNames::segmentFileName(parentPostings->segment, IndexFileNames::FREQ_EXTENSION())); state->flushedFiles.add(fileName); out = parentPostings->dir->createOutput(fileName); totalNumDocs = parentPostings->totalNumDocs; skipInterval = parentPostings->termsOut->skipInterval; skipListWriter = parentPostings->skipListWriter; skipListWriter->setFreqOutput(out); termInfo = newLucene(); utf8 = newLucene(); } 
FormatPostingsDocsWriter::~FormatPostingsDocsWriter() { } void FormatPostingsDocsWriter::initialize() { posWriter = newLucene(state, shared_from_this()); } void FormatPostingsDocsWriter::setField(const FieldInfoPtr& fieldInfo) { this->fieldInfo = fieldInfo; omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; storePayloads = fieldInfo->storePayloads; posWriter->setField(fieldInfo); } FormatPostingsPositionsConsumerPtr FormatPostingsDocsWriter::addDoc(int32_t docID, int32_t termDocFreq) { int32_t delta = docID - lastDocID; if (docID < 0 || (df > 0 && delta <= 0)) { boost::throw_exception(CorruptIndexException(L"docs out of order (" + StringUtils::toString(docID) + L" <= " + StringUtils::toString(lastDocID) + L" )")); } if ((++df % skipInterval) == 0) { skipListWriter->setSkipData(lastDocID, storePayloads, posWriter->lastPayloadLength); skipListWriter->bufferSkip(df); } BOOST_ASSERT(docID < totalNumDocs); lastDocID = docID; if (omitTermFreqAndPositions) { out->writeVInt(delta); } else if (termDocFreq == 1) { out->writeVInt((delta << 1) | 1); } else { out->writeVInt(delta << 1); out->writeVInt(termDocFreq); } return posWriter; } void FormatPostingsDocsWriter::finish() { int64_t skipPointer = skipListWriter->writeSkip(out); FormatPostingsTermsWriterPtr parent(_parent); termInfo->set(df, parent->freqStart, parent->proxStart, (int32_t)(skipPointer - parent->freqStart)); StringUtils::toUTF8(parent->currentTerm.get() + parent->currentTermStart, parent->currentTerm.size(), utf8); if (df > 0) { parent->termsOut->add(fieldInfo->number, utf8->result, utf8->length, termInfo); } lastDocID = 0; df = 0; } void FormatPostingsDocsWriter::close() { out->close(); posWriter->close(); } } LucenePlusPlus-rel_3.0.9/src/core/index/FormatPostingsFieldsConsumer.cpp000066400000000000000000000007561456444476200265140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsFieldsConsumer.h" namespace Lucene { FormatPostingsFieldsConsumer::~FormatPostingsFieldsConsumer() { } } LucenePlusPlus-rel_3.0.9/src/core/index/FormatPostingsFieldsWriter.cpp000066400000000000000000000033341456444476200261700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsFieldsWriter.h" #include "FormatPostingsTermsWriter.h" #include "SegmentWriteState.h" #include "TermInfosWriter.h" #include "IndexFileNames.h" #include "DefaultSkipListWriter.h" namespace Lucene { FormatPostingsFieldsWriter::FormatPostingsFieldsWriter(const SegmentWriteStatePtr& state, const FieldInfosPtr& fieldInfos) { dir = state->directory; segment = state->segmentName; totalNumDocs = state->numDocs; this->state = state; this->fieldInfos = fieldInfos; termsOut = newLucene(dir, segment, fieldInfos, state->termIndexInterval); skipListWriter = newLucene(termsOut->skipInterval, termsOut->maxSkipLevels, totalNumDocs, IndexOutputPtr(), IndexOutputPtr()); state->flushedFiles.add(state->segmentFileName(IndexFileNames::TERMS_EXTENSION())); state->flushedFiles.add(state->segmentFileName(IndexFileNames::TERMS_INDEX_EXTENSION())); } FormatPostingsFieldsWriter::~FormatPostingsFieldsWriter() { } void FormatPostingsFieldsWriter::initialize() { termsWriter = newLucene(state, shared_from_this()); } FormatPostingsTermsConsumerPtr FormatPostingsFieldsWriter::addField(const FieldInfoPtr& field) { 
termsWriter->setField(field); return termsWriter; } void FormatPostingsFieldsWriter::finish() { termsOut->close(); termsWriter->close(); } } LucenePlusPlus-rel_3.0.9/src/core/index/FormatPostingsPositionsConsumer.cpp000066400000000000000000000007671456444476200272770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsPositionsConsumer.h" namespace Lucene { FormatPostingsPositionsConsumer::~FormatPostingsPositionsConsumer() { } } LucenePlusPlus-rel_3.0.9/src/core/index/FormatPostingsPositionsWriter.cpp000066400000000000000000000054371456444476200267570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsPositionsWriter.h" #include "FormatPostingsDocsWriter.h" #include "FormatPostingsTermsWriter.h" #include "FormatPostingsFieldsWriter.h" #include "IndexFileNames.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "SegmentWriteState.h" #include "Directory.h" #include "DefaultSkipListWriter.h" #include "IndexOutput.h" namespace Lucene { FormatPostingsPositionsWriter::FormatPostingsPositionsWriter(const SegmentWriteStatePtr& state, const FormatPostingsDocsWriterPtr& parent) { lastPosition = 0; storePayloads = false; lastPayloadLength = -1; this->_parent = parent; FormatPostingsFieldsWriterPtr parentFieldsWriter(FormatPostingsTermsWriterPtr(parent->_parent)->_parent); omitTermFreqAndPositions = parent->omitTermFreqAndPositions; if (parentFieldsWriter->fieldInfos->hasProx()) { // At least one field does not omit TF, so create the prox file String fileName(IndexFileNames::segmentFileName(parentFieldsWriter->segment, IndexFileNames::PROX_EXTENSION())); state->flushedFiles.add(fileName); out = parentFieldsWriter->dir->createOutput(fileName); parent->skipListWriter->setProxOutput(out); } else { // Every field omits TF so we will write no prox file } } FormatPostingsPositionsWriter::~FormatPostingsPositionsWriter() { } void FormatPostingsPositionsWriter::addPosition(int32_t position, ByteArray payload, int32_t payloadOffset, int32_t payloadLength) { BOOST_ASSERT(!omitTermFreqAndPositions); BOOST_ASSERT(out); int32_t delta = position - lastPosition; lastPosition = position; if (storePayloads) { if (payloadLength != lastPayloadLength) { lastPayloadLength = payloadLength; out->writeVInt((delta << 1) | 1); out->writeVInt(payloadLength); } else { out->writeVInt(delta << 1); } if (payloadLength > 0) { out->writeBytes(payload.get(), payloadLength); } } else { out->writeVInt(delta); } } void FormatPostingsPositionsWriter::setField(const FieldInfoPtr& 
fieldInfo) { omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; storePayloads = omitTermFreqAndPositions ? false : fieldInfo->storePayloads; } void FormatPostingsPositionsWriter::finish() { lastPosition = 0; lastPayloadLength = -1; } void FormatPostingsPositionsWriter::close() { if (out) { out->close(); } } } LucenePlusPlus-rel_3.0.9/src/core/index/FormatPostingsTermsConsumer.cpp000066400000000000000000000020071456444476200263670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsTermsConsumer.h" #include "UTF8Stream.h" #include "MiscUtils.h" namespace Lucene { FormatPostingsTermsConsumer::~FormatPostingsTermsConsumer() { } FormatPostingsDocsConsumerPtr FormatPostingsTermsConsumer::addTerm(const String& text) { int32_t len = text.length(); if (!termBuffer) { termBuffer = CharArray::newInstance(MiscUtils::getNextSize(len + 1)); } if (termBuffer.size() < len + 1) { termBuffer.resize(MiscUtils::getNextSize(len + 1)); } MiscUtils::arrayCopy(text.begin(), 0, termBuffer.get(), 0, len); termBuffer[len] = UTF8Base::UNICODE_TERMINATOR; return addTerm(termBuffer, 0); } } LucenePlusPlus-rel_3.0.9/src/core/index/FormatPostingsTermsWriter.cpp000066400000000000000000000033441456444476200260550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FormatPostingsTermsWriter.h" #include "FormatPostingsDocsWriter.h" #include "FormatPostingsFieldsWriter.h" #include "FormatPostingsPositionsWriter.h" #include "IndexOutput.h" #include "DefaultSkipListWriter.h" namespace Lucene { FormatPostingsTermsWriter::FormatPostingsTermsWriter(const SegmentWriteStatePtr& state, const FormatPostingsFieldsWriterPtr& parent) { currentTermStart = 0; freqStart = 0; proxStart = 0; this->_parent = parent; this->state = state; termsOut = parent->termsOut; } FormatPostingsTermsWriter::~FormatPostingsTermsWriter() { } void FormatPostingsTermsWriter::initialize() { docsWriter = newLucene(state, shared_from_this()); } void FormatPostingsTermsWriter::setField(const FieldInfoPtr& fieldInfo) { this->fieldInfo = fieldInfo; docsWriter->setField(fieldInfo); } FormatPostingsDocsConsumerPtr FormatPostingsTermsWriter::addTerm(CharArray text, int32_t start) { currentTerm = text; currentTermStart = start; freqStart = docsWriter->out->getFilePointer(); if (docsWriter->posWriter->out) { proxStart = docsWriter->posWriter->out->getFilePointer(); } FormatPostingsFieldsWriterPtr(_parent)->skipListWriter->resetSkip(); return docsWriter; } void FormatPostingsTermsWriter::finish() { } void FormatPostingsTermsWriter::close() { docsWriter->close(); } } LucenePlusPlus-rel_3.0.9/src/core/index/FreqProxFieldMergeState.cpp000066400000000000000000000055711456444476200253650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FreqProxFieldMergeState.h" #include "FreqProxTermsWriterPerField.h" #include "FreqProxTermsWriterPerThread.h" #include "FreqProxTermsWriter.h" #include "TermsHashPerThread.h" #include "TermsHashPerField.h" #include "ByteSliceReader.h" #include "DocumentsWriter.h" #include "CharBlockPool.h" #include "FieldInfo.h" #include "MiscUtils.h" namespace Lucene { FreqProxFieldMergeState::FreqProxFieldMergeState(const FreqProxTermsWriterPerFieldPtr& field) { this->numPostings = 0; this->textOffset = 0; this->docID = 0; this->termFreq = 0; this->postingUpto = -1; this->freq = newLucene(); this->prox = newLucene(); this->field = field; this->charPool = TermsHashPerThreadPtr(FreqProxTermsWriterPerThreadPtr(field->_perThread)->_termsHashPerThread)->charPool; TermsHashPerFieldPtr termsHashPerField(field->_termsHashPerField); this->numPostings = termsHashPerField->numPostings; this->postings = termsHashPerField->sortPostings(); } FreqProxFieldMergeState::~FreqProxFieldMergeState() { } bool FreqProxFieldMergeState::nextTerm() { ++postingUpto; if (postingUpto == numPostings) { return false; } p = boost::static_pointer_cast(postings[postingUpto]); docID = 0; text = charPool->buffers[p->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT]; textOffset = (p->textStart & DocumentsWriter::CHAR_BLOCK_MASK); TermsHashPerFieldPtr termsHashPerField(field->_termsHashPerField); termsHashPerField->initReader(freq, p, 0); if (!field->fieldInfo->omitTermFreqAndPositions) { termsHashPerField->initReader(prox, p, 1); } // Should always be true bool result = nextDoc(); BOOST_ASSERT(result); return true; } bool FreqProxFieldMergeState::nextDoc() { if (freq->eof()) { if (p->lastDocCode != -1) { // Return last doc docID = p->lastDocID; if (!field->omitTermFreqAndPositions) { termFreq = p->docFreq; } p->lastDocCode = -1; return true; } else { // EOF return false; } } int32_t code = freq->readVInt(); 
if (field->omitTermFreqAndPositions) { docID += code; } else { docID += MiscUtils::unsignedShift(code, 1); if ((code & 1) != 0) { termFreq = 1; } else { termFreq = freq->readVInt(); } } BOOST_ASSERT(docID != p->lastDocID); return true; } } LucenePlusPlus-rel_3.0.9/src/core/index/FreqProxTermsWriter.cpp000066400000000000000000000244261456444476200246500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FreqProxTermsWriter.h" #include "FreqProxTermsWriterPerThread.h" #include "FreqProxTermsWriterPerField.h" #include "FreqProxFieldMergeState.h" #include "TermsHashConsumerPerThread.h" #include "TermsHashConsumerPerField.h" #include "TermsHashPerField.h" #include "TermsHashPerThread.h" #include "FormatPostingsDocsConsumer.h" #include "FormatPostingsFieldsConsumer.h" #include "FormatPostingsFieldsWriter.h" #include "FormatPostingsTermsConsumer.h" #include "FormatPostingsPositionsConsumer.h" #include "FieldInfo.h" #include "ByteSliceReader.h" #include "RawPostingList.h" #include "DocumentsWriter.h" #include "UTF8Stream.h" #include "TestPoint.h" namespace Lucene { FreqProxTermsWriter::~FreqProxTermsWriter() { } TermsHashConsumerPerThreadPtr FreqProxTermsWriter::addThread(const TermsHashPerThreadPtr& perThread) { return newLucene(perThread); } void FreqProxTermsWriter::createPostings(Collection postings, int32_t start, int32_t count) { int32_t end = start + count; for (int32_t i = start; i < end; ++i) { postings[i] = newLucene(); } } int32_t FreqProxTermsWriter::compareText(const wchar_t* text1, int32_t pos1, const wchar_t* text2, int32_t pos2) { while (true) { wchar_t c1 = text1[pos1++]; wchar_t c2 = text2[pos2++]; if (c1 != c2) { 
if (c2 == UTF8Base::UNICODE_TERMINATOR) { return 1; } else if (c1 == UTF8Base::UNICODE_TERMINATOR) { return -1; } else { return (c1 - c2); } } else if (c1 == UTF8Base::UNICODE_TERMINATOR) { return 0; } } } void FreqProxTermsWriter::closeDocStore(const SegmentWriteStatePtr& state) { } void FreqProxTermsWriter::abort() { } void FreqProxTermsWriter::flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) { // Gather all FieldData's that have postings, across all ThreadStates Collection allFields(Collection::newInstance()); for (MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) { FreqProxTermsWriterPerFieldPtr freqProxPerField(boost::static_pointer_cast(*perField)); if (TermsHashPerFieldPtr(freqProxPerField->_termsHashPerField)->numPostings > 0) { allFields.add(freqProxPerField); } } } // Sort by field name std::sort(allFields.begin(), allFields.end(), luceneCompare()); int32_t numAllFields = allFields.size(); FormatPostingsFieldsConsumerPtr consumer(newLucene(state, fieldInfos)); // Current writer chain: // FormatPostingsFieldsConsumer // -> IMPL: FormatPostingsFieldsWriter // -> FormatPostingsTermsConsumer // -> IMPL: FormatPostingsTermsWriter // -> FormatPostingsDocConsumer // -> IMPL: FormatPostingsDocWriter // -> FormatPostingsPositionsConsumer // -> IMPL: FormatPostingsPositionsWriter int32_t start = 0; while (start < numAllFields) { FieldInfoPtr fieldInfo(allFields[start]->fieldInfo); String fieldName(fieldInfo->name); int32_t end = start + 1; while (end < numAllFields && allFields[end]->fieldInfo->name == fieldName) { ++end; } Collection fields(Collection::newInstance(end - start)); for (int32_t i = start; i < end; ++i) { fields[i - start] = allFields[i]; // Aggregate the storePayload as 
seen by the same field across multiple threads if (fields[i - start]->hasPayloads) { fieldInfo->storePayloads = true; } } // If this field has postings then add them to the segment appendPostings(fields, consumer); for (int32_t i = 0; i < fields.size(); ++i) { TermsHashPerFieldPtr perField(fields[i]->_termsHashPerField); int32_t numPostings = perField->numPostings; perField->reset(); perField->shrinkHash(numPostings); fields[i]->reset(); } start = end; } for (MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { TermsHashPerThreadPtr(boost::static_pointer_cast(entry->first)->_termsHashPerThread)->reset(true); } consumer->finish(); } void FreqProxTermsWriter::appendPostings(Collection fields, const FormatPostingsFieldsConsumerPtr& consumer) { TestScope testScope(L"FreqProxTermsWriter", L"appendPostings"); int32_t numFields = fields.size(); Collection mergeStates(Collection::newInstance(numFields)); for (int32_t i = 0; i < numFields; ++i) { FreqProxFieldMergeStatePtr fms(newLucene(fields[i])); mergeStates[i] = fms; BOOST_ASSERT(fms->field->fieldInfo == fields[0]->fieldInfo); // Should always be true bool result = fms->nextTerm(); BOOST_ASSERT(result); } FormatPostingsTermsConsumerPtr termsConsumer(consumer->addField(fields[0]->fieldInfo)); Collection termStates(Collection::newInstance(numFields)); bool currentFieldOmitTermFreqAndPositions = fields[0]->fieldInfo->omitTermFreqAndPositions; while (numFields > 0) { // Get the next term to merge termStates[0] = mergeStates[0]; int32_t numToMerge = 1; for (int32_t i = 1; i < numFields; ++i) { CharArray text = mergeStates[i]->text; int32_t textOffset = mergeStates[i]->textOffset; int32_t cmp = compareText(text.get(), textOffset, termStates[0]->text.get(), termStates[0]->textOffset); if (cmp < 0) { termStates[0] = mergeStates[i]; numToMerge = 1; } else if (cmp == 0) { termStates[numToMerge++] = mergeStates[i]; } } 
FormatPostingsDocsConsumerPtr docConsumer(termsConsumer->addTerm(termStates[0]->text, termStates[0]->textOffset)); // Now termStates has numToMerge FieldMergeStates which all share the same term. Now we must // interleave the docID streams. while (numToMerge > 0) { FreqProxFieldMergeStatePtr minState(termStates[0]); for (int32_t i = 1; i < numToMerge; ++i) { if (termStates[i]->docID < minState->docID) { minState = termStates[i]; } } int32_t termDocFreq = minState->termFreq; FormatPostingsPositionsConsumerPtr posConsumer(docConsumer->addDoc(minState->docID, termDocFreq)); ByteSliceReaderPtr prox(minState->prox); // Carefully copy over the prox + payload info, changing the format to match Lucene's segment format. if (!currentFieldOmitTermFreqAndPositions) { // omitTermFreqAndPositions == false so we do write positions & payload int32_t position = 0; for (int32_t j = 0; j < termDocFreq; ++j) { int32_t code = prox->readVInt(); position += (code >> 1); int32_t payloadLength; if ((code & 1) != 0) { // This position has a payload payloadLength = prox->readVInt(); if (!payloadBuffer) { payloadBuffer = ByteArray::newInstance(payloadLength); } if (payloadBuffer.size() < payloadLength) { payloadBuffer.resize(payloadLength); } prox->readBytes(payloadBuffer.get(), 0, payloadLength); } else { payloadLength = 0; } posConsumer->addPosition(position, payloadBuffer, 0, payloadLength); } posConsumer->finish(); } if (!minState->nextDoc()) { // Remove from termStates int32_t upto = 0; for (int32_t i = 0; i < numToMerge; ++i) { if (termStates[i] != minState) { termStates[upto++] = termStates[i]; } } --numToMerge; BOOST_ASSERT(upto == numToMerge); // Advance this state to the next term if (!minState->nextTerm()) { // OK, no more terms, so remove from mergeStates as well upto = 0; for (int32_t i = 0; i < numFields; ++i) { if (mergeStates[i] != minState) { mergeStates[upto++] = mergeStates[i]; } } --numFields; BOOST_ASSERT(upto == numFields); } } } docConsumer->finish(); } 
termsConsumer->finish(); } int32_t FreqProxTermsWriter::bytesPerPosting() { return RawPostingList::BYTES_SIZE + 4 * DocumentsWriter::INT_NUM_BYTE; } FreqProxTermsWriterPostingList::FreqProxTermsWriterPostingList() { docFreq = 0; lastDocID = 0; lastDocCode = 0; lastPosition = 0; } FreqProxTermsWriterPostingList::~FreqProxTermsWriterPostingList() { } } LucenePlusPlus-rel_3.0.9/src/core/index/FreqProxTermsWriterPerField.cpp000066400000000000000000000131561456444476200262610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FreqProxTermsWriterPerField.h" #include "FreqProxTermsWriter.h" #include "FieldInfo.h" #include "Fieldable.h" #include "TermsHashPerField.h" #include "FieldInvertState.h" #include "AttributeSource.h" #include "Payload.h" #include "PayloadAttribute.h" #include "DocumentsWriter.h" #include "RawPostingList.h" namespace Lucene { FreqProxTermsWriterPerField::FreqProxTermsWriterPerField(const TermsHashPerFieldPtr& termsHashPerField, const FreqProxTermsWriterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo) { this->hasPayloads = false; this->_termsHashPerField = termsHashPerField; this->_perThread = perThread; this->fieldInfo = fieldInfo; docState = termsHashPerField->docState; fieldState = termsHashPerField->fieldState; omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; } FreqProxTermsWriterPerField::~FreqProxTermsWriterPerField() { } int32_t FreqProxTermsWriterPerField::getStreamCount() { return fieldInfo->omitTermFreqAndPositions ? 
1 : 2; } void FreqProxTermsWriterPerField::finish() { } void FreqProxTermsWriterPerField::skippingLongTerm() { } int32_t FreqProxTermsWriterPerField::compareTo(const LuceneObjectPtr& other) { return fieldInfo->name.compare(boost::static_pointer_cast(other)->fieldInfo->name); } void FreqProxTermsWriterPerField::reset() { // Record, up front, whether our in-RAM format will be with or without term freqs omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; payloadAttribute.reset(); } bool FreqProxTermsWriterPerField::start(Collection fields, int32_t count) { for (int32_t i = 0; i < count; ++i) { if (fields[i]->isIndexed()) { return true; } } return false; } void FreqProxTermsWriterPerField::start(const FieldablePtr& field) { if (fieldState->attributeSource->hasAttribute()) { payloadAttribute = fieldState->attributeSource->getAttribute(); } else { payloadAttribute.reset(); } } void FreqProxTermsWriterPerField::writeProx(const FreqProxTermsWriterPostingListPtr& p, int32_t proxCode) { PayloadPtr payload; if (payloadAttribute) { payload = payloadAttribute->getPayload(); } TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); if (payload && payload->length() > 0) { termsHashPerField->writeVInt(1, (proxCode << 1) | 1); termsHashPerField->writeVInt(1, payload->length()); termsHashPerField->writeBytes(1, payload->getData().get(), payload->getOffset(), payload->length()); hasPayloads = true; } else { termsHashPerField->writeVInt(1, proxCode << 1); } p->lastPosition = fieldState->position; } void FreqProxTermsWriterPerField::newTerm(const RawPostingListPtr& p) { // First time we're seeing this term since the last flush BOOST_ASSERT(docState->testPoint(L"FreqProxTermsWriterPerField.newTerm start")); FreqProxTermsWriterPostingListPtr newPostingList(boost::static_pointer_cast(p)); newPostingList->lastDocID = docState->docID; if (omitTermFreqAndPositions) { newPostingList->lastDocCode = docState->docID; } else { newPostingList->lastDocCode = docState->docID << 1; 
newPostingList->docFreq = 1; writeProx(newPostingList, fieldState->position); } } void FreqProxTermsWriterPerField::addTerm(const RawPostingListPtr& p) { BOOST_ASSERT(docState->testPoint(L"FreqProxTermsWriterPerField.addTerm start")); FreqProxTermsWriterPostingListPtr addPostingList(boost::static_pointer_cast(p)); BOOST_ASSERT(omitTermFreqAndPositions || addPostingList->docFreq > 0); TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); if (omitTermFreqAndPositions) { if (docState->docID != addPostingList->lastDocID) { BOOST_ASSERT(docState->docID > addPostingList->lastDocID); termsHashPerField->writeVInt(0, addPostingList->lastDocCode); addPostingList->lastDocCode = docState->docID - addPostingList->lastDocID; addPostingList->lastDocID = docState->docID; } } else { if (docState->docID != addPostingList->lastDocID) { BOOST_ASSERT(docState->docID > addPostingList->lastDocID); // Term not yet seen in the current doc but previously seen in other doc(s) since // the last flush // Now that we know doc freq for previous doc, write it & lastDocCode if (addPostingList->docFreq == 1) { termsHashPerField->writeVInt(0, addPostingList->lastDocCode | 1); } else { termsHashPerField->writeVInt(0, addPostingList->lastDocCode); termsHashPerField->writeVInt(0, addPostingList->docFreq); } addPostingList->docFreq = 1; addPostingList->lastDocCode = (docState->docID - addPostingList->lastDocID) << 1; addPostingList->lastDocID = docState->docID; writeProx(addPostingList, fieldState->position); } else { ++addPostingList->docFreq; writeProx(addPostingList, fieldState->position - addPostingList->lastPosition); } } } void FreqProxTermsWriterPerField::abort() { } } LucenePlusPlus-rel_3.0.9/src/core/index/FreqProxTermsWriterPerThread.cpp000066400000000000000000000022501456444476200264360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FreqProxTermsWriterPerThread.h" #include "FreqProxTermsWriterPerField.h" #include "TermsHashPerThread.h" namespace Lucene { FreqProxTermsWriterPerThread::FreqProxTermsWriterPerThread(const TermsHashPerThreadPtr& perThread) { docState = perThread->docState; _termsHashPerThread = perThread; } FreqProxTermsWriterPerThread::~FreqProxTermsWriterPerThread() { } TermsHashConsumerPerFieldPtr FreqProxTermsWriterPerThread::addField(const TermsHashPerFieldPtr& termsHashPerField, const FieldInfoPtr& fieldInfo) { return newLucene(termsHashPerField, shared_from_this(), fieldInfo); } void FreqProxTermsWriterPerThread::startDocument() { } DocWriterPtr FreqProxTermsWriterPerThread::finishDocument() { return DocWriterPtr(); } void FreqProxTermsWriterPerThread::abort() { } } LucenePlusPlus-rel_3.0.9/src/core/index/IndexCommit.cpp000066400000000000000000000020231456444476200230770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IndexCommit.h" #include "Directory.h" namespace Lucene { IndexCommit::~IndexCommit() { } bool IndexCommit::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } IndexCommitPtr otherCommit(boost::dynamic_pointer_cast(other)); if (!otherCommit) { return false; } return (otherCommit->getDirectory()->equals(getDirectory()) && otherCommit->getVersion() == getVersion()); } int32_t IndexCommit::hashCode() { return (getDirectory()->hashCode() + (int32_t)getVersion()); } int64_t IndexCommit::getTimestamp() { return getDirectory()->fileModified(getSegmentsFileName()); } } LucenePlusPlus-rel_3.0.9/src/core/index/IndexDeletionPolicy.cpp000066400000000000000000000010031456444476200245670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IndexDeletionPolicy.h" namespace Lucene { IndexDeletionPolicy::IndexDeletionPolicy() { } IndexDeletionPolicy::~IndexDeletionPolicy() { } } LucenePlusPlus-rel_3.0.9/src/core/index/IndexFileDeleter.cpp000066400000000000000000000413611456444476200240430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "IndexFileDeleter.h" #include "IndexFileNameFilter.h" #include "IndexFileNames.h" #include "IndexDeletionPolicy.h" #include "SegmentInfos.h" #include "SegmentInfo.h" #include "Directory.h" #include "DocumentsWriter.h" #include "InfoStream.h" #include "DateTools.h" #include "LuceneThread.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { /// Change to true to see details of reference counts when infoStream != null bool IndexFileDeleter::VERBOSE_REF_COUNTS = false; IndexFileDeleter::IndexFileDeleter(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& policy, const SegmentInfosPtr& segmentInfos, const InfoStreamPtr& infoStream, const DocumentsWriterPtr& docWriter, HashSet synced) { this->lastFiles = Collection< HashSet >::newInstance(); this->commits = Collection::newInstance(); this->commitsToDelete = Collection::newInstance(); this->refCounts = MapStringRefCount::newInstance(); this->docWriter = docWriter; this->infoStream = infoStream; this->synced = synced; if (infoStream) { message(L"init: current segments file is \"" + segmentInfos->getCurrentSegmentFileName()); } this->policy = policy; this->directory = directory; // First pass: walk the files and initialize our ref counts int64_t currentGen = segmentInfos->getGeneration(); IndexFileNameFilterPtr filter(IndexFileNameFilter::getFilter()); HashSet files(directory->listAll()); CommitPointPtr currentCommitPoint; for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { if (filter->accept(L"", *fileName) && *fileName != IndexFileNames::SEGMENTS_GEN()) { // Add this file to refCounts with initial count 0 getRefCount(*fileName); if (boost::starts_with(*fileName, IndexFileNames::SEGMENTS())) { // This is a commit (segments or segments_N), and it's valid (<= the max gen). 
// Load it, then incref all files it refers to if (infoStream) { message(L"init: load commit \"" + *fileName + L"\""); } SegmentInfosPtr sis(newLucene()); try { sis->read(directory, *fileName); } catch (IOException& e) { if (SegmentInfos::generationFromSegmentsFileName(*fileName) <= currentGen) { boost::throw_exception(e); } else { // Most likely we are opening an index that has an aborted "future" commit, // so suppress exc in this case sis.reset(); } } catch (...) { if (infoStream) { message(L"init: hit exception when loading commit \"" + *fileName + L"\"; skipping this commit point"); } sis.reset(); } if (sis) { CommitPointPtr commitPoint(newLucene(commitsToDelete, directory, sis)); if (sis->getGeneration() == segmentInfos->getGeneration()) { currentCommitPoint = commitPoint; } commits.add(commitPoint); incRef(sis, true); if (!lastSegmentInfos || sis->getGeneration() > lastSegmentInfos->getGeneration()) { lastSegmentInfos = sis; } } } } } if (!currentCommitPoint) { // We did not in fact see the segments_N file corresponding to the segmentInfos that was passed // in. Yet, it must exist, because our caller holds the write lock. This can happen when the // directory listing was stale (eg when index accessed via NFS client with stale directory listing // cache). So we try now to explicitly open this commit point. SegmentInfosPtr sis(newLucene()); try { sis->read(directory, segmentInfos->getCurrentSegmentFileName()); } catch (LuceneException&) { boost::throw_exception(CorruptIndexException(L"failed to locate current segments_N file")); } if (infoStream) { message(L"forced open of current segments file " + segmentInfos->getCurrentSegmentFileName()); } currentCommitPoint = newLucene(commitsToDelete, directory, sis); commits.add(currentCommitPoint); incRef(sis, true); } // We keep commits list in sorted order (oldest to newest) std::sort(commits.begin(), commits.end(), luceneCompare()); // Now delete anything with ref count at 0. 
These are presumably abandoned files eg due to crash of IndexWriter. for (MapStringRefCount::iterator entry = refCounts.begin(); entry != refCounts.end(); ++entry) { if (entry->second->count == 0) { if (infoStream) { message(L"init: removing unreferenced file \"" + entry->first + L"\""); } deleteFile(entry->first); } } // Finally, give policy a chance to remove things on startup policy->onInit(commits); // Always protect the incoming segmentInfos since sometime it may not be the most recent commit checkpoint(segmentInfos, false); startingCommitDeleted = currentCommitPoint->isDeleted(); deleteCommits(); } IndexFileDeleter::~IndexFileDeleter() { } void IndexFileDeleter::setInfoStream(const InfoStreamPtr& infoStream) { this->infoStream = infoStream; } void IndexFileDeleter::message(const String& message) { if (infoStream) { *infoStream << L"IFD [" << DateTools::timeToString(MiscUtils::currentTimeMillis(), DateTools::RESOLUTION_SECOND); *infoStream << L"; " << StringUtils::toString(LuceneThread::currentId()) << L"]: " << message << L"\n"; } } SegmentInfosPtr IndexFileDeleter::getLastSegmentInfos() { return lastSegmentInfos; } void IndexFileDeleter::deleteCommits() { if (!commitsToDelete.empty()) { // First decref all files that had been referred to by the now-deleted commits for (Collection::iterator commit = commitsToDelete.begin(); commit != commitsToDelete.end(); ++commit) { if (infoStream) { message(L"deleteCommits: now decRef commit \"" + (*commit)->getSegmentsFileName() + L"\""); } for (HashSet::iterator file = (*commit)->files.begin(); file != (*commit)->files.end(); ++file) { decRef(*file); } } commitsToDelete.clear(); // Now compact commits to remove deleted ones (preserving the sort) int32_t size = commits.size(); int32_t readFrom = 0; int32_t writeTo = 0; while (readFrom < size) { CommitPointPtr commit(boost::dynamic_pointer_cast(commits[readFrom])); if (!commit->deleted) { if (writeTo != readFrom) { commits[writeTo] = commits[readFrom]; } ++writeTo; } 
++readFrom; } while (size > writeTo) { commits.removeLast(); --size; } } } void IndexFileDeleter::refresh(const String& segmentName) { HashSet files(directory->listAll()); IndexFileNameFilterPtr filter(IndexFileNameFilter::getFilter()); String segmentPrefix1(segmentName + L"."); String segmentPrefix2(segmentName + L"_"); for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { if (filter->accept(L"", *fileName) && (segmentName.empty() || boost::starts_with(*fileName, segmentPrefix1) || boost::starts_with(*fileName, segmentPrefix2)) && !refCounts.contains(*fileName) && *fileName != IndexFileNames::SEGMENTS_GEN()) { // Unreferenced file, so remove it if (infoStream) { message(L"refresh [prefix=" + segmentName + L"]: removing newly created unreferenced file \"" + *fileName + L"\""); } deleteFile(*fileName); } } } void IndexFileDeleter::refresh() { refresh(L""); } void IndexFileDeleter::close() { // DecRef old files from the last checkpoint, if any for (Collection< HashSet >::iterator file = lastFiles.begin(); file != lastFiles.end(); ++file) { decRef(*file); } lastFiles.clear(); deletePendingFiles(); } void IndexFileDeleter::deletePendingFiles() { if (deletable) { HashSet oldDeletable(deletable); deletable.reset(); for (HashSet::iterator fileName = oldDeletable.begin(); fileName != oldDeletable.end(); ++fileName) { if (infoStream) { message(L"delete pending file " + *fileName); } deleteFile(*fileName); } } } void IndexFileDeleter::checkpoint(const SegmentInfosPtr& segmentInfos, bool isCommit) { if (infoStream) { message(L"now checkpoint \"" + segmentInfos->getCurrentSegmentFileName() + L"\" [" + StringUtils::toString(segmentInfos->size()) + L" segments; isCommit = " + StringUtils::toString(isCommit) + L"]"); } // Try again now to delete any previously un-deletable files (because they were in use, on Windows) deletePendingFiles(); // Incref the files incRef(segmentInfos, isCommit); if (isCommit) { // Append to our commits list 
commits.add(newLucene(commitsToDelete, directory, segmentInfos)); // Tell policy so it can remove commits policy->onCommit(commits); // Decref files for commits that were deleted by the policy deleteCommits(); } else { HashSet docWriterFiles; if (docWriter) { docWriterFiles = docWriter->openFiles(); if (docWriterFiles) { // We must incRef these files before decRef'ing last files to make sure we // don't accidentally delete them incRef(docWriterFiles); } } // DecRef old files from the last checkpoint, if any for (Collection< HashSet >::iterator file = lastFiles.begin(); file != lastFiles.end(); ++file) { decRef(*file); } lastFiles.clear(); // Save files so we can decr on next checkpoint/commit lastFiles.add(segmentInfos->files(directory, false)); if (docWriterFiles) { lastFiles.add(docWriterFiles); } } } void IndexFileDeleter::incRef(const SegmentInfosPtr& segmentInfos, bool isCommit) { // If this is a commit point, also incRef the segments_N file HashSet files(segmentInfos->files(directory, isCommit)); for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { incRef(*fileName); } } void IndexFileDeleter::incRef(HashSet files) { for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { incRef(*file); } } void IndexFileDeleter::incRef(const String& fileName) { RefCountPtr rc(getRefCount(fileName)); if (infoStream && VERBOSE_REF_COUNTS) { message(L" IncRef \"" + fileName + L"\": pre-incr count is " + StringUtils::toString(rc->count)); } rc->IncRef(); } void IndexFileDeleter::decRef(HashSet files) { for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { decRef(*file); } } void IndexFileDeleter::decRef(const String& fileName) { RefCountPtr rc(getRefCount(fileName)); if (infoStream && VERBOSE_REF_COUNTS) { message(L" DecRef \"" + fileName + L"\": pre-decr count is " + StringUtils::toString(rc->count)); } if (rc->DecRef() == 0) { // This file is no longer referenced by any past commit points nor by the 
in-memory SegmentInfos deleteFile(fileName); refCounts.remove(fileName); if (synced) { SyncLock syncLock(&synced); synced.remove(fileName); } } } void IndexFileDeleter::decRef(const SegmentInfosPtr& segmentInfos) { decRef(segmentInfos->files(directory, false)); } bool IndexFileDeleter::exists(const String& fileName) { return refCounts.contains(fileName) ? getRefCount(fileName)->count > 0 : false; } RefCountPtr IndexFileDeleter::getRefCount(const String& fileName) { RefCountPtr rc; MapStringRefCount::iterator ref = refCounts.find(fileName); if (ref == refCounts.end()) { rc = newLucene(fileName); refCounts.put(fileName, rc); } else { rc = ref->second; } return rc; } void IndexFileDeleter::deleteFiles(HashSet files) { for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { deleteFile(*file); } } void IndexFileDeleter::deleteNewFiles(HashSet files) { for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { if (!refCounts.contains(*fileName)) { if (infoStream) { message(L"delete new file \"" + *fileName + L"\""); } deleteFile(*fileName); } } } void IndexFileDeleter::deleteFile(const String& fileName) { try { if (infoStream) { message(L"delete \"" + fileName + L"\""); } directory->deleteFile(fileName); } catch (IOException& e) { // if delete fails if (directory->fileExists(fileName)) { // if delete fails // Some operating systems (eg. Windows) don't permit a file to be deleted while it is opened // for read (eg. by another process or thread). So we assume that when a delete fails it is // because the file is open in another process, and queue the file for subsequent deletion. 
if (infoStream) { message(L"IndexFileDeleter: unable to remove file \"" + fileName + L"\": " + e.getError() + L"; Will re-try later."); } if (!deletable) { deletable = HashSet::newInstance(); } deletable.add(fileName); // add to deletable } } } RefCount::RefCount(const String& fileName) { initDone = false; count = 0; this->fileName = fileName; } RefCount::~RefCount() { } int32_t RefCount::IncRef() { if (!initDone) { initDone = true; } else { BOOST_ASSERT(count > 0); } return ++count; } int32_t RefCount::DecRef() { BOOST_ASSERT(count > 0); return --count; } CommitPoint::CommitPoint(Collection commitsToDelete, const DirectoryPtr& directory, const SegmentInfosPtr& segmentInfos) { deleted = false; this->directory = directory; this->commitsToDelete = commitsToDelete; userData = segmentInfos->getUserData(); segmentsFileName = segmentInfos->getCurrentSegmentFileName(); version = segmentInfos->getVersion(); generation = segmentInfos->getGeneration(); HashSet files(segmentInfos->files(directory, true)); this->files = HashSet::newInstance(files.begin(), files.end()); gen = segmentInfos->getGeneration(); _isOptimized = (segmentInfos->size() == 1 && !segmentInfos->info(0)->hasDeletions()); BOOST_ASSERT(!segmentInfos->hasExternalSegments(directory)); } CommitPoint::~CommitPoint() { } String CommitPoint::toString() { return L"IndexFileDeleter::CommitPoint(" + segmentsFileName + L")"; } bool CommitPoint::isOptimized() { return _isOptimized; } String CommitPoint::getSegmentsFileName() { return segmentsFileName; } HashSet CommitPoint::getFileNames() { return files; } DirectoryPtr CommitPoint::getDirectory() { return directory; } int64_t CommitPoint::getVersion() { return version; } int64_t CommitPoint::getGeneration() { return generation; } MapStringString CommitPoint::getUserData() { return userData; } void CommitPoint::deleteCommit() { if (!deleted) { deleted = true; commitsToDelete.add(shared_from_this()); } } bool CommitPoint::isDeleted() { return deleted; } int32_t 
CommitPoint::compareTo(const LuceneObjectPtr& other) { CommitPointPtr otherCommit(boost::static_pointer_cast(other)); if (gen < otherCommit->gen) { return -1; } if (gen > otherCommit->gen) { return 1; } return 0; } } LucenePlusPlus-rel_3.0.9/src/core/index/IndexFileNameFilter.cpp000066400000000000000000000040721456444476200245030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include #include "IndexFileNameFilter.h" #include "IndexFileNames.h" namespace Lucene { bool IndexFileNameFilter::accept(const String& directory, const String& name) { String::size_type i = name.find_last_of(L'.'); if (i != String::npos) { String extension(name.substr(i+1)); if (IndexFileNames::INDEX_EXTENSIONS().contains(extension)) { return true; } else if (!extension.empty()) { if (extension[0] == L'f' && boost::regex_search(extension, boost::wregex(L"f\\d+"))) { return true; } if (extension[0] == L's' && boost::regex_search(extension, boost::wregex(L"s\\d+"))) { return true; } } } else { if (name == IndexFileNames::DELETABLE()) { return true; } if (boost::starts_with(name, IndexFileNames::SEGMENTS())) { return true; } } return false; } bool IndexFileNameFilter::isCFSFile(const String& name) { String::size_type i = name.find_last_of(L'.'); if (i != String::npos) { String extension(name.substr(i+1)); if (IndexFileNames::INDEX_EXTENSIONS_IN_COMPOUND_FILE().contains(extension)) { return true; } else if (!extension.empty() && extension[0] == L'f' && boost::regex_search(extension, boost::wregex(L"f\\d+"))) { return true; } } return false; } IndexFileNameFilterPtr IndexFileNameFilter::getFilter() { static IndexFileNameFilterPtr singleton; 
LUCENE_RUN_ONCE( singleton = newLucene(); CycleCheck::addStatic(singleton); ); return singleton; } } LucenePlusPlus-rel_3.0.9/src/core/index/IndexFileNames.cpp000066400000000000000000000205431456444476200235210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "IndexFileNames.h" #include "SegmentInfo.h" #include "StringUtils.h" namespace Lucene { IndexFileNames::~IndexFileNames() { } const String& IndexFileNames::SEGMENTS() { static String _SEGMENTS(L"segments"); return _SEGMENTS; } const String& IndexFileNames::SEGMENTS_GEN() { static String _SEGMENTS_GEN(L"segments.gen"); return _SEGMENTS_GEN; } const String& IndexFileNames::DELETABLE() { static String _DELETABLE(L"deletable"); return _DELETABLE; } const String& IndexFileNames::NORMS_EXTENSION() { static String _NORMS_EXTENSION(L"nrm"); return _NORMS_EXTENSION; } const String& IndexFileNames::FREQ_EXTENSION() { static String _FREQ_EXTENSION(L"frq"); return _FREQ_EXTENSION; } const String& IndexFileNames::PROX_EXTENSION() { static String _PROX_EXTENSION(L"prx"); return _PROX_EXTENSION; } const String& IndexFileNames::TERMS_EXTENSION() { static String _TERMS_EXTENSION(L"tis"); return _TERMS_EXTENSION; } const String& IndexFileNames::TERMS_INDEX_EXTENSION() { static String _TERMS_INDEX_EXTENSION(L"tii"); return _TERMS_INDEX_EXTENSION; } const String& IndexFileNames::FIELDS_INDEX_EXTENSION() { static String _FIELDS_INDEX_EXTENSION(L"fdx"); return _FIELDS_INDEX_EXTENSION; } const String& IndexFileNames::FIELDS_EXTENSION() { static String _FIELDS_EXTENSION(L"fdt"); return _FIELDS_EXTENSION; } const String& IndexFileNames::VECTORS_FIELDS_EXTENSION() { static String 
_VECTORS_FIELDS_EXTENSION(L"tvf"); return _VECTORS_FIELDS_EXTENSION; } const String& IndexFileNames::VECTORS_DOCUMENTS_EXTENSION() { static String _VECTORS_DOCUMENTS_EXTENSION(L"tvd"); return _VECTORS_DOCUMENTS_EXTENSION; } const String& IndexFileNames::VECTORS_INDEX_EXTENSION() { static String _VECTORS_INDEX_EXTENSION(L"tvx"); return _VECTORS_INDEX_EXTENSION; } const String& IndexFileNames::COMPOUND_FILE_EXTENSION() { static String _COMPOUND_FILE_EXTENSION(L"cfs"); return _COMPOUND_FILE_EXTENSION; } const String& IndexFileNames::COMPOUND_FILE_STORE_EXTENSION() { static String _COMPOUND_FILE_STORE_EXTENSION(L"cfx"); return _COMPOUND_FILE_STORE_EXTENSION; } const String& IndexFileNames::DELETES_EXTENSION() { static String _DELETES_EXTENSION(L"del"); return _DELETES_EXTENSION; } const String& IndexFileNames::FIELD_INFOS_EXTENSION() { static String _FIELD_INFOS_EXTENSION(L"fnm"); return _FIELD_INFOS_EXTENSION; } const String& IndexFileNames::PLAIN_NORMS_EXTENSION() { static String _PLAIN_NORMS_EXTENSION(L"f"); return _PLAIN_NORMS_EXTENSION; } const String& IndexFileNames::SEPARATE_NORMS_EXTENSION() { static String _SEPARATE_NORMS_EXTENSION(L"s"); return _SEPARATE_NORMS_EXTENSION; } const String& IndexFileNames::GEN_EXTENSION() { static String _GEN_EXTENSION(L"gen"); return _GEN_EXTENSION; } const HashSet IndexFileNames::INDEX_EXTENSIONS() { static HashSet _INDEX_EXTENSIONS; LUCENE_RUN_ONCE( _INDEX_EXTENSIONS = HashSet::newInstance(); _INDEX_EXTENSIONS.add(COMPOUND_FILE_EXTENSION()); _INDEX_EXTENSIONS.add(FIELD_INFOS_EXTENSION()); _INDEX_EXTENSIONS.add(FIELDS_INDEX_EXTENSION()); _INDEX_EXTENSIONS.add(FIELDS_EXTENSION()); _INDEX_EXTENSIONS.add(TERMS_INDEX_EXTENSION()); _INDEX_EXTENSIONS.add(TERMS_EXTENSION()); _INDEX_EXTENSIONS.add(FREQ_EXTENSION()); _INDEX_EXTENSIONS.add(PROX_EXTENSION()); _INDEX_EXTENSIONS.add(DELETES_EXTENSION()); _INDEX_EXTENSIONS.add(VECTORS_INDEX_EXTENSION()); _INDEX_EXTENSIONS.add(VECTORS_DOCUMENTS_EXTENSION()); 
_INDEX_EXTENSIONS.add(VECTORS_FIELDS_EXTENSION()); _INDEX_EXTENSIONS.add(GEN_EXTENSION()); _INDEX_EXTENSIONS.add(NORMS_EXTENSION()); _INDEX_EXTENSIONS.add(COMPOUND_FILE_STORE_EXTENSION()); ); return _INDEX_EXTENSIONS; }; const HashSet IndexFileNames::INDEX_EXTENSIONS_IN_COMPOUND_FILE() { static HashSet _INDEX_EXTENSIONS_IN_COMPOUND_FILE; LUCENE_RUN_ONCE( _INDEX_EXTENSIONS_IN_COMPOUND_FILE = HashSet::newInstance(); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FIELD_INFOS_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FIELDS_INDEX_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FIELDS_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(TERMS_INDEX_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(TERMS_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FREQ_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(PROX_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(VECTORS_INDEX_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(VECTORS_DOCUMENTS_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(VECTORS_FIELDS_EXTENSION()); _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(NORMS_EXTENSION()); ); return _INDEX_EXTENSIONS_IN_COMPOUND_FILE; }; const HashSet IndexFileNames::STORE_INDEX_EXTENSIONS() { static HashSet _STORE_INDEX_EXTENSIONS; LUCENE_RUN_ONCE( _STORE_INDEX_EXTENSIONS = HashSet::newInstance(); _STORE_INDEX_EXTENSIONS.add(VECTORS_INDEX_EXTENSION()); _STORE_INDEX_EXTENSIONS.add(VECTORS_FIELDS_EXTENSION()); _STORE_INDEX_EXTENSIONS.add(VECTORS_DOCUMENTS_EXTENSION()); _STORE_INDEX_EXTENSIONS.add(FIELDS_INDEX_EXTENSION()); _STORE_INDEX_EXTENSIONS.add(FIELDS_EXTENSION()); ); return _STORE_INDEX_EXTENSIONS; }; const HashSet IndexFileNames::NON_STORE_INDEX_EXTENSIONS() { static HashSet _NON_STORE_INDEX_EXTENSIONS; LUCENE_RUN_ONCE( _NON_STORE_INDEX_EXTENSIONS = HashSet::newInstance(); _NON_STORE_INDEX_EXTENSIONS.add(FIELD_INFOS_EXTENSION()); _NON_STORE_INDEX_EXTENSIONS.add(FREQ_EXTENSION()); _NON_STORE_INDEX_EXTENSIONS.add(PROX_EXTENSION()); 
_NON_STORE_INDEX_EXTENSIONS.add(TERMS_EXTENSION()); _NON_STORE_INDEX_EXTENSIONS.add(TERMS_INDEX_EXTENSION()); _NON_STORE_INDEX_EXTENSIONS.add(NORMS_EXTENSION()); ); return _NON_STORE_INDEX_EXTENSIONS; }; const HashSet IndexFileNames::COMPOUND_EXTENSIONS() { static HashSet _COMPOUND_EXTENSIONS; LUCENE_RUN_ONCE( _COMPOUND_EXTENSIONS = HashSet::newInstance(); _COMPOUND_EXTENSIONS.add(FIELD_INFOS_EXTENSION()); _COMPOUND_EXTENSIONS.add(FREQ_EXTENSION()); _COMPOUND_EXTENSIONS.add(PROX_EXTENSION()); _COMPOUND_EXTENSIONS.add(FIELDS_INDEX_EXTENSION()); _COMPOUND_EXTENSIONS.add(FIELDS_EXTENSION()); _COMPOUND_EXTENSIONS.add(TERMS_INDEX_EXTENSION()); _COMPOUND_EXTENSIONS.add(TERMS_EXTENSION()); ); return _COMPOUND_EXTENSIONS; }; const HashSet IndexFileNames::VECTOR_EXTENSIONS() { static HashSet _VECTOR_EXTENSIONS; LUCENE_RUN_ONCE( _VECTOR_EXTENSIONS = HashSet::newInstance(); _VECTOR_EXTENSIONS.add(VECTORS_INDEX_EXTENSION()); _VECTOR_EXTENSIONS.add(VECTORS_DOCUMENTS_EXTENSION()); _VECTOR_EXTENSIONS.add(VECTORS_FIELDS_EXTENSION()); ); return _VECTOR_EXTENSIONS; }; String IndexFileNames::fileNameFromGeneration(const String& base, const String& extension, int64_t gen) { if (gen == SegmentInfo::NO) { return L""; } else if (gen == SegmentInfo::WITHOUT_GEN) { return base + extension; } else { return base + L"_" + StringUtils::toString(gen, StringUtils::CHARACTER_MAX_RADIX) + extension; } } bool IndexFileNames::isDocStoreFile(const String& fileName) { if (boost::ends_with(fileName, COMPOUND_FILE_STORE_EXTENSION())) { return true; } for (HashSet::iterator index = STORE_INDEX_EXTENSIONS().begin(); index != STORE_INDEX_EXTENSIONS().end(); ++index) { if (boost::ends_with(fileName, *index)) { return true; } } return false; } String IndexFileNames::segmentFileName(const String& segmentName, const String& ext) { return segmentName + L"." 
+ ext; } } LucenePlusPlus-rel_3.0.9/src/core/index/IndexReader.cpp000066400000000000000000000275001456444476200230600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include #include "IndexReader.h" #include "_IndexReader.h" #include "DirectoryReader.h" #include "IndexDeletionPolicy.h" #include "FSDirectory.h" #include "FieldSelector.h" #include "Similarity.h" #include "CompoundFileReader.h" #include "FileUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t IndexReader::DEFAULT_TERMS_INDEX_DIVISOR = 1; IndexReader::IndexReader() { refCount = 1; closed = false; _hasChanges = false; } IndexReader::~IndexReader() { } int32_t IndexReader::getRefCount() { SyncLock syncLock(this); return refCount; } void IndexReader::incRef() { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0); ensureOpen(); ++refCount; } void IndexReader::decRef() { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0); ensureOpen(); if (refCount == 1) { commit(); doClose(); } --refCount; } void IndexReader::ensureOpen() { if (refCount <= 0) { boost::throw_exception(AlreadyClosedException(L"this IndexReader is closed")); } } IndexReaderPtr IndexReader::open(const DirectoryPtr& directory) { return open(directory, IndexDeletionPolicyPtr(), IndexCommitPtr(), true, DEFAULT_TERMS_INDEX_DIVISOR); } IndexReaderPtr IndexReader::open(const DirectoryPtr& directory, bool readOnly) { return open(directory, IndexDeletionPolicyPtr(), IndexCommitPtr(), readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } IndexReaderPtr IndexReader::open(const IndexCommitPtr& commit, bool readOnly) { return open(commit->getDirectory(), IndexDeletionPolicyPtr(), commit, readOnly, 
DEFAULT_TERMS_INDEX_DIVISOR); } IndexReaderPtr IndexReader::open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly) { return open(directory, deletionPolicy, IndexCommitPtr(), readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } IndexReaderPtr IndexReader::open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor) { return open(directory, deletionPolicy, IndexCommitPtr(), readOnly, termInfosIndexDivisor); } IndexReaderPtr IndexReader::open(const IndexCommitPtr& commit, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly) { return open(commit->getDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); } IndexReaderPtr IndexReader::open(const IndexCommitPtr& commit, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor) { return open(commit->getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor); } IndexReaderPtr IndexReader::open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& commit, bool readOnly, int32_t termInfosIndexDivisor) { return DirectoryReader::open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor); } IndexReaderPtr IndexReader::reopen() { SyncLock syncLock(this); boost::throw_exception(UnsupportedOperationException(L"This reader does not support reopen().")); return IndexReaderPtr(); } IndexReaderPtr IndexReader::reopen(bool openReadOnly) { SyncLock syncLock(this); boost::throw_exception(UnsupportedOperationException(L"This reader does not support reopen().")); return IndexReaderPtr(); } IndexReaderPtr IndexReader::reopen(const IndexCommitPtr& commit) { SyncLock syncLock(this); boost::throw_exception(UnsupportedOperationException(L"This reader does not support reopen(IndexCommit).")); return IndexReaderPtr(); } LuceneObjectPtr IndexReader::clone(const LuceneObjectPtr& other) { SyncLock syncLock(this); if 
(!other) { boost::throw_exception(UnsupportedOperationException(L"This reader does not implement clone().")); } return other; } LuceneObjectPtr IndexReader::clone(bool openReadOnly, const LuceneObjectPtr& other) { SyncLock syncLock(this); if (!other) { boost::throw_exception(UnsupportedOperationException(L"This reader does not implement clone(bool).")); } return other; } DirectoryPtr IndexReader::directory() { ensureOpen(); boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); return DirectoryPtr(); } int64_t IndexReader::lastModified(const DirectoryPtr& directory2) { return newLucene(newLucene(), directory2)->run(); } int64_t IndexReader::getCurrentVersion(const DirectoryPtr& directory) { return SegmentInfos::readCurrentVersion(directory); } MapStringString IndexReader::getCommitUserData(const DirectoryPtr& directory) { return SegmentInfos::readCurrentUserData(directory); } int64_t IndexReader::getVersion() { boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); return 0; } MapStringString IndexReader::getCommitUserData() { boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); return MapStringString(); } bool IndexReader::isCurrent() { boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); return false; } bool IndexReader::isOptimized() { boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); return false; } bool IndexReader::indexExists(const DirectoryPtr& directory) { return (SegmentInfos::getCurrentSegmentGeneration(directory) != -1); } int32_t IndexReader::numDeletedDocs() { return (maxDoc() - numDocs()); } DocumentPtr IndexReader::document(int32_t n) { ensureOpen(); return document(n, FieldSelectorPtr()); } bool IndexReader::hasChanges() { return _hasChanges; } bool IndexReader::hasNorms(const String& field) { // backward 
compatible implementation. // SegmentReader has an efficient implementation. ensureOpen(); return norms(field); } void IndexReader::setNorm(int32_t doc, const String& field, uint8_t value) { SyncLock syncLock(this); ensureOpen(); acquireWriteLock(); _hasChanges = true; doSetNorm(doc, field, value); } void IndexReader::setNorm(int32_t doc, const String& field, double value) { ensureOpen(); setNorm(doc, field, Similarity::encodeNorm(value)); } TermDocsPtr IndexReader::termDocs(const TermPtr& term) { ensureOpen(); TermDocsPtr _termDocs(termDocs()); _termDocs->seek(term); return _termDocs; } TermPositionsPtr IndexReader::termPositions(const TermPtr& term) { ensureOpen(); TermPositionsPtr _termPositions(termPositions()); _termPositions->seek(term); return _termPositions; } void IndexReader::deleteDocument(int32_t docNum) { SyncLock syncLock(this); ensureOpen(); acquireWriteLock(); _hasChanges = true; doDelete(docNum); } int32_t IndexReader::deleteDocuments(const TermPtr& term) { ensureOpen(); TermDocsPtr docs(termDocs(term)); if (!docs) { return 0; } int32_t n = 0; LuceneException finally; try { while (docs->next()) { deleteDocument(docs->doc()); ++n; } } catch (LuceneException& e) { finally = e; } docs->close(); finally.throwException(); return n; } void IndexReader::undeleteAll() { SyncLock syncLock(this); ensureOpen(); acquireWriteLock(); _hasChanges = true; doUndeleteAll(); } void IndexReader::acquireWriteLock() { SyncLock syncLock(this); // NOOP } void IndexReader::flush() { SyncLock syncLock(this); ensureOpen(); commit(); } void IndexReader::flush(MapStringString commitUserData) { SyncLock syncLock(this); ensureOpen(); commit(commitUserData); } void IndexReader::commit() { commit(MapStringString()); } void IndexReader::commit(MapStringString commitUserData) { SyncLock syncLock(this); if (_hasChanges) { doCommit(commitUserData); } _hasChanges = false; } void IndexReader::close() { SyncLock syncLock(this); if (!closed) { decRef(); closed = true; } } IndexCommitPtr 
IndexReader::getIndexCommit() { boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); return IndexCommitPtr(); } void IndexReader::main(Collection args) { String filename; bool extract = false; for (Collection::iterator arg = args.begin(); arg != args.end(); ++arg) { if (*arg == L"-extract") { extract = true; } else if (filename.empty()) { filename = *arg; } } if (filename.empty()) { std::wcout << L"Usage: IndexReader [-extract] "; return; } DirectoryPtr dir; CompoundFileReaderPtr cfr; LuceneException finally; try { String dirname(FileUtils::extractPath(filename)); filename = FileUtils::extractPath(filename); dir = FSDirectory::open(dirname); cfr = newLucene(dir, filename); HashSet _files(cfr->listAll()); Collection files(Collection::newInstance(_files.begin(), _files.end())); std::sort(files.begin(), files.end()); // sort the array of filename so that the output is more readable for (Collection::iterator file = files.begin(); file != files.end(); ++file) { int64_t len = cfr->fileLength(*file); if (extract) { std::wcout << L"extract " << *file << L" with " << len << L" bytes to local directory..."; IndexInputPtr ii(cfr->openInput(*file)); boost::filesystem::ofstream f(*file, std::ios::binary | std::ios::out); // read and write with a small buffer, which is more effective than reading byte by byte ByteArray buffer(ByteArray::newInstance(1024)); int32_t chunk = buffer.size(); while (len > 0) { int32_t bufLen = std::min(chunk, (int32_t)len); ii->readBytes(buffer.get(), 0, bufLen); f.write((char*)buffer.get(), bufLen); len -= bufLen; } ii->close(); } else { std::wcout << *file << L": " << len << " bytes\n"; } } } catch (LuceneException& e) { finally = e; } if (dir) { dir->close(); } if (cfr) { cfr->close(); } finally.throwException(); } Collection IndexReader::listCommits(const DirectoryPtr& dir) { return DirectoryReader::listCommits(dir); } Collection IndexReader::getSequentialSubReaders() { return Collection(); // 
override } LuceneObjectPtr IndexReader::getFieldCacheKey() { return shared_from_this(); } LuceneObjectPtr IndexReader::getDeletesCacheKey() { return shared_from_this(); } int64_t IndexReader::getUniqueTermCount() { boost::throw_exception(UnsupportedOperationException(L"This reader does not implement getUniqueTermCount()")); return 0; } int32_t IndexReader::getTermInfosIndexDivisor() { boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); return 0; } FindSegmentsModified::FindSegmentsModified(const SegmentInfosPtr& infos, const DirectoryPtr& directory) : FindSegmentsFileT(infos, directory) { result = 0; } FindSegmentsModified::~FindSegmentsModified() { } uint64_t FindSegmentsModified::doBody(const String& segmentFileName) { return directory->fileModified(segmentFileName); } } LucenePlusPlus-rel_3.0.9/src/core/index/IndexWriter.cpp000066400000000000000000003450151456444476200231360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IndexWriter.h" #include "_IndexWriter.h" #include "Directory.h" #include "Analyzer.h" #include "KeepOnlyLastCommitDeletionPolicy.h" #include "DocumentsWriter.h" #include "IndexFileDeleter.h" #include "IndexFileNames.h" #include "Lock.h" #include "SegmentInfo.h" #include "SegmentReader.h" #include "ReadOnlyDirectoryReader.h" #include "BufferedIndexInput.h" #include "LogByteSizeMergePolicy.h" #include "LogDocMergePolicy.h" #include "Similarity.h" #include "ConcurrentMergeScheduler.h" #include "CompoundFileWriter.h" #include "SegmentMerger.h" #include "DateTools.h" #include "Constants.h" #include "InfoStream.h" #include "TestPoint.h" #include "StringUtils.h" namespace Lucene { /// The normal read buffer size defaults to 1024, but increasing this during merging seems to /// yield performance gains. However we don't want to increase it too much because there are /// quite a few BufferedIndexInputs created during merging. const int32_t IndexWriter::MERGE_READ_BUFFER_SIZE = 4096; int32_t IndexWriter::MESSAGE_ID = 0; InfoStreamPtr IndexWriter::defaultInfoStream; /// Default value for the write lock timeout (1,000). int64_t IndexWriter::WRITE_LOCK_TIMEOUT = 1000; const String IndexWriter::WRITE_LOCK_NAME = L"write.lock"; /// Value to denote a flush trigger is disabled. const int32_t IndexWriter::DISABLE_AUTO_FLUSH = -1; /// Disabled by default (because IndexWriter flushes by RAM usage by default). const int32_t IndexWriter::DEFAULT_MAX_BUFFERED_DOCS = IndexWriter::DISABLE_AUTO_FLUSH; /// Default value is 16 MB (which means flush when buffered docs consume 16 MB RAM). const double IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB = 16.0; /// Disabled by default (because IndexWriter flushes by RAM usage by default). const int32_t IndexWriter::DEFAULT_MAX_BUFFERED_DELETE_TERMS = IndexWriter::DISABLE_AUTO_FLUSH; /// Default value is 10000. 
const int32_t IndexWriter::DEFAULT_MAX_FIELD_LENGTH = 10000; /// Default value is 128. const int32_t IndexWriter::DEFAULT_TERM_INDEX_INTERVAL = 128; /// Sets the maximum field length to INT_MAX const int32_t IndexWriter::MaxFieldLengthUNLIMITED = INT_MAX; /// Sets the maximum field length to {@link #DEFAULT_MAX_FIELD_LENGTH} const int32_t IndexWriter::MaxFieldLengthLIMITED = IndexWriter::DEFAULT_MAX_FIELD_LENGTH; IndexWriter::IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, bool create, int32_t mfl) { this->directory = d; this->analyzer = a; this->create = create; this->maxFieldLength = mfl; } IndexWriter::IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, int32_t mfl) { this->directory = d; this->analyzer = a; this->create = !IndexReader::indexExists(d); this->maxFieldLength = mfl; } IndexWriter::IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl) { this->directory = d; this->analyzer = a; this->deletionPolicy = deletionPolicy; this->create = !IndexReader::indexExists(d); this->maxFieldLength = mfl; } IndexWriter::IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, bool create, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl) { this->directory = d; this->analyzer = a; this->create = create; this->deletionPolicy = deletionPolicy; this->maxFieldLength = mfl; } IndexWriter::IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, bool create, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl, const IndexingChainPtr& indexingChain, const IndexCommitPtr& commit) { this->directory = d; this->analyzer = a; this->create = create; this->deletionPolicy = deletionPolicy; this->maxFieldLength = mfl; this->indexingChain = indexingChain; this->indexCommit = commit; } IndexWriter::IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl, const IndexCommitPtr& commit) { this->directory = d; this->analyzer = a; this->create 
= false; this->deletionPolicy = deletionPolicy; this->maxFieldLength = mfl; this->indexCommit = commit; } IndexWriter::~IndexWriter() { } void IndexWriter::initialize() { messageID = -1; messageIDLock = newInstance(); setMessageID(defaultInfoStream); this->writeLockTimeout = WRITE_LOCK_TIMEOUT; this->segmentInfos = newLucene(); pendingMerges = Collection::newInstance(); mergeExceptions = Collection::newInstance(); segmentsToOptimize = SetSegmentInfo::newInstance(); optimizeMaxNumSegments = 0; mergingSegments = SetSegmentInfo::newInstance(); runningMerges = SetOneMerge::newInstance(); synced = HashSet::newInstance(); syncing = HashSet::newInstance(); changeCount = 0; lastCommitChangeCount = 0; poolReaders = false; readCount = 0; writeThread = 0; upgradeCount = 0; readerTermsIndexDivisor = IndexReader::DEFAULT_TERMS_INDEX_DIVISOR; readerPool = newLucene(shared_from_this()); closed = false; closing = false; hitOOM = false; stopMerges = false; mergeGen = 0; flushCount = 0; flushDeletesCount = 0; localFlushedDocCount = 0; pendingCommitChangeCount = 0; mergePolicy = newLucene(shared_from_this()); mergeScheduler = newLucene(); similarity = Similarity::getDefault(); termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; commitLock = newInstance(); if (!indexingChain) { indexingChain = DocumentsWriter::getDefaultIndexingChain(); } if (create) { directory->clearLock(WRITE_LOCK_NAME); // clear the write lock in case it's leftover } LockPtr writeLock(directory->makeLock(WRITE_LOCK_NAME)); if (!writeLock->obtain((int32_t)writeLockTimeout)) { // obtain write lock boost::throw_exception(LockObtainFailedException(L"Index locked for write: " + writeLock->toString())); } this->writeLock = writeLock; bool success = false; LuceneException finally; try { if (create) { // Try to read first. This is to allow create against an index that's currently open for // searching. 
In this case we write the next segments_N file with no segments bool doCommit; try { segmentInfos->read(directory); segmentInfos->clear(); doCommit = false; } catch (LuceneException&) { // Likely this means it's a fresh directory doCommit = true; } if (doCommit) { // Only commit if there is no segments file in this dir already. segmentInfos->commit(directory); HashSet files(segmentInfos->files(directory, true)); synced.addAll(files.begin(), files.end()); } else { // Record that we have a change (zero out all segments) pending ++changeCount; } } else { segmentInfos->read(directory); if (indexCommit) { // Swap out all segments, but, keep metadata in SegmentInfos, like version & generation, to // preserve write-once. This is important if readers are open against the future commit points. if (indexCommit->getDirectory() != directory) { boost::throw_exception(IllegalArgumentException(L"IndexCommit's directory doesn't match my directory")); } SegmentInfosPtr oldInfos(newLucene()); oldInfos->read(directory, indexCommit->getSegmentsFileName()); segmentInfos->replace(oldInfos); ++changeCount; if (infoStream) { message(L"init: loaded commit \"" + indexCommit->getSegmentsFileName() + L"\""); } } // We assume that this segments_N was previously properly sync'd HashSet files(segmentInfos->files(directory, true)); synced.addAll(files.begin(), files.end()); } setRollbackSegmentInfos(segmentInfos); docWriter = newLucene(directory, shared_from_this(), indexingChain); docWriter->setInfoStream(infoStream); docWriter->setMaxFieldLength(maxFieldLength); // Default deleter (for backwards compatibility) is KeepOnlyLastCommitDeleter deleter = newLucene(directory, deletionPolicy ? deletionPolicy : newLucene(), segmentInfos, infoStream, docWriter, synced); if (deleter->startingCommitDeleted) { // Deletion policy deleted the "head" commit point. We have to mark ourself as changed so that if we // are closed without any further changes we write a new segments_N file. 
++changeCount; } pushMaxBufferedDocs(); if (infoStream) { message(L"init: create=" + StringUtils::toString(create)); } messageState(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { if (infoStream) { message(L"init: hit exception on init; releasing write lock"); } try { this->writeLock->release(); } catch (...) { // don't mask the original exception } this->writeLock.reset(); } finally.throwException(); } int32_t IndexWriter::MAX_TERM_LENGTH() { static int32_t _MAX_TERM_LENGTH = 0; LUCENE_RUN_ONCE( _MAX_TERM_LENGTH = DocumentsWriter::MAX_TERM_LENGTH; ); return _MAX_TERM_LENGTH; } IndexReaderPtr IndexWriter::getReader() { return getReader(readerTermsIndexDivisor); } IndexReaderPtr IndexWriter::getReader(int32_t termInfosIndexDivisor) { ensureOpen(); if (infoStream) { message(L"flush at getReader"); } // Do this up front before flushing so that the readers obtained during this flush are pooled, the first time // this method is called poolReaders = true; // Prevent segmentInfos from changing while opening the reader; in theory we could do similar retry logic, // just like we do when loading segments_N IndexReaderPtr r; { SyncLock syncLock(this); flush(false, true, true); r = newLucene(shared_from_this(), segmentInfos, termInfosIndexDivisor); } maybeMerge(); return r; } int32_t IndexWriter::numDeletedDocs(const SegmentInfoPtr& info) { SegmentReaderPtr reader(readerPool->getIfExists(info)); int32_t deletedDocs = 0; LuceneException finally; try { deletedDocs = reader ? 
reader->numDeletedDocs() : info->getDelCount(); } catch (LuceneException& e) { finally = e; } if (reader) { readerPool->release(reader); } finally.throwException(); return deletedDocs; } void IndexWriter::acquireWrite() { SyncLock syncLock(this); BOOST_ASSERT(writeThread != LuceneThread::currentId()); while (writeThread != 0 || readCount > 0) { doWait(); } // we could have been closed while we were waiting ensureOpen(); writeThread = LuceneThread::currentId(); } void IndexWriter::releaseWrite() { SyncLock syncLock(this); BOOST_ASSERT(writeThread == LuceneThread::currentId()); writeThread = 0; notifyAll(); } void IndexWriter::acquireRead() { SyncLock syncLock(this); int64_t current = LuceneThread::currentId(); while (writeThread != 0 && writeThread != current) { doWait(); } ++readCount; } void IndexWriter::upgradeReadToWrite() { SyncLock syncLock(this); BOOST_ASSERT(readCount > 0); ++upgradeCount; while (readCount > upgradeCount || writeThread != 0) { doWait(); } writeThread = LuceneThread::currentId(); --readCount; --upgradeCount; } void IndexWriter::releaseRead() { SyncLock syncLock(this); --readCount; BOOST_ASSERT(readCount >= 0); notifyAll(); } bool IndexWriter::isOpen(bool includePendingClose) { SyncLock syncLock(this); return !(closed || (includePendingClose && closing)); } void IndexWriter::ensureOpen(bool includePendingClose) { SyncLock syncLock(this); if (!isOpen(includePendingClose)) { boost::throw_exception(AlreadyClosedException(L"This IndexWriter is closed")); } } void IndexWriter::ensureOpen() { ensureOpen(true); } void IndexWriter::message(const String& message) { if (infoStream) { *infoStream << L"IW " << StringUtils::toString(messageID); *infoStream << L" [" << DateTools::timeToString(MiscUtils::currentTimeMillis(), DateTools::RESOLUTION_SECOND); *infoStream << L"; " << StringUtils::toString(LuceneThread::currentId()) << L"]: " << message << L"\n"; } } void IndexWriter::setMessageID(const InfoStreamPtr& infoStream) { SyncLock syncLock(this); if 
(infoStream && messageID == -1) {
        SyncLock messageLock(messageIDLock);
        messageID = MESSAGE_ID++;
    }
    this->infoStream = infoStream;
}

// Downcast the current merge policy; throws unless it is (a subclass of) LogMergePolicy.
LogMergePolicyPtr IndexWriter::getLogMergePolicy() {
    // NOTE(review): the dynamic_pointer_cast template argument appears lost in extraction
    // (presumably LogMergePolicy) -- restore from the original source.
    LogMergePolicyPtr logMergePolicy(boost::dynamic_pointer_cast(mergePolicy));
    if (logMergePolicy) {
        return logMergePolicy;
    }
    boost::throw_exception(IllegalArgumentException(L"This method can only be called when the merge policy is the default LogMergePolicy"));
    return LogMergePolicyPtr();
}

// Convenience pass-throughs to the LogMergePolicy compound-file settings.
bool IndexWriter::getUseCompoundFile() {
    return getLogMergePolicy()->getUseCompoundFile();
}

void IndexWriter::setUseCompoundFile(bool value) {
    getLogMergePolicy()->setUseCompoundFile(value);
    getLogMergePolicy()->setUseCompoundDocStore(value);
}

// Set the scoring similarity; also propagated to the in-RAM DocumentsWriter.
void IndexWriter::setSimilarity(const SimilarityPtr& similarity) {
    ensureOpen();
    this->similarity = similarity;
    docWriter->setSimilarity(similarity);
}

SimilarityPtr IndexWriter::getSimilarity() {
    ensureOpen();
    return this->similarity;
}

void IndexWriter::setTermIndexInterval(int32_t interval) {
    ensureOpen();
    this->termIndexInterval = interval;
}

int32_t IndexWriter::getTermIndexInterval() {
    // We pass false because this method is called by SegmentMerger while we are in the process of closing
    ensureOpen(false);
    return termIndexInterval;
}

// Snapshot the given infos as the rollback point used by rollback()/rollbackInternal().
void IndexWriter::setRollbackSegmentInfos(const SegmentInfosPtr& infos) {
    SyncLock syncLock(this);
    // NOTE(review): dynamic_pointer_cast template argument (presumably SegmentInfos) lost in extraction.
    rollbackSegmentInfos = boost::dynamic_pointer_cast(infos->clone());
    BOOST_ASSERT(!rollbackSegmentInfos->hasExternalSegments(directory));
    rollbackSegments = MapSegmentInfoInt::newInstance();
    int32_t size = rollbackSegmentInfos->size();
    for (int32_t i = 0; i < size; ++i) {
        rollbackSegments.put(rollbackSegmentInfos->info(i), i);
    }
}

// Replace the merge policy, closing the previous one.
void IndexWriter::setMergePolicy(const MergePolicyPtr& mp) {
    ensureOpen();
    if (!mp) {
        boost::throw_exception(NullPointerException(L"MergePolicy must be non-null"));
    }
    if (mergePolicy != mp) {
        mergePolicy->close();
    }
    mergePolicy = mp;
    pushMaxBufferedDocs();
    if (infoStream) {
        message(L"setMergePolicy");
    }
}

MergePolicyPtr IndexWriter::getMergePolicy() {
    ensureOpen();
    return mergePolicy;
}

// Replace the merge scheduler; waits for running merges and closes the previous scheduler.
void IndexWriter::setMergeScheduler(const MergeSchedulerPtr& mergeScheduler) {
    SyncLock syncLock(this);
    ensureOpen();
    if (!mergeScheduler) {
        boost::throw_exception(NullPointerException(L"MergeScheduler must be non-null"));
    }
    if (this->mergeScheduler != mergeScheduler) {
        finishMerges(true);
        this->mergeScheduler->close();
    }
    this->mergeScheduler = mergeScheduler;
    if (infoStream) {
        message(L"setMergeScheduler");
    }
}

MergeSchedulerPtr IndexWriter::getMergeScheduler() {
    ensureOpen();
    return mergeScheduler;
}

// Pass-throughs to LogMergePolicy's max-merge-docs setting.
void IndexWriter::setMaxMergeDocs(int32_t maxMergeDocs) {
    getLogMergePolicy()->setMaxMergeDocs(maxMergeDocs);
}

int32_t IndexWriter::getMaxMergeDocs() {
    return getLogMergePolicy()->getMaxMergeDocs();
}

// Limit the number of terms indexed per field; also propagated to DocumentsWriter.
void IndexWriter::setMaxFieldLength(int32_t maxFieldLength) {
    ensureOpen();
    this->maxFieldLength = maxFieldLength;
    docWriter->setMaxFieldLength(maxFieldLength);
    if (infoStream) {
        message(L"setMaxFieldLength " + StringUtils::toString(maxFieldLength));
    }
}

int32_t IndexWriter::getMaxFieldLength() {
    ensureOpen();
    return maxFieldLength;
}

// Divisor applied to the terms index when readers are opened from this writer; must be >= 1.
void IndexWriter::setReaderTermsIndexDivisor(int32_t divisor) {
    ensureOpen();
    if (divisor <= 0) {
        boost::throw_exception(IllegalArgumentException(L"divisor must be >= 1 (got " + StringUtils::toString(divisor) + L")"));
    }
    readerTermsIndexDivisor = divisor;
    if (infoStream) {
        message(L"setReaderTermsIndexDivisor " + StringUtils::toString(readerTermsIndexDivisor));
    }
}

int32_t IndexWriter::getReaderTermsIndexDivisor() {
    ensureOpen();
    return readerTermsIndexDivisor;
}

// Doc-count flush trigger; at least one of doc-count and RAM triggers must stay enabled.
void IndexWriter::setMaxBufferedDocs(int32_t maxBufferedDocs) {
    ensureOpen();
    if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2) {
        boost::throw_exception(IllegalArgumentException(L"maxBufferedDocs must at least be 2 when enabled"));
    }
    if (maxBufferedDocs == DISABLE_AUTO_FLUSH && getRAMBufferSizeMB() == DISABLE_AUTO_FLUSH) {
        boost::throw_exception(IllegalArgumentException(L"at least one of ramBufferSize and maxBufferedDocs must be enabled"));
    }
    docWriter->setMaxBufferedDocs(maxBufferedDocs);
    pushMaxBufferedDocs();
    if (infoStream) {
        message(L"setMaxBufferedDocs " + StringUtils::toString(maxBufferedDocs));
    }
}

// If the merge policy is a LogDocMergePolicy, keep its minMergeDocs in sync with our flush trigger.
void IndexWriter::pushMaxBufferedDocs() {
    if (docWriter->getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) {
        // NOTE(review): dynamic_pointer_cast template argument (presumably LogDocMergePolicy) lost in extraction.
        LogDocMergePolicyPtr lmp(boost::dynamic_pointer_cast(mergePolicy));
        if (lmp) {
            int32_t maxBufferedDocs = docWriter->getMaxBufferedDocs();
            if (lmp->getMinMergeDocs() != maxBufferedDocs) {
                if (infoStream) {
                    message(L"now push maxBufferedDocs " + StringUtils::toString(maxBufferedDocs) + L" to LogDocMergePolicy");
                }
                lmp->setMinMergeDocs(maxBufferedDocs);
            }
        }
    }
}

int32_t IndexWriter::getMaxBufferedDocs() {
    ensureOpen();
    return docWriter->getMaxBufferedDocs();
}

// RAM-usage flush trigger; capped below 2048 MB, and cannot be disabled together with maxBufferedDocs.
void IndexWriter::setRAMBufferSizeMB(double mb) {
    if (mb > 2048.0) {
        boost::throw_exception(IllegalArgumentException(L"ramBufferSize " + StringUtils::toString(mb) + L" is too large; should be comfortably less than 2048"));
    }
    if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0) {
        boost::throw_exception(IllegalArgumentException(L"ramBufferSize should be > 0.0 MB when enabled"));
    }
    if (mb == DISABLE_AUTO_FLUSH && getMaxBufferedDocs() == DISABLE_AUTO_FLUSH) {
        boost::throw_exception(IllegalArgumentException(L"at least one of ramBufferSize and maxBufferedDocs must be enabled"));
    }
    docWriter->setRAMBufferSizeMB(mb);
    if (infoStream) {
        message(L"setRAMBufferSizeMB " + StringUtils::toString(mb));
    }
}

double IndexWriter::getRAMBufferSizeMB() {
    return docWriter->getRAMBufferSizeMB();
}

// Buffered-delete-terms flush trigger; must be >= 1 when enabled.
void IndexWriter::setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms) {
    ensureOpen();
    if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH && maxBufferedDeleteTerms < 1) {
        boost::throw_exception(IllegalArgumentException(L"maxBufferedDeleteTerms must at least be 1 when enabled"));
    }
    docWriter->setMaxBufferedDeleteTerms(maxBufferedDeleteTerms);
    if (infoStream) {
        message(L"setMaxBufferedDeleteTerms " +
StringUtils::toString(maxBufferedDeleteTerms));
    }
}

int32_t IndexWriter::getMaxBufferedDeleteTerms() {
    ensureOpen();
    return docWriter->getMaxBufferedDeleteTerms();
}

// Pass-throughs to LogMergePolicy's merge factor.
void IndexWriter::setMergeFactor(int32_t mergeFactor) {
    getLogMergePolicy()->setMergeFactor(mergeFactor);
}

int32_t IndexWriter::getMergeFactor() {
    return getLogMergePolicy()->getMergeFactor();
}

// Process-wide default info stream used by newly constructed writers.
void IndexWriter::setDefaultInfoStream(const InfoStreamPtr& infoStream) {
    IndexWriter::defaultInfoStream = infoStream;
}

InfoStreamPtr IndexWriter::getDefaultInfoStream() {
    return IndexWriter::defaultInfoStream;
}

// Install an info stream on this writer and its collaborators, then log the current state.
void IndexWriter::setInfoStream(const InfoStreamPtr& infoStream) {
    ensureOpen();
    setMessageID(infoStream);
    docWriter->setInfoStream(infoStream);
    deleter->setInfoStream(infoStream);
    messageState();
}

// Log a one-line summary of the writer's current configuration and segments.
void IndexWriter::messageState() {
    if (infoStream) {
        // NOTE(review): "maxBuffereDeleteTerms" typo is present in the original log label; kept byte-identical.
        message(L"ramBufferSizeMB=" + StringUtils::toString(docWriter->getRAMBufferSizeMB()) +
                L" maxBufferedDocs=" + StringUtils::toString(docWriter->getMaxBufferedDocs()) +
                L" maxBuffereDeleteTerms=" + StringUtils::toString(docWriter->getMaxBufferedDeleteTerms()) +
                L" maxFieldLength=" + StringUtils::toString(maxFieldLength) +
                L" index=" + segString());
    }
}

InfoStreamPtr IndexWriter::getInfoStream() {
    ensureOpen();
    return infoStream;
}

// True when diagnostic messages are being emitted.
bool IndexWriter::verbose() {
    return infoStream.get() != NULL;
}

void IndexWriter::setWriteLockTimeout(int64_t writeLockTimeout) {
    ensureOpen();
    this->writeLockTimeout = writeLockTimeout;
}

int64_t IndexWriter::getWriteLockTimeout() {
    ensureOpen();
    return writeLockTimeout;
}

// Process-wide default timeout used when acquiring the index write lock.
void IndexWriter::setDefaultWriteLockTimeout(int64_t writeLockTimeout) {
    IndexWriter::WRITE_LOCK_TIMEOUT = writeLockTimeout;
}

int64_t IndexWriter::getDefaultWriteLockTimeout() {
    return IndexWriter::WRITE_LOCK_TIMEOUT;
}

void IndexWriter::close() {
    close(true);
}

// Close the writer, optionally waiting for running merges to finish first.
void IndexWriter::close(bool waitForMerges) {
    // Ensure that only one thread actually gets to do the closing
    if (shouldClose()) {
        // If any methods have hit std::bad_alloc, then abort on close, in case the internal state of IndexWriter
        // or DocumentsWriter is corrupt
        if (hitOOM) {
            rollbackInternal();
        } else {
            closeInternal(waitForMerges);
        }
    }
}

// Claim the right to close; returns false if another thread already closed the writer.
bool IndexWriter::shouldClose() {
    SyncLock syncLock(this);
    while (true) {
        if (!closed) {
            if (!closing) {
                closing = true;
                return true;
            } else {
                // Another thread is presently trying to close; wait until it finishes one way (closes
                // successfully) or another (fails to close)
                doWait();
            }
        } else {
            return false;
        }
    }
}

// The real close sequence: flush, finish merges, final commit, then tear down collaborators.
void IndexWriter::closeInternal(bool waitForMerges) {
    docWriter->pauseAllThreads();
    LuceneException finally;
    try {
        if (infoStream) {
            message(L"now flush at close");
        }
        docWriter->close();

        // Only allow a new merge to be triggered if we are going to wait for merges
        if (!hitOOM) {
            flush(waitForMerges, true, true);
        }

        // Give merge scheduler last chance to run, in case any pending merges are waiting
        if (waitForMerges) {
            mergeScheduler->merge(shared_from_this());
        }

        mergePolicy->close();
        finishMerges(waitForMerges);
        stopMerges = true;
        mergeScheduler->close();

        if (infoStream) {
            message(L"now call final commit()");
        }
        if (!hitOOM) {
            commit(0);
        }
        if (infoStream) {
            message(L"at close: " + segString());
        }

        {
            SyncLock syncLock(this);
            readerPool->close();
            docWriter.reset();
            deleter->close();
        }

        if (writeLock) {
            writeLock->release(); // release write lock
            writeLock.reset();
        }

        {
            SyncLock syncLock(this);
            closed = true;
        }
    } catch (std::bad_alloc& oom) {
        finally = handleOOM(oom, L"closeInternal");
    } catch (LuceneException& e) {
        finally = e;
    }
    {
        SyncLock syncLock(this);
        closing = false;
        notifyAll();
        if (!closed) {
            // close failed part way through; let writer threads continue
            if (docWriter) {
                docWriter->resumeAllThreads();
            }
            if (infoStream) {
                message(L"hit exception while closing");
            }
        }
    }
    finally.throwException();
}

// Close the shared doc store and, if the merge policy asks for it, fold its files into a compound file.
bool IndexWriter::flushDocStores() {
    SyncLock syncLock(this);
    if (infoStream) {
        message(L"flushDocStores segment=" + docWriter->getDocStoreSegment());
    }
    bool useCompoundDocStore = false;
    if (infoStream) {
        message(L"closeDocStores segment=" + docWriter->getDocStoreSegment());
    }
    String docStoreSegment;
    bool success = false;
    LuceneException finally;
    try {
        docStoreSegment = docWriter->closeDocStore();
        success = true;
    } catch (LuceneException& e) {
        finally = e;
    }
    if (!success && infoStream) {
        message(L"hit exception closing doc store segment");
    }
    finally.throwException();

    if (infoStream) {
        message(L"flushDocStores files=" + StringUtils::toString(docWriter->closedFiles()));
    }

    useCompoundDocStore = mergePolicy->useCompoundDocStore(segmentInfos);
    // NOTE(review): HashSet/newLucene template arguments appear lost in extraction in this function.
    HashSet closedFiles(docWriter->closedFiles());

    if (useCompoundDocStore && !docStoreSegment.empty() && !closedFiles.empty()) {
        // Now build compound doc store file
        if (infoStream) {
            message(L"create compound file " + docStoreSegment + L"." + IndexFileNames::COMPOUND_FILE_STORE_EXTENSION());
        }

        success = false;
        int32_t numSegments = segmentInfos->size();
        String compoundFileName(docStoreSegment + L"." + IndexFileNames::COMPOUND_FILE_STORE_EXTENSION());

        try {
            CompoundFileWriterPtr cfsWriter(newLucene(directory, compoundFileName));
            for (HashSet::iterator file = closedFiles.begin(); file != closedFiles.end(); ++file) {
                cfsWriter->addFile(*file);
            }
            // Perform the merge
            cfsWriter->close();
            success = true;
        } catch (LuceneException& e) {
            finally = e;
        }
        if (!success) {
            // drop the partially written compound file and abort buffered docs
            if (infoStream) {
                message(L"hit exception building compound file doc store for segment " + docStoreSegment);
            }
            deleter->deleteFile(compoundFileName);
            docWriter->abort();
        }
        finally.throwException();

        // mark every segment sharing this doc store as compound-file based
        for (int32_t i = 0; i < numSegments; ++i) {
            SegmentInfoPtr si(segmentInfos->info(i));
            if (si->getDocStoreOffset() != -1 && si->getDocStoreSegment() == docStoreSegment) {
                si->setDocStoreIsCompoundFile(true);
            }
        }

        checkpoint();

        // In case the files we just merged into a CFS were not previously checkpointed
        deleter->deleteNewFiles(docWriter->closedFiles());
    }

    return useCompoundDocStore;
}

DirectoryPtr IndexWriter::getDirectory() {
    ensureOpen(false); // Pass false because the flush during closing calls getDirectory
    return directory;
}

AnalyzerPtr
IndexWriter::getAnalyzer() {
    ensureOpen();
    return analyzer;
}

// Total docs: buffered in RAM plus per-segment docCount (deletions NOT subtracted).
int32_t IndexWriter::maxDoc() {
    SyncLock syncLock(this);
    int32_t count = docWriter ? docWriter->getNumDocsInRAM() : 0;
    for (int32_t i = 0; i < segmentInfos->size(); ++i) {
        count += segmentInfos->info(i)->docCount;
    }
    return count;
}

// Live docs: buffered in RAM plus per-segment docCount minus deletions.
int32_t IndexWriter::numDocs() {
    SyncLock syncLock(this);
    int32_t count = docWriter ? docWriter->getNumDocsInRAM() : 0;
    for (int32_t i = 0; i < segmentInfos->size(); ++i) {
        SegmentInfoPtr info(segmentInfos->info(i));
        count += info->docCount - info->getDelCount();
    }
    return count;
}

// True if any deletions are buffered or committed in any segment.
bool IndexWriter::hasDeletions() {
    SyncLock syncLock(this);
    ensureOpen();
    if (docWriter->hasDeletes()) {
        return true;
    }
    for (int32_t i = 0; i < segmentInfos->size(); ++i) {
        if (segmentInfos->info(i)->hasDeletions()) {
            return true;
        }
    }
    return false;
}

void IndexWriter::addDocument(const DocumentPtr& doc) {
    addDocument(doc, analyzer);
}

// Buffer a document via DocumentsWriter, cleaning up aborted files and flushing when asked.
void IndexWriter::addDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer) {
    ensureOpen();
    bool doFlush = false;
    bool success = false;
    try {
        LuceneException finally;
        try {
            doFlush = docWriter->addDocument(doc, analyzer);
            success = true;
        } catch (LuceneException& e) {
            finally = e;
        }
        if (!success) {
            if (infoStream) {
                message(L"hit exception adding document");
            }
            {
                SyncLock syncLock(this);
                // If docWriter has some aborted files that were never incref'd, then we clean them up here
                if (docWriter) {
                    HashSet files(docWriter->abortedFiles());
                    if (files) {
                        deleter->deleteNewFiles(files);
                    }
                }
            }
        }
        finally.throwException();
        if (doFlush) {
            flush(true, false, false);
        }
    } catch (std::bad_alloc& oom) {
        boost::throw_exception(handleOOM(oom, L"addDocument"));
    }
}

// Buffer a delete-by-term; may trigger a flush.
void IndexWriter::deleteDocuments(const TermPtr& term) {
    ensureOpen();
    try {
        bool doFlush = docWriter->bufferDeleteTerm(term);
        if (doFlush) {
            flush(true, false, false);
        }
    } catch (std::bad_alloc& oom) {
        boost::throw_exception(handleOOM(oom, L"deleteDocuments(Term)"));
    }
}

// Buffer deletes for a batch of terms; may trigger a flush.
// NOTE(review): the Collection template argument (presumably TermPtr) appears lost in extraction.
void IndexWriter::deleteDocuments(Collection terms) {
    ensureOpen();
    try {
        bool doFlush = docWriter->bufferDeleteTerms(terms);
        if (doFlush) {
            flush(true, false, false);
        }
    } catch (std::bad_alloc& oom) {
        boost::throw_exception(handleOOM(oom, L"deleteDocuments(VectorTerm)"));
    }
}

// Buffer a delete-by-query; may trigger a flush.
void IndexWriter::deleteDocuments(const QueryPtr& query) {
    ensureOpen();
    bool doFlush = docWriter->bufferDeleteQuery(query);
    if (doFlush) {
        flush(true, false, false);
    }
}

// Buffer deletes for a batch of queries; may trigger a flush.
// NOTE(review): the Collection template argument (presumably QueryPtr) appears lost in extraction.
void IndexWriter::deleteDocuments(Collection queries) {
    ensureOpen();
    bool doFlush = docWriter->bufferDeleteQueries(queries);
    if (doFlush) {
        flush(true, false, false);
    }
}

void IndexWriter::updateDocument(const TermPtr& term, const DocumentPtr& doc) {
    ensureOpen();
    updateDocument(term, doc, getAnalyzer());
}

// Atomically delete-by-term and add the replacement document.
void IndexWriter::updateDocument(const TermPtr& term, const DocumentPtr& doc, const AnalyzerPtr& analyzer) {
    ensureOpen();
    try {
        bool doFlush = false;
        bool success = false;
        LuceneException finally;
        try {
            doFlush = docWriter->updateDocument(term, doc, analyzer);
            success = true;
        } catch (LuceneException& e) {
            finally = e;
        }
        if (!success) {
            if (infoStream) {
                message(L"hit exception updating document");
            }
            {
                SyncLock syncLock(this);
                // If docWriter has some aborted files that were never incref'd, then we clean them up here
                if (docWriter) {
                    HashSet files(docWriter->abortedFiles());
                    if (files) {
                        deleter->deleteNewFiles(files);
                    }
                }
            }
        }
        finally.throwException();
        if (doFlush) {
            flush(true, false, false);
        }
    } catch (std::bad_alloc& oom) {
        boost::throw_exception(handleOOM(oom, L"updateDocument"));
    }
}

int32_t IndexWriter::getSegmentCount() {
    SyncLock syncLock(this);
    return segmentInfos->size();
}

int32_t IndexWriter::getNumBufferedDocuments() {
    SyncLock syncLock(this);
    return docWriter->getNumDocsInRAM();
}

// docCount of segment i, or -1 for an out-of-range index.
int32_t IndexWriter::getDocCount(int32_t i) {
    SyncLock syncLock(this);
    return (i >= 0 && i < segmentInfos->size()) ?
segmentInfos->info(i)->docCount : -1;
}

int32_t IndexWriter::getFlushCount() {
    SyncLock syncLock(this);
    return flushCount;
}

int32_t IndexWriter::getFlushDeletesCount() {
    SyncLock syncLock(this);
    return flushDeletesCount;
}

// Produce the next unique segment name from the shared counter.
String IndexWriter::newSegmentName() {
    // Cannot synchronize on IndexWriter because that causes deadlock
    SyncLock segmentLock(segmentInfos);

    // Important to increment changeCount so that the segmentInfos is written on close.
    // Otherwise we could close, re-open and re-return the same segment name that was
    // previously returned which can cause problems at least with ConcurrentMergeScheduler.
    ++changeCount;

    return L"_" + StringUtils::toString(segmentInfos->counter++, StringUtils::CHARACTER_MAX_RADIX);
}

void IndexWriter::optimize() {
    optimize(true);
}

void IndexWriter::optimize(int32_t maxNumSegments) {
    optimize(maxNumSegments, true);
}

void IndexWriter::optimize(bool doWait) {
    optimize(1, doWait);
}

// Merge the index down to at most maxNumSegments segments, optionally waiting for completion.
// NOTE(review): several Collection::iterator template arguments (presumably OneMergePtr)
// appear lost in extraction throughout this function and its helpers.
void IndexWriter::optimize(int32_t maxNumSegments, bool doWait) {
    ensureOpen();

    if (maxNumSegments < 1) {
        boost::throw_exception(IllegalArgumentException(L"maxNumSegments must be >= 1; got " + StringUtils::toString(maxNumSegments)));
    }

    if (infoStream) {
        message(L"optimize: index now " + segString());
    }

    flush(true, false, true);

    {
        SyncLock syncLock(this);
        resetMergeExceptions();
        segmentsToOptimize.clear();
        optimizeMaxNumSegments = maxNumSegments;
        int32_t numSegments = segmentInfos->size();
        for (int32_t i = 0; i < numSegments; ++i) {
            segmentsToOptimize.add(segmentInfos->info(i));
        }

        // Now mark all pending & running merges as optimize merge
        for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) {
            (*merge)->optimize = true;
            (*merge)->maxNumSegmentsOptimize = maxNumSegments;
        }

        for (SetOneMerge::iterator merge = runningMerges.begin(); merge != runningMerges.end(); ++merge) {
            (*merge)->optimize = true;
            (*merge)->maxNumSegmentsOptimize = maxNumSegments;
        }
    }

    maybeMerge(maxNumSegments, true);

    if (doWait) {
        {
            SyncLock syncLock(this);
            while (true) {
                if (hitOOM) {
                    boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot complete optimize"));
                }
                if (!mergeExceptions.empty()) {
                    // Forward any exceptions in background merge threads to the current thread
                    for (Collection::iterator merge = mergeExceptions.begin(); merge != mergeExceptions.end(); ++merge) {
                        if ((*merge)->optimize) {
                            LuceneException err = (*merge)->getException();
                            if (!err.isNull()) {
                                boost::throw_exception(IOException(L"background merge hit exception: " + (*merge)->segString(directory)));
                            }
                        }
                    }
                }
                if (optimizeMergesPending()) {
                    IndexWriter::doWait();
                } else {
                    break;
                }
            }
        }

        // If close is called while we are still running, throw an exception so the calling thread will know the
        // optimize did not complete
        ensureOpen();
    }

    // NOTE: in the ConcurrentMergeScheduler case, when doWait is false, we can return immediately while background
    // threads accomplish the optimization
}

// True while any pending or running merge is part of an optimize.
bool IndexWriter::optimizeMergesPending() {
    SyncLock syncLock(this);
    for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) {
        if ((*merge)->optimize) {
            return true;
        }
    }
    for (SetOneMerge::iterator merge = runningMerges.begin(); merge != runningMerges.end(); ++merge) {
        if ((*merge)->optimize) {
            return true;
        }
    }
    return false;
}

// Ask the merge policy for merges that purge deletions and optionally wait for them to finish.
void IndexWriter::expungeDeletes(bool doWait) {
    ensureOpen();

    if (infoStream) {
        message(L"expungeDeletes: index now " + segString());
    }

    MergeSpecificationPtr spec;
    {
        SyncLock syncLock(this);
        spec = mergePolicy->findMergesToExpungeDeletes(segmentInfos);
        for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) {
            registerMerge(*merge);
        }
    }

    mergeScheduler->merge(shared_from_this());

    if (doWait) {
        {
            SyncLock syncLock(this);
            bool running = true;
            while (running) {
                if (hitOOM) {
                    boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot complete expungeDeletes"));
                }

                // Check each merge that MergePolicy asked us to do, to see if any of them are still running and
                // if any of them have hit an exception.
                running = false;
                for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) {
                    if (pendingMerges.contains(*merge) || runningMerges.contains(*merge)) {
                        running = true;
                    }
                    LuceneException err = (*merge)->getException();
                    if (!err.isNull()) {
                        boost::throw_exception(IOException(L"background merge hit exception: " + (*merge)->segString(directory)));
                    }
                }

                // If any of our merges are still running, wait
                if (running) {
                    IndexWriter::doWait();
                }
            }
        }
    }

    // NOTE: in the ConcurrentMergeScheduler case, when doWait is false, we can return immediately while background
    // threads accomplish the optimization
}

void IndexWriter::expungeDeletes() {
    expungeDeletes(true);
}

void IndexWriter::maybeMerge() {
    maybeMerge(false);
}

void IndexWriter::maybeMerge(bool optimize) {
    maybeMerge(1, optimize);
}

// Register any merges the policy requests and kick the scheduler.
void IndexWriter::maybeMerge(int32_t maxNumSegmentsOptimize, bool optimize) {
    updatePendingMerges(maxNumSegmentsOptimize, optimize);
    mergeScheduler->merge(shared_from_this());
}

// Consult the merge policy and register resulting merges as pending.
void IndexWriter::updatePendingMerges(int32_t maxNumSegmentsOptimize, bool optimize) {
    SyncLock syncLock(this);
    BOOST_ASSERT(!optimize || maxNumSegmentsOptimize > 0);

    if (stopMerges) {
        return;
    }

    // Do not start new merges if we've hit std::bad_alloc
    if (hitOOM) {
        return;
    }

    MergeSpecificationPtr spec;
    if (optimize) {
        spec = mergePolicy->findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize);
        if (spec) {
            for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) {
                (*merge)->optimize = true;
                (*merge)->maxNumSegmentsOptimize = maxNumSegmentsOptimize;
            }
        }
    } else {
        spec = mergePolicy->findMerges(segmentInfos);
    }

    if (spec) {
        for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) {
            registerMerge(*merge);
        }
    }
}

// Pop the next pending merge (advancing it to running), or null when none is pending.
OneMergePtr IndexWriter::getNextMerge() {
    SyncLock syncLock(this);
    if
(pendingMerges.empty()) {
        return OneMergePtr();
    } else {
        // Advance the merge from pending to running
        OneMergePtr merge(pendingMerges.removeFirst());
        runningMerges.add(merge);
        return merge;
    }
}

// Like getNextMerge, but only returns a pending merge that involves external segments.
OneMergePtr IndexWriter::getNextExternalMerge() {
    SyncLock syncLock(this);
    if (pendingMerges.empty()) {
        return OneMergePtr();
    } else {
        for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) {
            if ((*merge)->isExternal) {
                // Advance the merge from pending to running
                OneMergePtr running(*merge);
                runningMerges.add(*merge);
                pendingMerges.remove(merge);
                return running;
            }
        }
    }

    // All existing merges do not involve external segments
    return OneMergePtr();
}

// Begin an addIndexes transaction: take the write slot and snapshot segmentInfos for rollback.
void IndexWriter::startTransaction(bool haveReadLock) {
    SyncLock syncLock(this);
    bool success = false;
    LuceneException finally;
    try {
        if (infoStream) {
            message(L"now start transaction");
        }

        BOOST_ASSERT(docWriter->getNumBufferedDeleteTerms() == 0); // calling startTransaction with buffered delete terms not supported
        BOOST_ASSERT(docWriter->getNumDocsInRAM() == 0); // calling startTransaction with buffered documents not supported

        ensureOpen();

        // If a transaction is trying to roll back (because addIndexes hit an exception) then wait here until that's done
        while (stopMerges) {
            doWait();
        }
        success = true;
    } catch (LuceneException& e) {
        finally = e;
    }

    // Release the write lock if our caller held it, on hitting an exception
    if (!success && haveReadLock) {
        releaseRead();
    }
    finally.throwException();

    if (haveReadLock) {
        upgradeReadToWrite();
    } else {
        acquireWrite();
    }

    success = false;

    try {
        // NOTE(review): dynamic_pointer_cast template argument (presumably SegmentInfos) lost in extraction.
        localRollbackSegmentInfos = boost::dynamic_pointer_cast(segmentInfos->clone());

        BOOST_ASSERT(!hasExternalSegments());

        localFlushedDocCount = docWriter->getFlushedDocCount();

        // We must "protect" our files at this point from deletion in case we need to rollback
        deleter->incRef(segmentInfos, false);

        success = true;
    } catch (LuceneException& e) {
        finally = e;
    }

    if (!success) {
        finishAddIndexes();
    }
    finally.throwException();
}

// Abort the current addIndexes transaction and restore the pre-transaction segmentInfos.
void IndexWriter::rollbackTransaction() {
    SyncLock syncLock(this);

    if (infoStream) {
        message(L"now rollback transaction");
    }

    if (docWriter) {
        docWriter->setFlushedDocCount(localFlushedDocCount);
    }

    // Must finish merges before rolling back segmentInfos so merges don't hit exceptions on trying to commit
    // themselves, don't get files deleted out from under them, etc.
    finishMerges(false);

    // Keep the same segmentInfos instance but replace all of its SegmentInfo instances. This is so the next
    // attempt to commit using this instance of IndexWriter will always write to a new generation ("write once").
    segmentInfos->clear();
    segmentInfos->addAll(localRollbackSegmentInfos);
    localRollbackSegmentInfos.reset();

    // This must come after we rollback segmentInfos, so that if a commit() kicks off it does not see the
    // segmentInfos with external segments.
    finishAddIndexes();

    // Ask deleter to locate unreferenced files we had created & remove them
    deleter->checkpoint(segmentInfos, false);

    // Remove the incRef we did in startTransaction
    deleter->decRef(segmentInfos);

    // Also ask deleter to remove any newly created files that were never incref'd; this "garbage" is created
    // when a merge kicks off but aborts part way through before it had a chance to incRef the files it had
    // partially created
    deleter->refresh();

    notifyAll();

    BOOST_ASSERT(!hasExternalSegments());
}

// Commit the current addIndexes transaction: checkpoint and drop the rollback snapshot.
void IndexWriter::commitTransaction() {
    SyncLock syncLock(this);

    if (infoStream) {
        message(L"now commit transaction");
    }

    // Give deleter a chance to remove files now
    checkpoint();

    // Remove the incRef we did in startTransaction.
    deleter->decRef(localRollbackSegmentInfos);
    localRollbackSegmentInfos.reset();

    BOOST_ASSERT(!hasExternalSegments());

    finishAddIndexes();
}

// Discard all uncommitted changes and close the writer.
void IndexWriter::rollback() {
    ensureOpen();

    // Ensure that only one thread actually gets to do the closing
    if (shouldClose()) {
        rollbackInternal();
    }
}

// The real rollback sequence: abort merges, restore the rollback segmentInfos, then close.
void IndexWriter::rollbackInternal() {
    bool success = false;

    if (infoStream) {
        message(L"rollback");
    }

    docWriter->pauseAllThreads();
    LuceneException finally;
    try {
        finishMerges(false);

        // Must pre-close these two, in case they increment changeCount so that we can then set it to false before
        // calling closeInternal
        mergePolicy->close();
        mergeScheduler->close();

        {
            SyncLock syncLock(this);
            if (pendingCommit) {
                pendingCommit->rollbackCommit(directory);
                deleter->decRef(pendingCommit);
                pendingCommit.reset();
                notifyAll();
            }

            // Keep the same segmentInfos instance but replace all of its SegmentInfo instances. This is so the next
            // attempt to commit using this instance of IndexWriter will always write to a new generation ("write once").
            segmentInfos->clear();
            segmentInfos->addAll(rollbackSegmentInfos);

            BOOST_ASSERT(!hasExternalSegments());

            docWriter->abort();

            bool test = testPoint(L"rollback before checkpoint");
            BOOST_ASSERT(test);

            // Ask deleter to locate unreferenced files & remove them
            deleter->checkpoint(segmentInfos, false);
            deleter->refresh();
        }

        // Don't bother saving any changes in our segmentInfos
        readerPool->clear(SegmentInfosPtr());

        lastCommitChangeCount = changeCount;

        success = true;
    } catch (std::bad_alloc& oom) {
        finally = handleOOM(oom, L"rollbackInternal");
    } catch (LuceneException& e) {
        finally = e;
    }
    {
        SyncLock syncLock(this);
        if (!success) {
            docWriter->resumeAllThreads();
            closing = false;
            notifyAll();
            if (infoStream) {
                message(L"hit exception during rollback");
            }
        }
    }
    finally.throwException();

    closeInternal(false);
}

// Remove every document (and all segments) from the index, aborting running merges first.
void IndexWriter::deleteAll() {
    SyncLock syncLock(this);
    bool success = false;
    docWriter->pauseAllThreads();
    LuceneException finally;
    try {
        // Abort any running merges
        finishMerges(false);

        // Remove any buffered docs
        docWriter->abort();
        docWriter->setFlushedDocCount(0);

        // Remove all segments
        segmentInfos->clear();

        // Ask deleter to locate unreferenced files & remove them
        deleter->checkpoint(segmentInfos, false);
        deleter->refresh();

        // Don't bother saving any changes in our segmentInfos
        readerPool->clear(SegmentInfosPtr());

        // Mark that the index has changed
        ++changeCount;

        success = true;
    } catch (std::bad_alloc& oom) {
        finally = handleOOM(oom, L"deleteAll");
    } catch (LuceneException& e) {
        finally = e;
    }

    docWriter->resumeAllThreads();
    if (!success && infoStream) {
        message(L"hit exception during deleteAll");
    }
    finally.throwException();
}

// Either abort (waitForMerges == false) or wait out all pending and running merges.
void IndexWriter::finishMerges(bool waitForMerges) {
    SyncLock syncLock(this);
    if (!waitForMerges) {
        stopMerges = true;

        // Abort all pending and running merges
        for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) {
            if (infoStream) {
                message(L"now abort pending merge " +
(*merge)->segString(directory));
            }
            (*merge)->abort();
            mergeFinish(*merge);
        }
        pendingMerges.clear();

        for (SetOneMerge::iterator merge = runningMerges.begin(); merge != runningMerges.end(); ++merge) {
            if (infoStream) {
                message(L"now abort running merge " + (*merge)->segString(directory));
            }
            (*merge)->abort();
        }

        // Ensure any running addIndexes finishes. It's fine if a new one attempts to start because its merges
        // will quickly see the stopMerges == true and abort.
        acquireRead();
        releaseRead();

        // These merges periodically check whether they have been aborted, and stop if so. We wait here to make
        // sure they all stop. It should not take very long because the merge threads periodically check if they
        // are aborted.
        while (!runningMerges.empty()) {
            if (infoStream) {
                message(L"now wait for " + StringUtils::toString(runningMerges.size()) + L" running merge to abort");
            }
            doWait();
        }

        stopMerges = false;
        notifyAll();

        BOOST_ASSERT(mergingSegments.empty());

        if (infoStream) {
            message(L"all running merges have aborted");
        }
    } else {
        // waitForMerges() will ensure any running addIndexes finishes. It's fine if a new one attempts to start
        // because from our caller above the call will see that we are in the process of closing, and will throw
        // an AlreadyClosed exception.
        IndexWriter::waitForMerges();
    }
}

// Block until all pending and running merges are done.
void IndexWriter::waitForMerges() {
    SyncLock syncLock(this);

    // Ensure any running addIndexes finishes.
    acquireRead();
    releaseRead();

    while (!pendingMerges.empty() || !runningMerges.empty()) {
        doWait();
    }

    // sanity check
    BOOST_ASSERT(mergingSegments.empty());
}

// Record a change and let the deleter prune now-unreferenced files.
void IndexWriter::checkpoint() {
    SyncLock syncLock(this);
    ++changeCount;
    deleter->checkpoint(segmentInfos, false);
}

// End an addIndexes transaction by releasing the exclusive write slot.
void IndexWriter::finishAddIndexes() {
    releaseWrite();
}

// Take a read slot to block concurrent addIndexes, verifying the writer is still open.
void IndexWriter::blockAddIndexes(bool includePendingClose) {
    acquireRead();

    bool success = false;
    LuceneException finally;
    try {
        // Make sure we are still open since we could have waited quite a while for last addIndexes to finish
        ensureOpen(includePendingClose);
        success = true;
    } catch (LuceneException& e) {
        finally = e;
    }
    if (!success) {
        releaseRead();
    }
    finally.throwException();
}

void IndexWriter::resumeAddIndexes() {
    releaseRead();
}

// Clear recorded merge exceptions and bump the generation so stale ones are ignored.
void IndexWriter::resetMergeExceptions() {
    SyncLock syncLock(this);
    mergeExceptions.clear();
    ++mergeGen;
}

// Reject duplicate directories and the writer's own directory in an addIndexes* call.
// NOTE(review): Collection template arguments (presumably DirectoryPtr) appear lost in extraction.
void IndexWriter::noDupDirs(Collection dirs) {
    Collection dups(Collection::newInstance());
    for (Collection::iterator dir = dirs.begin(); dir != dirs.end(); ++dir) {
        for (Collection::iterator dup = dups.begin(); dup != dups.end(); ++dup) {
            if (*dup == *dir) {
                boost::throw_exception(IllegalArgumentException(L"Directory " + (*dir)->getLockID() + L" appears more than once"));
            }
        }
        if (*dir == directory) {
            boost::throw_exception(IllegalArgumentException(L"Cannot add directory to itself"));
        }
        dups.add(*dir);
    }
}

// Copy all segments from the given directories into this index inside a transaction.
void IndexWriter::addIndexesNoOptimize(Collection dirs) {
    ensureOpen();

    noDupDirs(dirs);

    // Do not allow add docs or deletes while we are running
    docWriter->pauseAllThreads();
    LuceneException finally;
    try {
        if (infoStream) {
            message(L"flush at addIndexesNoOptimize");
        }
        flush(true, false, true);

        bool success = false;

        startTransaction(false);

        try {
            int32_t docCount = 0;

            {
                SyncLock syncLock(this);
                ensureOpen();

                for (Collection::iterator dir = dirs.begin(); dir != dirs.end(); ++dir) {
                    if (directory == *dir) {
                        // cannot add this index: segments may be deleted in merge before added
boost::throw_exception(IllegalArgumentException(L"Cannot add this index to itself"));
                    }

                    // NOTE(review): newLucene template argument (presumably SegmentInfos) lost in extraction.
                    SegmentInfosPtr sis(newLucene()); // read infos from dir
                    sis->read(*dir);
                    for (int32_t j = 0; j < sis->size(); ++j) {
                        SegmentInfoPtr info(sis->info(j));
                        BOOST_ASSERT(!segmentInfos->contains(info));
                        docCount += info->docCount;
                        segmentInfos->add(info); // add each info
                    }
                }
            }

            // Notify DocumentsWriter that the flushed count just increased
            docWriter->updateFlushedDocCount(docCount);

            maybeMerge();

            ensureOpen();

            // If after merging there remain segments in the index that are in a different directory, just copy these
            // over into our index. This is necessary (before finishing the transaction) to avoid leaving the index
            // in an unusable (inconsistent) state.
            resolveExternalSegments();

            ensureOpen();

            success = true;
        } catch (LuceneException& e) {
            finally = e;
        }

        if (success) {
            commitTransaction();
        } else {
            rollbackTransaction();
        }
    } catch (std::bad_alloc& oom) {
        finally = handleOOM(oom, L"addIndexesNoOptimize");
    } catch (LuceneException& e) {
        finally = e;
    }
    if (docWriter) {
        docWriter->resumeAllThreads();
    }
    finally.throwException();
}

// True when segmentInfos references segments living in a different directory.
bool IndexWriter::hasExternalSegments() {
    return segmentInfos->hasExternalSegments(directory);
}

// Merge every external segment into this directory, cooperating with any concurrent merges.
void IndexWriter::resolveExternalSegments() {
    bool any = false;
    bool done = false;

    while (!done) {
        SegmentInfoPtr info;
        OneMergePtr merge;

        {
            SyncLock syncLock(this);
            if (stopMerges) {
                boost::throw_exception(MergeAbortedException(L"rollback() was called or addIndexes* hit an unhandled exception"));
            }

            int32_t numSegments = segmentInfos->size();
            done = true;

            for (int32_t i = 0; i < numSegments; ++i) {
                info = segmentInfos->info(i);
                if (info->dir != directory) {
                    done = false;
                    // NOTE(review): newLucene/dynamic_pointer_cast template arguments lost in extraction here.
                    OneMergePtr newMerge(newLucene(segmentInfos->range(i, i + 1), boost::dynamic_pointer_cast(mergePolicy) && getUseCompoundFile()));

                    // Returns true if no running merge conflicts with this one (and, records this merge as
                    // pending), ie, this segment is not currently being merged
                    if (registerMerge(newMerge)) {
                        merge = newMerge;

                        // If this segment is not currently being merged, then advance it to running & run
                        // the merge ourself (below)
                        pendingMerges.remove(merge);
                        runningMerges.add(merge);
                        break;
                    }
                }
            }

            if (!done && !merge) {
                // We are not yet done (external segments still exist in segmentInfos), yet, all such segments
                // are currently "covered" by a pending or running merge. We now try to grab any pending merge
                // that involves external segments
                merge = getNextExternalMerge();
            }

            if (!done && !merge) {
                // We are not yet done, and, all external segments fall under merges that the merge scheduler is
                // currently running. So, we now wait and check back to see if the merge has completed.
                doWait();
            }
        }

        if (merge) {
            any = true;
            IndexWriter::merge(merge);
        }
    }

    if (any) {
        // Sometimes, on copying an external segment over, more merges may become necessary
        mergeScheduler->merge(shared_from_this());
    }
}

// Merge the given readers' contents into this index as a single new segment (transactional).
// NOTE(review): the Collection template argument (presumably IndexReaderPtr) appears lost in extraction.
void IndexWriter::addIndexes(Collection readers) {
    ensureOpen();

    // Do not allow add docs or deletes while we are running
    docWriter->pauseAllThreads();

    // We must pre-acquire a read lock here (and upgrade to write lock in startTransaction below) so that no
    // other addIndexes is allowed to start up after we have flushed & optimized but before we then start our
    // transaction. This is because the merging below requires that only one segment is present in the index
    acquireRead();

    LuceneException finally;
    try {
        SegmentInfoPtr info;
        String mergedName;
        SegmentMergerPtr merger;

        bool success = false;

        try {
            flush(true, false, true);
            optimize(); // start with zero or 1 seg
            success = true;
        } catch (LuceneException& e) {
            finally = e;
        }

        // Take care to release the read lock if we hit an exception before starting the transaction
        if (!success) {
            releaseRead();
        }
        finally.throwException();

        // true means we already have a read lock; if this call hits an exception it will release the write lock
        startTransaction(true);

        try {
            mergedName = newSegmentName();
            merger = newLucene(shared_from_this(), mergedName, OneMergePtr());

            SegmentReaderPtr sReader;
            {
                SyncLock syncLock(this);
                if (segmentInfos->size() == 1) {
                    // add existing index, if any
                    sReader = readerPool->get(segmentInfos->info(0), true, BufferedIndexInput::BUFFER_SIZE, -1);
                }
            }

            success = false;

            try {
                if (sReader) {
                    merger->add(sReader);
                }

                for (Collection::iterator i = readers.begin(); i != readers.end(); ++i) {
                    merger->add(*i);
                }

                int32_t docCount = merger->merge(); // merge 'em

                {
                    SyncLock syncLock(this);
                    segmentInfos->clear(); // pop old infos & add new
                    info = newLucene(mergedName, docCount, directory, false, true, -1, L"", false, merger->hasProx());
                    setDiagnostics(info, L"addIndexes(Collection)");
                    segmentInfos->add(info);
                }

                // Notify DocumentsWriter that the flushed count just increased
                docWriter->updateFlushedDocCount(docCount);

                success = true;
            } catch (LuceneException& e) {
                finally = e;
            }
            if (sReader) {
                readerPool->release(sReader);
            }
        } catch (LuceneException& e) {
            finally = e;
        }

        if (!success) {
            if (infoStream) {
                message(L"hit exception in addIndexes during merge");
            }
            rollbackTransaction();
        } else {
            commitTransaction();
        }
        finally.throwException();

        if (boost::dynamic_pointer_cast(mergePolicy) && getUseCompoundFile()) {
            HashSet files;

            {
                SyncLock syncLock(this);
                // Must incRef our files so that if
another thread is running merge/optimize, it doesn't delete our // segment's files before we have a change to finish making the compound file. if (segmentInfos->contains(info)) { files = info->files(); deleter->incRef(files); } } if (files) { success = false; startTransaction(false); try { merger->createCompoundFile(mergedName + L".cfs"); { SyncLock syncLock(this); info->setUseCompoundFile(true); } success = true; } catch (LuceneException& e) { finally = e; } { SyncLock syncLock(this); deleter->decRef(files); } if (!success) { if (infoStream) { message(L"hit exception building compound file in addIndexes during merge"); } rollbackTransaction(); } else { commitTransaction(); } } } } catch (std::bad_alloc& oom) { finally = handleOOM(oom, L"addIndexes(Collection)"); } catch (LuceneException& e) { finally = e; } if (docWriter) { docWriter->resumeAllThreads(); } finally.throwException(); } void IndexWriter::doAfterFlush() { // override } void IndexWriter::doBeforeFlush() { // override } void IndexWriter::prepareCommit() { ensureOpen(); prepareCommit(MapStringString()); } void IndexWriter::prepareCommit(MapStringString commitUserData) { if (hitOOM) { boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot commit")); } if (pendingCommit) { boost::throw_exception(IllegalStateException(L"prepareCommit was already called with no corresponding call to commit")); } if (infoStream) { message(L"prepareCommit: flush"); } flush(true, true, true); startCommit(0, commitUserData); } void IndexWriter::commit(int64_t sizeInBytes) { SyncLock messageLock(commitLock); startCommit(sizeInBytes, MapStringString()); finishCommit(); } void IndexWriter::commit() { commit(MapStringString()); } void IndexWriter::commit(MapStringString commitUserData) { ensureOpen(); if (infoStream) { message(L"commit: start"); } { SyncLock messageLock(commitLock); if (infoStream) { message(L"commit: enter lock"); } if (!pendingCommit) { if (infoStream) { message(L"commit: now 
prepare"); } prepareCommit(commitUserData); } else if (infoStream) { message(L"commit: already prepared"); } finishCommit(); } } void IndexWriter::finishCommit() { SyncLock syncLock(this); if (pendingCommit) { LuceneException finally; try { if (infoStream) { message(L"commit: pendingCommit != null"); } pendingCommit->finishCommit(directory); if (infoStream) { message(L"commit: wrote segments file \"" + pendingCommit->getCurrentSegmentFileName() + L"\""); } lastCommitChangeCount = pendingCommitChangeCount; segmentInfos->updateGeneration(pendingCommit); segmentInfos->setUserData(pendingCommit->getUserData()); setRollbackSegmentInfos(pendingCommit); deleter->checkpoint(pendingCommit, true); } catch (LuceneException& e) { finally = e; } deleter->decRef(pendingCommit); pendingCommit.reset(); notifyAll(); finally.throwException(); } else if (infoStream) { message(L"commit: pendingCommit == null; skip"); } if (infoStream) { message(L"commit: done"); } } void IndexWriter::flush(bool triggerMerge, bool flushDocStores, bool flushDeletes) { // We can be called during close, when closing = true, so we must pass false to ensureOpen ensureOpen(false); if (doFlush(flushDocStores, flushDeletes) && triggerMerge) { maybeMerge(); } } bool IndexWriter::doFlush(bool flushDocStores, bool flushDeletes) { TestScope testScope(L"IndexWriter", L"doFlush"); SyncLock syncLock(this); bool success = false; LuceneException finally; try { try { success = doFlushInternal(flushDocStores, flushDeletes); } catch (LuceneException& e) { finally = e; } if (docWriter->doBalanceRAM()) { docWriter->balanceRAM(); } finally.throwException(); } catch (LuceneException& e) { finally = e; } docWriter->clearFlushPending(); finally.throwException(); return success; } bool IndexWriter::doFlushInternal(bool flushDocStores, bool flushDeletes) { SyncLock syncLock(this); if (hitOOM) { boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot flush")); } ensureOpen(false); 
BOOST_ASSERT(testPoint(L"startDoFlush")); doBeforeFlush(); ++flushCount; // If we are flushing because too many deletes accumulated, then we should apply the deletes to free RAM if (docWriter->doApplyDeletes()) { flushDeletes = true; } // Make sure no threads are actively adding a document. Returns true if docWriter is currently aborting, in // which case we skip flushing this segment if (infoStream) { message(L"flush: now pause all indexing threads"); } if (docWriter->pauseAllThreads()) { docWriter->resumeAllThreads(); return false; } bool flushDocs = false; LuceneException finally; try { SegmentInfoPtr newSegment; int32_t numDocs = docWriter->getNumDocsInRAM(); // Always flush docs if there are any flushDocs = (numDocs > 0); String docStoreSegment(docWriter->getDocStoreSegment()); BOOST_ASSERT(!docStoreSegment.empty() || numDocs == 0); if (docStoreSegment.empty()) { flushDocStores = false; } int32_t docStoreOffset = docWriter->getDocStoreOffset(); bool docStoreIsCompoundFile = false; if (infoStream) { message(L" flush: segment=" + docWriter->getSegment() + L" docStoreSegment=" + StringUtils::toString(docWriter->getDocStoreSegment()) + L" docStoreOffset=" + StringUtils::toString(docStoreOffset) + L" flushDocs=" + StringUtils::toString(flushDocs) + L" flushDeletes=" + StringUtils::toString(flushDeletes) + L" flushDocStores=" + StringUtils::toString(flushDocStores) + L" numDocs=" + StringUtils::toString(numDocs) + L" numBufDelTerms=" + StringUtils::toString(docWriter->getNumBufferedDeleteTerms())); message(L" index before flush " + segString()); } // Check if the doc stores must be separately flushed because other segments, besides the one we are // about to flush, reference it if (flushDocStores && (!flushDocs || docWriter->getSegment() != docWriter->getDocStoreSegment())) { // We must separately flush the doc store if (infoStream) { message(L" flush shared docStore segment " + docStoreSegment); } docStoreIsCompoundFile = IndexWriter::flushDocStores(); 
flushDocStores = false; } String segment(docWriter->getSegment()); // If we are flushing docs, segment must not be null BOOST_ASSERT(!segment.empty() || !flushDocs); if (flushDocs) { bool success = false; int32_t flushedDocCount; try { flushedDocCount = docWriter->flush(flushDocStores); if (infoStream) { message(L"flushedFiles=" + StringUtils::toString(docWriter->getFlushedFiles())); } success = true; } catch (LuceneException& e) { finally = e; } if (!success) { if (infoStream) { message(L"hit exception flushing segment " + segment); } deleter->refresh(segment); } finally.throwException(); if (docStoreOffset == 0 && flushDocStores) { // This means we are flushing private doc stores with this segment, so it will not be shared // with other segments BOOST_ASSERT(!docStoreSegment.empty()); BOOST_ASSERT(docStoreSegment == segment); docStoreOffset = -1; docStoreIsCompoundFile = false; docStoreSegment.clear(); } // Create new SegmentInfo, but do not add to our segmentInfos until deletes are flushed successfully. newSegment = newLucene(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, docWriter->hasProx()); setDiagnostics(newSegment, L"flush"); } docWriter->pushDeletes(); if (flushDocs) { segmentInfos->add(newSegment); checkpoint(); } if (flushDocs && mergePolicy->useCompoundFile(segmentInfos, newSegment)) { // Now build compound file bool success = false; try { docWriter->createCompoundFile(segment); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { if (infoStream) { message(L"hit exception creating compound file for newly flushed segment " + segment); } deleter->deleteFile(segment + L"." 
+ IndexFileNames::COMPOUND_FILE_EXTENSION()); } finally.throwException(); newSegment->setUseCompoundFile(true); checkpoint(); } if (flushDeletes) { applyDeletes(); } if (flushDocs) { checkpoint(); } doAfterFlush(); } catch (std::bad_alloc& oom) { finally = handleOOM(oom, L"doFlush"); flushDocs = false; } catch (LuceneException& e) { finally = e; } docWriter->resumeAllThreads(); finally.throwException(); return flushDocs; } int64_t IndexWriter::ramSizeInBytes() { ensureOpen(); return docWriter->getRAMUsed(); } int32_t IndexWriter::numRamDocs() { SyncLock syncLock(this); ensureOpen(); return docWriter->getNumDocsInRAM(); } int32_t IndexWriter::ensureContiguousMerge(const OneMergePtr& merge) { int32_t first = segmentInfos->find(merge->segments->info(0)); if (first == -1) { boost::throw_exception(MergeException(L"Could not find segment " + merge->segments->info(0)->name + L" in current index " + segString())); } int32_t numSegments = segmentInfos->size(); int32_t numSegmentsToMerge = merge->segments->size(); for (int32_t i = 0; i < numSegmentsToMerge; ++i) { SegmentInfoPtr info(merge->segments->info(i)); if (first + i >= numSegments || !segmentInfos->info(first + i)->equals(info)) { if (!segmentInfos->contains(info)) { boost::throw_exception(MergeException(L"MergePolicy selected a segment (" + info->name + L") that is not in the current index " + segString())); } else { boost::throw_exception(MergeException(L"MergePolicy selected non-contiguous segments to merge (" + merge->segString(directory) + L" vs " + segString() + L"), which IndexWriter (currently) cannot handle")); } } } return first; } void IndexWriter::commitMergedDeletes(const OneMergePtr& merge, const SegmentReaderPtr& mergeReader) { SyncLock syncLock(this); BOOST_ASSERT(testPoint(L"startCommitMergeDeletes")); SegmentInfosPtr sourceSegments(merge->segments); if (infoStream) { message(L"commitMergeDeletes " + merge->segString(directory)); } // Carefully merge deletes that occurred after we started merging 
int32_t docUpto = 0; int32_t delCount = 0; for (int32_t i = 0; i < sourceSegments->size(); ++i) { SegmentInfoPtr info(sourceSegments->info(i)); int32_t docCount = info->docCount; SegmentReaderPtr previousReader(merge->readersClone[i]); SegmentReaderPtr currentReader(merge->readers[i]); if (previousReader->hasDeletions()) { // There were deletes on this segment when the merge started. The merge has collapsed away those deletes, // but if new deletes were flushed since the merge started, we must now carefully keep any newly flushed // deletes but mapping them to the new docIDs. if (currentReader->numDeletedDocs() > previousReader->numDeletedDocs()) { // This means this segment has had new deletes committed since we started the merge, so we must merge them for (int32_t j = 0; j < docCount; ++j) { if (previousReader->isDeleted(j)) { BOOST_ASSERT(currentReader->isDeleted(j)); } else { if (currentReader->isDeleted(j)) { mergeReader->doDelete(docUpto); ++delCount; } ++docUpto; } } } else { docUpto += docCount - previousReader->numDeletedDocs(); } } else if (currentReader->hasDeletions()) { // This segment had no deletes before but now it does for (int32_t j = 0; j < docCount; ++j) { if (currentReader->isDeleted(j)) { mergeReader->doDelete(docUpto); ++delCount; } ++docUpto; } } else { // No deletes before or after docUpto += info->docCount; } } BOOST_ASSERT(mergeReader->numDeletedDocs() == delCount); mergeReader->_hasChanges = (delCount > 0); } bool IndexWriter::commitMerge(const OneMergePtr& merge, const SegmentMergerPtr& merger, int32_t mergedDocCount, const SegmentReaderPtr& mergedReader) { SyncLock syncLock(this); BOOST_ASSERT(testPoint(L"startCommitMerge")); if (hitOOM) { boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot complete merge")); } if (infoStream) { message(L"commitMerge: " + merge->segString(directory) + L" index=" + segString()); } BOOST_ASSERT(merge->registerDone); // If merge was explicitly aborted, or, if 
rollback() or rollbackTransaction() had been called since our merge // started (which results in an unqualified deleter.refresh() call that will remove any index file that current // segments does not reference), we abort this merge if (merge->isAborted()) { if (infoStream) { message(L"commitMerge: skipping merge " + merge->segString(directory) + L": it was aborted"); } return false; } int32_t start = ensureContiguousMerge(merge); commitMergedDeletes(merge, mergedReader); docWriter->remapDeletes(segmentInfos, merger->getDocMaps(), merger->getDelCounts(), merge, mergedDocCount); // If the doc store we are using has been closed and is in now compound format (but wasn't when we started), // then we will switch to the compound format as well setMergeDocStoreIsCompoundFile(merge); merge->info->setHasProx(merger->hasProx()); segmentInfos->remove(start, start + merge->segments->size()); BOOST_ASSERT(!segmentInfos->contains(merge->info)); segmentInfos->add(start, merge->info); closeMergeReaders(merge, false); // Must note the change to segmentInfos so any commits in-flight don't lose it checkpoint(); // If the merged segments had pending changes, clear them so that they don't bother writing // them to disk, updating SegmentInfo, etc. 
readerPool->clear(merge->segments); if (merge->optimize) { // cascade the optimize segmentsToOptimize.add(merge->info); } return true; } LuceneException IndexWriter::handleMergeException(const LuceneException& exc, const OneMergePtr& merge) { if (infoStream) { message(L"handleMergeException: merge=" + merge->segString(directory) + L" exc=" + exc.getError()); } // Set the exception on the merge, so if optimize() is waiting on us it sees the root cause exception merge->setException(exc); addMergeException(merge); switch (exc.getType()) { case LuceneException::MergeAborted: // We can ignore this exception (it happens when close(false) or rollback is called), unless the // merge involves segments from external directories, in which case we must throw it so, for // example, the rollbackTransaction code in addIndexes* is executed. if (merge->isExternal) { return exc; } break; case LuceneException::IO: case LuceneException::Runtime: return exc; default: return RuntimeException(); // Should not get here } return LuceneException(); } void IndexWriter::merge(const OneMergePtr& merge) { bool success = false; try { LuceneException finally; try { try { mergeInit(merge); if (infoStream) { message(L"now merge\n merge=" + merge->segString(directory) + L"\n index=" + segString()); } mergeMiddle(merge); mergeSuccess(merge); success = true; } catch (LuceneException& e) { finally = handleMergeException(e, merge); } { SyncLock syncLock(this); mergeFinish(merge); if (!success) { if (infoStream) { message(L"hit exception during merge"); } if (merge->info && !segmentInfos->contains(merge->info)) { deleter->refresh(merge->info->name); } } // This merge (and, generally, any change to the segments) may now enable // new merges, so we call merge policy & update pending merges. 
if (success && !merge->isAborted() && !closed && !closing) { updatePendingMerges(merge->maxNumSegmentsOptimize, merge->optimize); } } } catch (LuceneException& e) { finally = e; } finally.throwException(); } catch (std::bad_alloc& oom) { boost::throw_exception(handleOOM(oom, L"merge")); } } void IndexWriter::mergeSuccess(const OneMergePtr& merge) { // override } bool IndexWriter::registerMerge(const OneMergePtr& merge) { SyncLock syncLock(this); if (merge->registerDone) { return true; } if (stopMerges) { merge->abort(); boost::throw_exception(MergeAbortedException(L"merge is aborted: " + merge->segString(directory))); } int32_t count = merge->segments->size(); bool isExternal = false; for (int32_t i = 0; i < count; ++i) { SegmentInfoPtr info(merge->segments->info(i)); if (mergingSegments.contains(info)) { return false; } if (!segmentInfos->contains(info)) { return false; } if (info->dir != directory) { isExternal = true; } if (segmentsToOptimize.contains(info)) { merge->optimize = true; merge->maxNumSegmentsOptimize = optimizeMaxNumSegments; } } ensureContiguousMerge(merge); pendingMerges.add(merge); if (infoStream) { message(L"add merge to pendingMerges: " + merge->segString(directory) + L" [total " + StringUtils::toString(pendingMerges.size()) + L" pending]"); } merge->mergeGen = mergeGen; merge->isExternal = isExternal; // OK it does not conflict; now record that this merge is running (while synchronized) // to avoid race condition where two conflicting merges from different threads, start for (int32_t i = 0; i < count; ++i) { mergingSegments.add(merge->segments->info(i)); } // Merge is now registered merge->registerDone = true; return true; } void IndexWriter::mergeInit(const OneMergePtr& merge) { SyncLock syncLock(this); bool success = false; LuceneException finally; try { _mergeInit(merge); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { mergeFinish(merge); } finally.throwException(); } void IndexWriter::_mergeInit(const 
OneMergePtr& merge) { SyncLock syncLock(this); bool test = testPoint(L"startMergeInit"); BOOST_ASSERT(test); BOOST_ASSERT(merge->registerDone); BOOST_ASSERT(!merge->optimize || merge->maxNumSegmentsOptimize > 0); if (hitOOM) { boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot merge")); } if (merge->info) { // mergeInit already done return; } if (merge->isAborted()) { return; } applyDeletes(); SegmentInfosPtr sourceSegments(merge->segments); int32_t end = sourceSegments->size(); // Check whether this merge will allow us to skip merging the doc stores (stored field & vectors). // This is a very substantial optimization (saves tons of IO). DirectoryPtr lastDir(directory); String lastDocStoreSegment; int32_t next = -1; bool mergeDocStores = false; bool doFlushDocStore = false; String currentDocStoreSegment(docWriter->getDocStoreSegment()); // Test each segment to be merged: check if we need to flush/merge doc stores for (int32_t i = 0; i < end; ++i) { SegmentInfoPtr si(sourceSegments->info(i)); // If it has deletions we must merge the doc stores if (si->hasDeletions()) { mergeDocStores = true; } // If it has its own (private) doc stores we must merge the doc stores if (si->getDocStoreOffset() == -1) { mergeDocStores = true; } // If it has a different doc store segment than previous segments, we must merge the doc stores String docStoreSegment(si->getDocStoreSegment()); if (docStoreSegment.empty()) { mergeDocStores = true; } else if (lastDocStoreSegment.empty()) { lastDocStoreSegment = docStoreSegment; } else if (lastDocStoreSegment != docStoreSegment) { mergeDocStores = true; } // Segments' docScoreOffsets must be in-order, contiguous. 
For the default merge policy now // this will always be the case but for an arbitrary merge policy this may not be the case if (next == -1) { next = si->getDocStoreOffset() + si->docCount; } else if (next != si->getDocStoreOffset()) { mergeDocStores = true; } else { next = si->getDocStoreOffset() + si->docCount; } // If the segment comes from a different directory we must merge if (lastDir != si->dir) { mergeDocStores = true; } // If the segment is referencing the current "live" doc store outputs then we must merge if (si->getDocStoreOffset() != -1 && !currentDocStoreSegment.empty() && si->getDocStoreSegment() == currentDocStoreSegment) { doFlushDocStore = true; } } // if a mergedSegmentWarmer is installed, we must merge the doc stores because we will open a full // SegmentReader on the merged segment if (!mergeDocStores && mergedSegmentWarmer && !currentDocStoreSegment.empty() && !lastDocStoreSegment.empty() && lastDocStoreSegment == currentDocStoreSegment) { mergeDocStores = true; } int32_t docStoreOffset; String docStoreSegment; bool docStoreIsCompoundFile; if (mergeDocStores) { docStoreOffset = -1; docStoreSegment.clear(); docStoreIsCompoundFile = false; } else { SegmentInfoPtr si(sourceSegments->info(0)); docStoreOffset = si->getDocStoreOffset(); docStoreSegment = si->getDocStoreSegment(); docStoreIsCompoundFile = si->getDocStoreIsCompoundFile(); } if (mergeDocStores && doFlushDocStore) { // SegmentMerger intends to merge the doc stores (stored fields, vectors), and at // least one of the segments to be merged refers to the currently live doc stores. if (infoStream) { message(L"now flush at merge"); } doFlush(true, false); } merge->mergeDocStores = mergeDocStores; // Bind a new segment name here so even with ConcurrentMergePolicy we keep deterministic segment names. 
merge->info = newLucene(newSegmentName(), 0, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, false); MapStringString details(MapStringString::newInstance()); details.put(L"optimize", StringUtils::toString(merge->optimize)); details.put(L"mergeFactor", StringUtils::toString(end)); details.put(L"mergeDocStores", StringUtils::toString(mergeDocStores)); setDiagnostics(merge->info, L"merge", details); // Also enroll the merged segment into mergingSegments; this prevents it from getting // selected for a merge after our merge is done but while we are building the CFS mergingSegments.add(merge->info); } void IndexWriter::setDiagnostics(const SegmentInfoPtr& info, const String& source) { setDiagnostics(info, source, MapStringString()); } void IndexWriter::setDiagnostics(const SegmentInfoPtr& info, const String& source, MapStringString details) { MapStringString diagnostics(MapStringString::newInstance()); diagnostics.put(L"source", source); diagnostics.put(L"lucene.version", Constants::LUCENE_VERSION); diagnostics.put(L"os", Constants::OS_NAME); if (details) { diagnostics.putAll(details.begin(), details.end()); } info->setDiagnostics(diagnostics); } void IndexWriter::mergeFinish(const OneMergePtr& merge) { SyncLock syncLock(this); // Optimize, addIndexes or finishMerges may be waiting on merges to finish. 
notifyAll(); // It's possible we are called twice, eg if there was an exception inside mergeInit if (merge->registerDone) { SegmentInfosPtr sourceSegments(merge->segments); int32_t end = sourceSegments->size(); for (int32_t i = 0; i < end; ++i) { mergingSegments.remove(sourceSegments->info(i)); } mergingSegments.remove(merge->info); merge->registerDone = false; } runningMerges.remove(merge); } void IndexWriter::setMergeDocStoreIsCompoundFile(const OneMergePtr& merge) { SyncLock syncLock(this); String mergeDocStoreSegment(merge->info->getDocStoreSegment()); if (!mergeDocStoreSegment.empty() && !merge->info->getDocStoreIsCompoundFile()) { int32_t size = segmentInfos->size(); for (int32_t i = 0; i < size; ++i) { SegmentInfoPtr info(segmentInfos->info(i)); String docStoreSegment(info->getDocStoreSegment()); if (!docStoreSegment.empty() && docStoreSegment == mergeDocStoreSegment && info->getDocStoreIsCompoundFile()) { merge->info->setDocStoreIsCompoundFile(true); break; } } } } void IndexWriter::closeMergeReaders(const OneMergePtr& merge, bool suppressExceptions) { SyncLock syncLock(this); int32_t numSegments = merge->segments->size(); if (suppressExceptions) { // Suppress any new exceptions so we throw the original cause for (int32_t i = 0; i < numSegments; ++i) { if (merge->readers[i]) { try { readerPool->release(merge->readers[i], false); } catch (...) { } merge->readers[i].reset(); } if (merge->readersClone[i]) { try { merge->readersClone[i]->close(); } catch (...) 
{ } // This was a private clone and we had the only reference BOOST_ASSERT(merge->readersClone[i]->getRefCount() == 0); merge->readersClone[i].reset(); } } } else { for (int32_t i = 0; i < numSegments; ++i) { if (merge->readers[i]) { readerPool->release(merge->readers[i], true); merge->readers[i].reset(); } if (merge->readersClone[i]) { merge->readersClone[i]->close(); // This was a private clone and we had the only reference BOOST_ASSERT(merge->readersClone[i]->getRefCount() == 0); merge->readersClone[i].reset(); } } } } int32_t IndexWriter::mergeMiddle(const OneMergePtr& merge) { merge->checkAborted(directory); String mergedName(merge->info->name); int32_t mergedDocCount = 0; SegmentInfosPtr sourceSegments(merge->segments); int32_t numSegments = sourceSegments->size(); if (infoStream) { message(L"merging " + merge->segString(directory)); } SegmentMergerPtr merger(newLucene(shared_from_this(), mergedName, merge)); merge->readers = Collection::newInstance(numSegments); merge->readersClone = Collection::newInstance(numSegments); bool mergeDocStores = false; String currentDocStoreSegment; { SyncLock syncLock(this); currentDocStoreSegment = docWriter->getDocStoreSegment(); } bool currentDSSMerged = false; LuceneException finally; // This is try/finally to make sure merger's readers are closed bool success = false; try { int32_t totDocCount = 0; for (int32_t i = 0; i < numSegments; ++i) { SegmentInfoPtr info(sourceSegments->info(i)); // Hold onto the "live" reader; we will use this to commit merged deletes merge->readers[i] = readerPool->get(info, merge->mergeDocStores, MERGE_READ_BUFFER_SIZE, -1); SegmentReaderPtr reader(merge->readers[i]); // We clone the segment readers because other deletes may come in while we're merging so we need readers that will not change merge->readersClone[i] = boost::dynamic_pointer_cast(reader->clone(true)); SegmentReaderPtr clone(merge->readersClone[i]); merger->add(clone); if (clone->hasDeletions()) { mergeDocStores = true; } if 
(info->getDocStoreOffset() != -1 && !currentDocStoreSegment.empty()) { currentDSSMerged = currentDSSMerged || (currentDocStoreSegment == info->getDocStoreSegment()); } totDocCount += clone->numDocs(); } if (infoStream) { message(L"merge: total " + StringUtils::toString(totDocCount) + L" docs"); } merge->checkAborted(directory); // If deletions have arrived and it has now become necessary to merge doc stores, go and open them if (mergeDocStores && !merge->mergeDocStores) { merge->mergeDocStores = true; { SyncLock syncLock(this); if (currentDSSMerged) { if (infoStream) { message(L"now flush at mergeMiddle"); } doFlush(true, false); } } for (Collection::iterator reader = merge->readersClone.begin(); reader != merge->readersClone.end(); ++reader) { (*reader)->openDocStores(); } // Clear DSS merge->info->setDocStore(-1, L"", false); } // This is where all the work happens merge->info->docCount = merger->merge(merge->mergeDocStores); mergedDocCount = merge->info->docCount; BOOST_ASSERT(mergedDocCount == totDocCount); if (merge->useCompoundFile) { success = false; String compoundFileName(IndexFileNames::segmentFileName(mergedName, IndexFileNames::COMPOUND_FILE_EXTENSION())); try { if (infoStream) { message(L"create compound file " + compoundFileName); } merger->createCompoundFile(compoundFileName); success = true; } catch (IOException& ioe) { SyncLock syncLock(this); if (merge->isAborted()) { // This can happen if rollback or close(false) is called - fall through to logic // below to remove the partially created CFS } else { finally = handleMergeException(ioe, merge); } } catch (LuceneException& e) { finally = handleMergeException(e, merge); } if (!success) { if (infoStream) { message(L"hit exception creating compound file during merge"); } { SyncLock syncLock(this); deleter->deleteFile(compoundFileName); deleter->deleteNewFiles(merger->getMergedFiles()); } } finally.throwException(); success = false; { SyncLock syncLock(this); // delete new non cfs files directly: they 
were never registered with IFD deleter->deleteNewFiles(merger->getMergedFiles()); if (merge->isAborted()) { if (infoStream) { message(L"abort merge after building CFS"); } deleter->deleteFile(compoundFileName); boost::throw_exception(TemporaryException()); } } merge->info->setUseCompoundFile(true); } int32_t termsIndexDivisor = -1; bool loadDocStores = false; // if the merged segment warmer was not installed when this merge was started, causing us // to not force the docStores to close, we can't warm it now bool canWarm = (merge->info->getDocStoreSegment().empty() || currentDocStoreSegment.empty() || merge->info->getDocStoreSegment() == currentDocStoreSegment); if (poolReaders && mergedSegmentWarmer && canWarm) { // Load terms index & doc stores so the segment warmer can run searches, load documents/term vectors termsIndexDivisor = readerTermsIndexDivisor; loadDocStores = true; } SegmentReaderPtr mergedReader(readerPool->get(merge->info, loadDocStores, BufferedIndexInput::BUFFER_SIZE, termsIndexDivisor)); try { if (poolReaders && mergedSegmentWarmer) { mergedSegmentWarmer->warm(mergedReader); } if (!commitMerge(merge, merger, mergedDocCount, mergedReader)) { // commitMerge will return false if this merge was aborted boost::throw_exception(TemporaryException()); } } catch (LuceneException& e) { finally = e; } { SyncLock syncLock(this); readerPool->release(mergedReader); } finally.throwException(); success = true; } catch (LuceneException& e) { finally = e; } // Readers are already closed in commitMerge if we didn't hit an exc if (!success) { closeMergeReaders(merge, true); } // has this merge been aborted? 
if (finally.getType() == LuceneException::Temporary) { return 0; } finally.throwException(); return mergedDocCount; } void IndexWriter::addMergeException(const OneMergePtr& merge) { SyncLock syncLock(this); BOOST_ASSERT(!merge->getException().isNull()); if (!mergeExceptions.contains(merge) && mergeGen == merge->mergeGen) { mergeExceptions.add(merge); } } bool IndexWriter::applyDeletes() { TestScope testScope(L"IndexWriter", L"applyDeletes"); SyncLock syncLock(this); BOOST_ASSERT(testPoint(L"startApplyDeletes")); ++flushDeletesCount; bool success = false; bool changed = false; LuceneException finally; try { changed = docWriter->applyDeletes(segmentInfos); success = true; } catch (LuceneException& e) { finally = e; } if (!success && infoStream) { message(L"hit exception flushing deletes"); } finally.throwException(); if (changed) { checkpoint(); } return changed; } int32_t IndexWriter::getBufferedDeleteTermsSize() { SyncLock syncLock(this); return docWriter->getBufferedDeleteTerms().size(); } int32_t IndexWriter::getNumBufferedDeleteTerms() { SyncLock syncLock(this); return docWriter->getNumBufferedDeleteTerms(); } SegmentInfoPtr IndexWriter::newestSegment() { return !segmentInfos->empty() ? 
segmentInfos->info(segmentInfos->size() - 1) : SegmentInfoPtr(); } String IndexWriter::segString() { return segString(segmentInfos); } String IndexWriter::segString(const SegmentInfosPtr& infos) { SyncLock syncLock(this); StringStream buffer; int32_t count = infos->size(); for (int32_t i = 0; i < count; ++i) { if (i > 0) { buffer << L" "; } SegmentInfoPtr info(infos->info(i)); buffer << info->segString(directory); if (info->dir != directory) { buffer << L"**"; } } return buffer.str(); } bool IndexWriter::startSync(const String& fileName, HashSet pending) { SyncLock syncedLock(&synced); if (!synced.contains(fileName)) { if (!syncing.contains(fileName)) { syncing.add(fileName); return true; } else { pending.add(fileName); return false; } } else { return false; } } void IndexWriter::finishSync(const String& fileName, bool success) { SyncLock syncedLock(&synced); BOOST_ASSERT(syncing.contains(fileName)); syncing.remove(fileName); if (success) { synced.add(fileName); } synced.notifyAll(); } bool IndexWriter::waitForAllSynced(HashSet syncing) { SyncLock syncedLock(&synced); for (HashSet::iterator fileName = syncing.begin(); fileName != syncing.end(); ++fileName) { while (!synced.contains(*fileName)) { if (!syncing.contains(*fileName)) { // There was an error because a file that was previously syncing failed to appear in synced return false; } else { synced.wait(); } } } return true; } void IndexWriter::doWait() { SyncLock syncLock(this); // NOTE: the callers of this method should in theory be able to do simply wait(), but, as a defense against // thread timing hazards where notifyAll() fails to be called, we wait for at most 1 second and then return // so caller can check if wait conditions are satisfied wait(1000); } void IndexWriter::startCommit(int64_t sizeInBytes, MapStringString commitUserData) { BOOST_ASSERT(testPoint(L"startStartCommit")); if (hitOOM) { boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot commit")); } try { if 
(infoStream) { message(L"startCommit(): start sizeInBytes=" + StringUtils::toString(sizeInBytes)); } SegmentInfosPtr toSync; int64_t myChangeCount = 0; LuceneException finally; { SyncLock syncLock(this); // Wait for any running addIndexes to complete first, then block any from running // until we've copied the segmentInfos we intend to sync blockAddIndexes(false); // On commit the segmentInfos must never reference a segment in another directory BOOST_ASSERT(!hasExternalSegments()); try { BOOST_ASSERT(lastCommitChangeCount <= changeCount); myChangeCount = changeCount; if (changeCount == lastCommitChangeCount) { if (infoStream) { message(L" skip startCommit(): no changes pending"); } boost::throw_exception(TemporaryException()); } // First, we clone & incref the segmentInfos we intend to sync, then, without locking, we sync() each // file referenced by toSync, in the background. Multiple threads can be doing this at once, if say // a large merge and a small merge finish at the same time if (infoStream) { message(L"startCommit index=" + segString(segmentInfos) + L" changeCount=" + StringUtils::toString(changeCount)); } readerPool->commit(); // It's possible another flush (that did not close the open do stores) snook in after the flush we // just did, so we remove any tail segments referencing the open doc store from the SegmentInfos // we are about to sync (the main SegmentInfos will keep them) toSync = boost::dynamic_pointer_cast(segmentInfos->clone()); String dss(docWriter->getDocStoreSegment()); if (!dss.empty()) { while (true) { String dss2(toSync->info(toSync->size() - 1)->getDocStoreSegment()); if (dss2.empty() || dss2 != dss) { break; } toSync->remove(toSync->size() - 1); ++changeCount; } } if (commitUserData) { toSync->setUserData(commitUserData); } deleter->incRef(toSync, false); HashSet files(toSync->files(directory, false)); for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { 
BOOST_ASSERT(directory->fileExists(*fileName)); // If this trips it means we are missing a call to .checkpoint somewhere, because by the // time we are called, deleter should know about every file referenced by the current head // segmentInfos BOOST_ASSERT(deleter->exists(*fileName)); } } catch (LuceneException& e) { finally = e; } resumeAddIndexes(); // no changes pending? if (finally.getType() == LuceneException::Temporary) { return; } finally.throwException(); } BOOST_ASSERT(testPoint(L"midStartCommit")); bool setPending = false; try { // Loop until all files toSync references are sync'd while (true) { HashSet pending(HashSet::newInstance()); HashSet files(toSync->files(directory, false)); for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { if (startSync(*fileName, pending)) { bool success = false; try { // Because we incRef'd this commit point above, the file had better exist BOOST_ASSERT(directory->fileExists(*fileName)); if (infoStream) { message(L"now sync " + *fileName); } directory->sync(*fileName); success = true; } catch (LuceneException& e) { finally = e; } finishSync(*fileName, success); finally.throwException(); } } // All files that I require are either synced or being synced by other threads. If they are being // synced, we must at this point block until they are done. 
If this returns false, that means an // error in another thread resulted in failing to actually sync one of our files, so we repeat if (waitForAllSynced(pending)) { break; } } BOOST_ASSERT(testPoint(L"midStartCommit2")); { SyncLock syncLock(this); // If someone saved a newer version of segments file since I first started syncing // my version, I can safely skip saving myself since I've been superseded while (true) { if (myChangeCount <= lastCommitChangeCount) { if (infoStream) { message(L"sync superseded by newer infos"); } break; } else if (!pendingCommit) { // My turn to commit if (segmentInfos->getGeneration() > toSync->getGeneration()) { toSync->updateGeneration(segmentInfos); } bool success = false; try { // Exception here means nothing is prepared (this method unwinds // everything it did on an exception) try { toSync->prepareCommit(directory); } catch (LuceneException& e) { finally = e; } // Have our master segmentInfos record the generations we just prepared. We do this on // error or success so we don't double-write a segments_N file. 
segmentInfos->updateGeneration(toSync); finally.throwException(); BOOST_ASSERT(!pendingCommit); setPending = true; pendingCommit = toSync; pendingCommitChangeCount = myChangeCount; success = true; } catch (LuceneException& e) { finally = e; } if (!success && infoStream) { message(L"hit exception committing segments file"); } finally.throwException(); break; } else { // Must wait for other commit to complete doWait(); } } } if (infoStream) { message(L"done all syncs"); } BOOST_ASSERT(testPoint(L"midStartCommitSuccess")); } catch (LuceneException& e) { finally = e; } { SyncLock syncLock(this); if (!setPending) { deleter->decRef(toSync); } } finally.throwException(); } catch (std::bad_alloc& oom) { boost::throw_exception(handleOOM(oom, L"startCommit")); } BOOST_ASSERT(testPoint(L"finishStartCommit")); } bool IndexWriter::isLocked(const DirectoryPtr& directory) { return directory->makeLock(WRITE_LOCK_NAME)->isLocked(); } void IndexWriter::unlock(const DirectoryPtr& directory) { directory->makeLock(IndexWriter::WRITE_LOCK_NAME)->release(); } void IndexWriter::setMergedSegmentWarmer(const IndexReaderWarmerPtr& warmer) { mergedSegmentWarmer = warmer; } IndexReaderWarmerPtr IndexWriter::getMergedSegmentWarmer() { return mergedSegmentWarmer; } LuceneException IndexWriter::handleOOM(const std::bad_alloc& oom, const String& location) { if (infoStream) { message(L"hit OutOfMemoryError inside " + location); } hitOOM = true; return OutOfMemoryError(); } bool IndexWriter::testPoint(const String& name) { return true; } bool IndexWriter::nrtIsCurrent(const SegmentInfosPtr& infos) { SyncLock syncLock(this); if (!infos->equals(segmentInfos)) { // if any structural changes (new segments), we are stale return false; } else if (infos->getGeneration() != segmentInfos->getGeneration()) { // if any commit took place since we were opened, we are stale return false; } else { return !docWriter->anyChanges(); } } bool IndexWriter::isClosed() { SyncLock syncLock(this); return closed; } 
ReaderPool::ReaderPool(const IndexWriterPtr& writer) { readerMap = MapSegmentInfoSegmentReader::newInstance(); _indexWriter = writer; } ReaderPool::~ReaderPool() { } void ReaderPool::clear(const SegmentInfosPtr& infos) { SyncLock syncLock(this); if (!infos) { for (MapSegmentInfoSegmentReader::iterator ent = readerMap.begin(); ent != readerMap.end(); ++ent) { ent->second->_hasChanges = false; } } else { for (int32_t i = 0; i < infos->size(); ++i) { MapSegmentInfoSegmentReader::iterator ent = readerMap.find(infos->info(i)); if (ent != readerMap.end()) { ent->second->_hasChanges = false; } } } } bool ReaderPool::infoIsLive(const SegmentInfoPtr& info) { SyncLock syncLock(this); IndexWriterPtr indexWriter(_indexWriter); int32_t idx = indexWriter->segmentInfos->find(info); BOOST_ASSERT(idx != -1); BOOST_ASSERT(indexWriter->segmentInfos->info(idx) == info); return true; } SegmentInfoPtr ReaderPool::mapToLive(const SegmentInfoPtr& info) { SyncLock syncLock(this); IndexWriterPtr indexWriter(_indexWriter); int32_t idx = indexWriter->segmentInfos->find(info); SegmentInfoPtr _info(info); if (idx != -1) { _info = indexWriter->segmentInfos->info(idx); } return _info; } void ReaderPool::release(const SegmentReaderPtr& sr) { release(sr, false); } void ReaderPool::release(const SegmentReaderPtr& sr, bool drop) { SyncLock syncLock(this); IndexWriterPtr indexWriter(_indexWriter); bool pooled = readerMap.contains(sr->getSegmentInfo()); BOOST_ASSERT(!pooled || readerMap.get(sr->getSegmentInfo()) == sr); // Drop caller's ref; for an external reader (not pooled), this decRef will close it sr->decRef(); if (pooled && (drop || (!indexWriter->poolReaders && sr->getRefCount() == 1))) { // We invoke deleter.checkpoint below, so we must be sync'd on IW if there are changes BOOST_ASSERT(!sr->_hasChanges || holdsLock()); // Discard (don't save) changes when we are dropping the reader; this is used only on the // sub-readers after a successful merge. 
sr->_hasChanges = sr->_hasChanges && !drop; bool hasChanges = sr->_hasChanges; // Drop our ref - this will commit any pending changes to the dir sr->close(); // We are the last ref to this reader; since we're not pooling readers, we release it readerMap.remove(sr->getSegmentInfo()); if (hasChanges) { // Must checkpoint with deleter, because this segment reader will have created new // _X_N.del file. indexWriter->deleter->checkpoint(indexWriter->segmentInfos, false); } } } void ReaderPool::close() { SyncLock syncLock(this); IndexWriterPtr indexWriter(_indexWriter); // We invoke deleter.checkpoint below, so we must be sync'd on IW BOOST_ASSERT(holdsLock()); for (MapSegmentInfoSegmentReader::iterator iter = readerMap.begin(); iter != readerMap.end(); ++iter) { if (iter->second->_hasChanges) { BOOST_ASSERT(infoIsLive(iter->second->getSegmentInfo())); iter->second->doCommit(MapStringString()); // Must checkpoint with deleter, because this segment reader will have created // new _X_N.del file. indexWriter->deleter->checkpoint(indexWriter->segmentInfos, false); } // NOTE: it is allowed that this decRef does not actually close the SR; this can happen when a // near real-time reader is kept open after the IndexWriter instance is closed iter->second->decRef(); } readerMap.clear(); } void ReaderPool::commit() { SyncLock syncLock(this); IndexWriterPtr indexWriter(_indexWriter); // We invoke deleter.checkpoint below, so we must be sync'd on IW BOOST_ASSERT(holdsLock()); for (MapSegmentInfoSegmentReader::iterator ent = readerMap.begin(); ent != readerMap.end(); ++ent) { if (ent->second->_hasChanges) { BOOST_ASSERT(infoIsLive(ent->second->getSegmentInfo())); ent->second->doCommit(MapStringString()); // Must checkpoint with deleter, because this segment reader will have created // new _X_N.del file. 
indexWriter->deleter->checkpoint(indexWriter->segmentInfos, false); } } } IndexReaderPtr ReaderPool::getReadOnlyClone(const SegmentInfoPtr& info, bool doOpenStores, int32_t termInfosIndexDivisor) { SyncLock syncLock(this); SegmentReaderPtr sr(get(info, doOpenStores, BufferedIndexInput::BUFFER_SIZE, termInfosIndexDivisor)); IndexReaderPtr clone; LuceneException finally; try { clone = boost::dynamic_pointer_cast(sr->clone(true)); } catch (LuceneException& e) { finally = e; } sr->decRef(); finally.throwException(); return clone; } SegmentReaderPtr ReaderPool::get(const SegmentInfoPtr& info, bool doOpenStores) { return get(info, doOpenStores, BufferedIndexInput::BUFFER_SIZE, IndexWriterPtr(_indexWriter)->readerTermsIndexDivisor); } SegmentReaderPtr ReaderPool::get(const SegmentInfoPtr& info, bool doOpenStores, int32_t readBufferSize, int32_t termsIndexDivisor) { SyncLock syncLock(this); IndexWriterPtr indexWriter(_indexWriter); if (indexWriter->poolReaders) { readBufferSize = BufferedIndexInput::BUFFER_SIZE; } SegmentReaderPtr sr(readerMap.get(info)); if (!sr) { // Returns a ref, which we xfer to readerMap sr = SegmentReader::get(false, info->dir, info, readBufferSize, doOpenStores, termsIndexDivisor); if (info->dir == indexWriter->directory) { // Only pool if reader is not external readerMap.put(info, sr); } } else { if (doOpenStores) { sr->openDocStores(); } if (termsIndexDivisor != -1 && !sr->termsIndexLoaded()) { // If this reader was originally opened because we needed to merge it, we didn't load the terms // index. But now, if the caller wants the terms index (eg because it's doing deletes, or an NRT // reader is being opened) we ask the reader to load its terms index. 
sr->loadTermsIndex(termsIndexDivisor); } } // Return a ref to our caller if (info->dir == indexWriter->directory) { // Only incRef if we pooled (reader is not external) sr->incRef(); } return sr; } SegmentReaderPtr ReaderPool::getIfExists(const SegmentInfoPtr& info) { SyncLock syncLock(this); SegmentReaderPtr sr(readerMap.get(info)); if (sr) { sr->incRef(); } return sr; } IndexReaderWarmer::~IndexReaderWarmer() { } } LucenePlusPlus-rel_3.0.9/src/core/index/IntBlockPool.cpp000066400000000000000000000030371456444476200232240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IntBlockPool.h" #include "DocumentsWriter.h" namespace Lucene { IntBlockPool::IntBlockPool(const DocumentsWriterPtr& docWriter, bool trackAllocations) { this->buffers = Collection::newInstance(10); this->bufferUpto = -1; this->intUpto = DocumentsWriter::INT_BLOCK_SIZE; this->intOffset = -DocumentsWriter::INT_BLOCK_SIZE; this->_docWriter = docWriter; this->trackAllocations = trackAllocations; } IntBlockPool::~IntBlockPool() { } void IntBlockPool::reset() { if (bufferUpto != -1) { if (bufferUpto > 0) { // Recycle all but the first buffer DocumentsWriterPtr(_docWriter)->recycleIntBlocks(buffers, 1, 1 + bufferUpto); } // Reuse first buffer bufferUpto = 0; intUpto = 0; intOffset = 0; buffer = buffers[0]; } } void IntBlockPool::nextBuffer() { if (bufferUpto + 1 == buffers.size()) { buffers.resize((int32_t)((double)buffers.size() * 1.5)); } buffer = DocumentsWriterPtr(_docWriter)->getIntBlock(trackAllocations); buffers[1 + bufferUpto] = buffer; ++bufferUpto; intUpto = 0; intOffset += DocumentsWriter::INT_BLOCK_SIZE; } } 
LucenePlusPlus-rel_3.0.9/src/core/index/InvertedDocConsumer.cpp000066400000000000000000000011041456444476200246000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "InvertedDocConsumer.h" namespace Lucene { InvertedDocConsumer::~InvertedDocConsumer() { } void InvertedDocConsumer::setFieldInfos(const FieldInfosPtr& fieldInfos) { this->fieldInfos = fieldInfos; } } LucenePlusPlus-rel_3.0.9/src/core/index/InvertedDocConsumerPerField.cpp000066400000000000000000000007531456444476200262240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "InvertedDocConsumerPerField.h" namespace Lucene { InvertedDocConsumerPerField::~InvertedDocConsumerPerField() { } } LucenePlusPlus-rel_3.0.9/src/core/index/InvertedDocConsumerPerThread.cpp000066400000000000000000000007561456444476200264130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "InvertedDocConsumerPerThread.h" namespace Lucene { InvertedDocConsumerPerThread::~InvertedDocConsumerPerThread() { } } LucenePlusPlus-rel_3.0.9/src/core/index/InvertedDocEndConsumer.cpp000066400000000000000000000007341456444476200252370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "InvertedDocEndConsumer.h" namespace Lucene { InvertedDocEndConsumer::~InvertedDocEndConsumer() { } } LucenePlusPlus-rel_3.0.9/src/core/index/InvertedDocEndConsumerPerField.cpp000066400000000000000000000007641456444476200266550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "InvertedDocEndConsumerPerField.h" namespace Lucene { InvertedDocEndConsumerPerField::~InvertedDocEndConsumerPerField() { } } LucenePlusPlus-rel_3.0.9/src/core/index/InvertedDocEndConsumerPerThread.cpp000066400000000000000000000007671456444476200270440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "InvertedDocEndConsumerPerThread.h" namespace Lucene { InvertedDocEndConsumerPerThread::~InvertedDocEndConsumerPerThread() { } } LucenePlusPlus-rel_3.0.9/src/core/index/KeepOnlyLastCommitDeletionPolicy.cpp000066400000000000000000000017361456444476200272600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "KeepOnlyLastCommitDeletionPolicy.h" #include "IndexCommit.h" namespace Lucene { KeepOnlyLastCommitDeletionPolicy::~KeepOnlyLastCommitDeletionPolicy() { } void KeepOnlyLastCommitDeletionPolicy::onInit(Collection commits) { // Note that commits.size() should normally be 1 onCommit(commits); } void KeepOnlyLastCommitDeletionPolicy::onCommit(Collection commits) { // Note that commits.size() should normally be 2 (if not called by onInit above) int32_t size = commits.size(); for (int32_t i = 0; i < size - 1; ++i) { commits[i]->deleteCommit(); } } } LucenePlusPlus-rel_3.0.9/src/core/index/LogByteSizeMergePolicy.cpp000066400000000000000000000030541456444476200252240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "LogByteSizeMergePolicy.h" namespace Lucene { /// Default minimum segment size. 
const double LogByteSizeMergePolicy::DEFAULT_MIN_MERGE_MB = 1.6; /// Default maximum segment size. A segment of this size or larger will never be merged. const double LogByteSizeMergePolicy::DEFAULT_MAX_MERGE_MB = DBL_MAX; LogByteSizeMergePolicy::LogByteSizeMergePolicy(const IndexWriterPtr& writer) : LogMergePolicy(writer) { minMergeSize = (int64_t)(DEFAULT_MIN_MERGE_MB * 1024 * 1024); maxMergeSize = DEFAULT_MAX_MERGE_MB == DBL_MAX ? std::numeric_limits::max() : (int64_t)(DEFAULT_MAX_MERGE_MB * 1024 * 1024); } LogByteSizeMergePolicy::~LogByteSizeMergePolicy() { } int64_t LogByteSizeMergePolicy::size(const SegmentInfoPtr& info) { return sizeBytes(info); } void LogByteSizeMergePolicy::setMaxMergeMB(double mb) { maxMergeSize = (int64_t)(mb * 1024 * 1024); } double LogByteSizeMergePolicy::getMaxMergeMB() { return ((double)maxMergeSize) / 1024 / 1024; } void LogByteSizeMergePolicy::setMinMergeMB(double mb) { minMergeSize = (int64_t)(mb * 1024 * 1024); } double LogByteSizeMergePolicy::getMinMergeMB() { return ((double)minMergeSize) / 1024 / 1024; } } LucenePlusPlus-rel_3.0.9/src/core/index/LogDocMergePolicy.cpp000066400000000000000000000021711456444476200241720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LogDocMergePolicy.h" namespace Lucene { /// Default minimum segment size. 
@see setMinMergeDocs const int32_t LogDocMergePolicy::DEFAULT_MIN_MERGE_DOCS = 1000; LogDocMergePolicy::LogDocMergePolicy(const IndexWriterPtr& writer) : LogMergePolicy(writer) { minMergeSize = DEFAULT_MIN_MERGE_DOCS; // maxMergeSize is never used by LogDocMergePolicy; set it to LLONG_MAX to disable it maxMergeSize = std::numeric_limits::max(); } LogDocMergePolicy::~LogDocMergePolicy() { } int64_t LogDocMergePolicy::size(const SegmentInfoPtr& info) { return sizeDocs(info); } void LogDocMergePolicy::setMinMergeDocs(int32_t minMergeDocs) { minMergeSize = minMergeDocs; } int32_t LogDocMergePolicy::getMinMergeDocs() { return (int32_t)minMergeSize; } } LucenePlusPlus-rel_3.0.9/src/core/index/LogMergePolicy.cpp000066400000000000000000000342661456444476200235560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LogMergePolicy.h" #include "IndexWriter.h" #include "SegmentInfo.h" #include "StringUtils.h" namespace Lucene { /// Defines the allowed range of log(size) for each level. A level is computed by taking the max segment /// log size, minus LEVEL_LOG_SPAN, and finding all segments falling within that range. const double LogMergePolicy::LEVEL_LOG_SPAN = 0.75; /// Default merge factor, which is how many segments are merged at a time. const int32_t LogMergePolicy::DEFAULT_MERGE_FACTOR = 10; /// Default maximum segment size. A segment of this size or larger will never be merged. const int32_t LogMergePolicy::DEFAULT_MAX_MERGE_DOCS = INT_MAX; /// Default noCFSRatio. If a merge's size is >= 10% of the index, then we disable compound file for it. 
const double LogMergePolicy::DEFAULT_NO_CFS_RATIO = 0.1; LogMergePolicy::LogMergePolicy(const IndexWriterPtr& writer) : MergePolicy(writer) { mergeFactor = DEFAULT_MERGE_FACTOR; noCFSRatio = DEFAULT_NO_CFS_RATIO; minMergeSize = 0; maxMergeSize = 0; maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; calibrateSizeByDeletes = false; _useCompoundFile = true; _useCompoundDocStore = true; } LogMergePolicy::~LogMergePolicy() { } double LogMergePolicy::getNoCFSRatio() { return noCFSRatio; } void LogMergePolicy::setNoCFSRatio(double noCFSRatio) { if (noCFSRatio < 0.0 || noCFSRatio > 1.0) { boost::throw_exception(IllegalArgumentException(L"noCFSRatio must be 0.0 to 1.0 inclusive; got " + StringUtils::toString(noCFSRatio))); } this->noCFSRatio = noCFSRatio; } bool LogMergePolicy::verbose() { return (!_writer.expired() && IndexWriterPtr(_writer)->verbose()); } void LogMergePolicy::message(const String& message) { if (verbose()) { IndexWriterPtr(_writer)->message(L"LMP: " + message); } } int32_t LogMergePolicy::getMergeFactor() { return mergeFactor; } void LogMergePolicy::setMergeFactor(int32_t mergeFactor) { if (mergeFactor < 2) { boost::throw_exception(IllegalArgumentException(L"mergeFactor cannot be less than 2")); } this->mergeFactor = mergeFactor; } bool LogMergePolicy::getUseCompoundFile() { return _useCompoundFile; } void LogMergePolicy::setUseCompoundFile(bool useCompoundFile) { _useCompoundFile = useCompoundFile; } bool LogMergePolicy::useCompoundFile(const SegmentInfosPtr& segments, const SegmentInfoPtr& newSegment) { return _useCompoundFile; } bool LogMergePolicy::useCompoundDocStore(const SegmentInfosPtr& segments) { return _useCompoundDocStore; } void LogMergePolicy::setUseCompoundDocStore(bool useCompoundDocStore) { _useCompoundDocStore = useCompoundDocStore; } bool LogMergePolicy::getUseCompoundDocStore() { return _useCompoundDocStore; } void LogMergePolicy::setCalibrateSizeByDeletes(bool calibrateSizeByDeletes) { this->calibrateSizeByDeletes = calibrateSizeByDeletes; } bool 
LogMergePolicy::getCalibrateSizeByDeletes() { return calibrateSizeByDeletes; } void LogMergePolicy::close() { } int64_t LogMergePolicy::sizeDocs(const SegmentInfoPtr& info) { if (calibrateSizeByDeletes) { int32_t delCount = IndexWriterPtr(_writer)->numDeletedDocs(info); return (info->docCount - (int64_t)delCount); } else { return info->docCount; } } int64_t LogMergePolicy::sizeBytes(const SegmentInfoPtr& info) { int64_t byteSize = info->sizeInBytes(); if (calibrateSizeByDeletes) { int32_t delCount = IndexWriterPtr(_writer)->numDeletedDocs(info); double delRatio = info->docCount <= 0 ? 0.0 : ((double)delCount / (double)info->docCount); return info->docCount <= 0 ? byteSize : (int64_t)(byteSize * (1.0 - delRatio)); } else { return byteSize; } } bool LogMergePolicy::isOptimized(const SegmentInfosPtr& infos, int32_t maxNumSegments, SetSegmentInfo segmentsToOptimize) { int32_t numSegments = infos->size(); int32_t numToOptimize = 0; SegmentInfoPtr optimizeInfo; for (int32_t i = 0; i < numSegments && numToOptimize <= maxNumSegments; ++i) { SegmentInfoPtr info(infos->info(i)); if (segmentsToOptimize.contains(info)) { ++numToOptimize; optimizeInfo = info; } } return (numToOptimize <= maxNumSegments && (numToOptimize != 1 || isOptimized(optimizeInfo))); } bool LogMergePolicy::isOptimized(const SegmentInfoPtr& info) { IndexWriterPtr writer(_writer); bool hasDeletions = (writer->numDeletedDocs(info) > 0); return (!hasDeletions && !info->hasSeparateNorms() && info->dir == writer->getDirectory() && (info->getUseCompoundFile() == _useCompoundFile || noCFSRatio < 1.0)); } MergeSpecificationPtr LogMergePolicy::findMergesForOptimize(const SegmentInfosPtr& segmentInfos, int32_t maxSegmentCount, SetSegmentInfo segmentsToOptimize) { MergeSpecificationPtr spec; BOOST_ASSERT(maxSegmentCount > 0); if (!isOptimized(segmentInfos, maxSegmentCount, segmentsToOptimize)) { // Find the newest (rightmost) segment that needs to be optimized (other segments may have been // flushed since optimize 
started) int32_t last = segmentInfos->size(); while (last > 0) { if (segmentsToOptimize.contains(segmentInfos->info(--last))) { ++last; break; } } if (last > 0) { spec = newLucene(); // First, enroll all "full" merges (size mergeFactor) to potentially be run concurrently while (last - maxSegmentCount + 1 >= mergeFactor) { spec->add(makeOneMerge(segmentInfos, segmentInfos->range(last - mergeFactor, last))); last -= mergeFactor; } // Only if there are no full merges pending do we add a final partial (< mergeFactor segments) merge if (spec->merges.empty()) { if (maxSegmentCount == 1) { // Since we must optimize down to 1 segment, the choice is simple if (last > 1 || !isOptimized(segmentInfos->info(0))) { spec->add(makeOneMerge(segmentInfos, segmentInfos->range(0, last))); } } else if (last > maxSegmentCount) { // Take care to pick a partial merge that is least cost, but does not make the index too // lopsided. If we always just picked the partial tail then we could produce a highly // lopsided index over time // We must merge this many segments to leave maxNumSegments in the index (from when // optimize was first kicked off) int32_t finalMergeSize = last - maxSegmentCount + 1; // Consider all possible starting points int64_t bestSize = 0; int32_t bestStart = 0; for (int32_t i = 0; i < last - finalMergeSize + 1; ++i) { int64_t sumSize = 0; for (int32_t j = 0; j < finalMergeSize; ++j) { sumSize += size(segmentInfos->info(j + i)); } if (i == 0 || (sumSize < 2 * size(segmentInfos->info(i - 1)) && sumSize < bestSize)) { bestStart = i; bestSize = sumSize; } } spec->add(makeOneMerge(segmentInfos, segmentInfos->range(bestStart, bestStart + finalMergeSize))); } } } else { spec.reset(); } } else { spec.reset(); } return spec; } MergeSpecificationPtr LogMergePolicy::findMergesToExpungeDeletes(const SegmentInfosPtr& segmentInfos) { int32_t numSegments = segmentInfos->size(); message(L"findMergesToExpungeDeletes: " + StringUtils::toString(numSegments) + L" segments"); 
MergeSpecificationPtr spec(newLucene()); int32_t firstSegmentWithDeletions = -1; for (int32_t i = 0; i < numSegments; ++i) { SegmentInfoPtr info(segmentInfos->info(i)); int32_t delCount = IndexWriterPtr(_writer)->numDeletedDocs(info); if (delCount > 0) { message(L" segment " + info->name + L" has deletions"); if (firstSegmentWithDeletions == -1) { firstSegmentWithDeletions = i; } else if (i - firstSegmentWithDeletions == mergeFactor) { // We've seen mergeFactor segments in a row with deletions, so force a merge now message(L" add merge " + StringUtils::toString(firstSegmentWithDeletions) + L" to " + StringUtils::toString(i - 1) + L" inclusive"); spec->add(makeOneMerge(segmentInfos, segmentInfos->range(firstSegmentWithDeletions, i))); firstSegmentWithDeletions = i; } } else if (firstSegmentWithDeletions != -1) { // End of a sequence of segments with deletions, so merge those past segments even if // it's fewer than mergeFactor segments message(L" add merge " + StringUtils::toString(firstSegmentWithDeletions) + L" to " + StringUtils::toString(i - 1) + L" inclusive"); spec->add(makeOneMerge(segmentInfos, segmentInfos->range(firstSegmentWithDeletions, i))); firstSegmentWithDeletions = -1; } } if (firstSegmentWithDeletions != -1) { message(L" add merge " + StringUtils::toString(firstSegmentWithDeletions) + L" to " + StringUtils::toString(numSegments - 1) + L" inclusive"); spec->add(makeOneMerge(segmentInfos, segmentInfos->range(firstSegmentWithDeletions, numSegments))); } return spec; } MergeSpecificationPtr LogMergePolicy::findMerges(const SegmentInfosPtr& segmentInfos) { int32_t numSegments = segmentInfos->size(); message(L"findMerges: " + StringUtils::toString(numSegments) + L" segments"); // Compute levels, which is just log (base mergeFactor) of the size of each segment Collection levels(Collection::newInstance(numSegments)); double norm = std::log((double)mergeFactor); for (int32_t i = 0; i < numSegments; ++i) { SegmentInfoPtr info(segmentInfos->info(i)); int64_t 
_size = size(info); // Floor tiny segments _size = std::max(_size, (int64_t)1); levels[i] = std::log((double)_size) / norm; } double levelFloor = minMergeSize <= 0 ? 0 : (std::log((double)minMergeSize) / norm); // Now, we quantize the log values into levels. The first level is any segment whose log // size is within LEVEL_LOG_SPAN of the max size, or, who has such as segment "to the right". // Then, we find the max of all other segments and use that to define the next level segment, etc. MergeSpecificationPtr spec; int32_t start = 0; while (start < numSegments) { // Find max level of all segments not already quantized double maxLevel = levels[start]; for (int32_t i = 1 + start; i < numSegments; ++i) { maxLevel = std::max(maxLevel, levels[i]); } // Now search backwards for the rightmost segment that falls into this level double levelBottom; if (maxLevel < levelFloor) { levelBottom = -1.0; } else { levelBottom = (double)(maxLevel - LEVEL_LOG_SPAN); // Force a boundary at the level floor if (levelBottom < levelFloor && maxLevel >= levelFloor) { levelBottom = levelFloor; } } int32_t upto = numSegments - 1; while (upto >= start) { if (levels[upto] >= levelBottom) { break; } --upto; } message(L" level " + StringUtils::toString(levelBottom) + L" to " + StringUtils::toString(maxLevel) + L": " + StringUtils::toString(1 + upto - start) + L" segments"); // Finally, record all merges that are viable at this level int32_t end = start + mergeFactor; while (end <= 1 + upto) { bool anyTooLarge = false; for (int32_t i = start; i < end; ++i) { SegmentInfoPtr info(segmentInfos->info(i)); if (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs) { anyTooLarge = true; break; } } if (!anyTooLarge) { if (!spec) { spec = newLucene(); } message(L" " + StringUtils::toString(start) + L" to " + StringUtils::toString(end) + L": add this merge"); spec->add(makeOneMerge(segmentInfos, segmentInfos->range(start, end))); } else { message(L" " + StringUtils::toString(start) + L" to " + 
StringUtils::toString(end) + L": contains segment over maxMergeSize or maxMergeDocs; skipping"); } start = end; end = start + mergeFactor; } start = 1 + upto; } return spec; } OneMergePtr LogMergePolicy::makeOneMerge(const SegmentInfosPtr& infos, const SegmentInfosPtr& infosToMerge) { bool doCFS; if (!_useCompoundFile) { doCFS = false; } else if (noCFSRatio == 1.0) { doCFS = true; } else { int64_t totSize = 0; int32_t numInfos = infos->size(); for (int32_t i = 0; i < numInfos; ++i) { SegmentInfoPtr info(infos->info(i)); totSize += size(info); } int64_t mergeSize = 0; int32_t numMerges = infosToMerge->size(); for (int32_t i = 0; i < numMerges; ++i) { SegmentInfoPtr info(infosToMerge->info(i)); mergeSize += size(info); } doCFS = mergeSize <= noCFSRatio * totSize; } return newLucene(infosToMerge, doCFS); } void LogMergePolicy::setMaxMergeDocs(int32_t maxMergeDocs) { this->maxMergeDocs = maxMergeDocs; } int32_t LogMergePolicy::getMaxMergeDocs() { return maxMergeDocs; } } LucenePlusPlus-rel_3.0.9/src/core/index/MergeDocIDRemapper.cpp000066400000000000000000000057071456444476200242710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MergeDocIDRemapper.h" #include "SegmentMerger.h" #include "MergePolicy.h" #include "SegmentInfo.h" namespace Lucene { MergeDocIDRemapper::MergeDocIDRemapper(const SegmentInfosPtr& infos, Collection< Collection > docMaps, Collection delCounts, const OneMergePtr& merge, int32_t mergedDocCount) { this->docMaps = docMaps; SegmentInfoPtr firstSegment(merge->segments->info(0)); int32_t i = 0; this->minDocID = 0; while (true) { SegmentInfoPtr info(infos->info(i)); if (info->equals(firstSegment)) { break; } minDocID += info->docCount; ++i; } int32_t numDocs = 0; for (int32_t j = 0; j < docMaps.size(); ++i, ++j) { numDocs += infos->info(i)->docCount; BOOST_ASSERT(infos->info(i)->equals(merge->segments->info(j))); } this->maxDocID = minDocID + numDocs; starts = Collection::newInstance(docMaps.size()); newStarts = Collection::newInstance(docMaps.size()); starts[0] = minDocID; newStarts[0] = minDocID; for (i = 1; i < docMaps.size(); ++i) { int32_t lastDocCount = merge->segments->info(i - 1)->docCount; starts[i] = starts[i - 1] + lastDocCount; newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1]; } this->docShift = numDocs - mergedDocCount; // There are rare cases when docShift is 0. It happens if you try to delete a docID that's // out of bounds, because the SegmentReader still allocates deletedDocs and pretends it has // deletions ... 
so we can't make this assert here: BOOST_ASSERT(docShift > 0); // Make sure it all adds up BOOST_ASSERT(docShift == maxDocID - (newStarts[docMaps.size() - 1] + merge->segments->info(docMaps.size() - 1)->docCount - delCounts[docMaps.size() - 1])); } MergeDocIDRemapper::~MergeDocIDRemapper() { } int32_t MergeDocIDRemapper::remap(int32_t oldDocID) { if (oldDocID < minDocID) { // Unaffected by merge return oldDocID; } else if (oldDocID >= maxDocID) { // This doc was "after" the merge, so simple shift return oldDocID - docShift; } else { // Binary search to locate this document & find its new docID Collection::iterator doc = std::upper_bound(starts.begin(), starts.begin() + docMaps.size(), oldDocID); int32_t docMap = std::distance(starts.begin(), doc) - 1; if (docMaps[docMap]) { return newStarts[docMap] + docMaps[docMap][oldDocID - starts[docMap]]; } else { return newStarts[docMap] + oldDocID - starts[docMap]; } } } } LucenePlusPlus-rel_3.0.9/src/core/index/MergePolicy.cpp000066400000000000000000000052621456444476200231060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MergePolicy.h" #include "SegmentInfos.h" #include "SegmentInfo.h" #include "StringUtils.h" namespace Lucene { MergePolicy::MergePolicy(const IndexWriterPtr& writer) { this->_writer = writer; } MergePolicy::~MergePolicy() { } OneMerge::OneMerge(const SegmentInfosPtr& segments, bool useCompoundFile) { mergeDocStores = false; optimize = false; registerDone = false; mergeGen = 0; isExternal = false; maxNumSegmentsOptimize = 0; aborted = false; if (segments->empty()) { boost::throw_exception(RuntimeException(L"segments must include at least one segment")); } this->segments = segments; this->useCompoundFile = useCompoundFile; } OneMerge::~OneMerge() { } void OneMerge::setException(const LuceneException& error) { SyncLock syncLock(this); this->error = error; } LuceneException OneMerge::getException() { SyncLock syncLock(this); return error; } void OneMerge::abort() { SyncLock syncLock(this); aborted = true; } bool OneMerge::isAborted() { SyncLock syncLock(this); return aborted; } void OneMerge::checkAborted(const DirectoryPtr& dir) { SyncLock syncLock(this); if (aborted) { boost::throw_exception(MergeAbortedException(L"merge is aborted: " + segString(dir))); } } String OneMerge::segString(const DirectoryPtr& dir) { StringStream buffer; int32_t numSegments = segments->size(); for (int32_t i = 0; i < numSegments; ++i) { if (i > 0) { buffer << L" "; } buffer << segments->info(i)->segString(dir); } if (info) { buffer << L" into " + info->name; } if (optimize) { buffer << L" [optimize]"; } if (mergeDocStores) { buffer << L" [mergeDocStores]"; } return buffer.str(); } MergeSpecification::MergeSpecification() { merges = Collection::newInstance(); } MergeSpecification::~MergeSpecification() { } void MergeSpecification::add(const OneMergePtr& merge) { merges.add(merge); } String MergeSpecification::segString(const DirectoryPtr& dir) { String seg(L"MergeSpec:\n"); int32_t 
i = 1; for (Collection::iterator merge = merges.begin(); merge != merges.end(); ++merge) { seg += L" " + StringUtils::toString(i++) + L": " + (*merge)->segString(dir); } return seg; } } LucenePlusPlus-rel_3.0.9/src/core/index/MergeScheduler.cpp000066400000000000000000000007041456444476200235610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MergeScheduler.h" namespace Lucene { MergeScheduler::~MergeScheduler() { } } LucenePlusPlus-rel_3.0.9/src/core/index/MultiLevelSkipListReader.cpp000066400000000000000000000155121456444476200255560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiLevelSkipListReader.h" #include "BufferedIndexInput.h" #include "MiscUtils.h" namespace Lucene { MultiLevelSkipListReader::MultiLevelSkipListReader(const IndexInputPtr& skipStream, int32_t maxSkipLevels, int32_t skipInterval) { this->numberOfLevelsToBuffer = 1; this->numberOfSkipLevels = 0; this->docCount = 0; this->haveSkipped = false; this->lastDoc = 0; this->lastChildPointer = 0; this->skipStream = Collection::newInstance(maxSkipLevels); this->skipPointer = Collection::newInstance(maxSkipLevels); this->childPointer = Collection::newInstance(maxSkipLevels); this->numSkipped = Collection::newInstance(maxSkipLevels); this->maxNumberOfSkipLevels = maxSkipLevels; this->skipInterval = Collection::newInstance(maxSkipLevels); this->skipStream[0] = skipStream; this->inputIsBuffered = boost::dynamic_pointer_cast(skipStream).get() != NULL; this->skipInterval[0] = skipInterval; this->skipDoc = Collection::newInstance(maxSkipLevels); MiscUtils::arrayFill(this->skipPointer.begin(), 0, this->skipPointer.size(), 0); MiscUtils::arrayFill(this->childPointer.begin(), 0, this->childPointer.size(), 0); MiscUtils::arrayFill(this->numSkipped.begin(), 0, this->numSkipped.size(), 0); MiscUtils::arrayFill(this->skipDoc.begin(), 0, this->skipDoc.size(), 0); for (int32_t i = 1; i < maxSkipLevels; ++i) { // cache skip intervals this->skipInterval[i] = this->skipInterval[i - 1] * skipInterval; } } MultiLevelSkipListReader::~MultiLevelSkipListReader() { } int32_t MultiLevelSkipListReader::getDoc() { return lastDoc; } int32_t MultiLevelSkipListReader::skipTo(int32_t target) { if (!haveSkipped) { // first time, load skip levels loadSkipLevels(); haveSkipped = true; } // walk up the levels until highest level is found that has a skip for this target int32_t level = 0; while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) { ++level; } while (level >= 0) { if (target 
> skipDoc[level]) { if (!loadNextSkip(level)) { continue; } } else { // no more skips on this level, go down one level if (level > 0 && lastChildPointer > skipStream[level - 1]->getFilePointer()) { seekChild(level - 1); } --level; } } return numSkipped[0] - skipInterval[0] - 1; } bool MultiLevelSkipListReader::loadNextSkip(int32_t level) { // we have to skip, the target document is greater than the current skip list entry setLastSkipData(level); numSkipped[level] += skipInterval[level]; if (numSkipped[level] > docCount) { // this skip list is exhausted skipDoc[level] = INT_MAX; if (numberOfSkipLevels > level) { numberOfSkipLevels = level; } return false; } // read next skip entry skipDoc[level] += readSkipData(level, skipStream[level]); if (level != 0) { // read the child pointer if we are not on the leaf level childPointer[level] = skipStream[level]->readVLong() + skipPointer[level - 1]; } return true; } void MultiLevelSkipListReader::seekChild(int32_t level) { skipStream[level]->seek(lastChildPointer); numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1]; skipDoc[level] = lastDoc; if (level > 0) { childPointer[level] = skipStream[level]->readVLong() + skipPointer[level - 1]; } } void MultiLevelSkipListReader::close() { for (int32_t i = 1; i < skipStream.size(); ++i) { if (skipStream[i]) { skipStream[i]->close(); } } } void MultiLevelSkipListReader::init(int64_t skipPointer, int32_t df) { this->skipPointer[0] = skipPointer; this->docCount = df; MiscUtils::arrayFill(skipDoc.begin(), 0, skipDoc.size(), 0); MiscUtils::arrayFill(numSkipped.begin(), 0, numSkipped.size(), 0); MiscUtils::arrayFill(childPointer.begin(), 0, childPointer.size(), 0); haveSkipped = false; for (int32_t i = 1; i < numberOfSkipLevels; ++i) { skipStream[i].reset(); } } void MultiLevelSkipListReader::loadSkipLevels() { numberOfSkipLevels = docCount == 0 ? 
0 : (int32_t)std::floor(std::log((double)docCount) / std::log((double)skipInterval[0])); if (numberOfSkipLevels > maxNumberOfSkipLevels) { numberOfSkipLevels = maxNumberOfSkipLevels; } skipStream[0]->seek(skipPointer[0]); int32_t toBuffer = numberOfLevelsToBuffer; for (int32_t i = numberOfSkipLevels - 1; i > 0; --i) { // the length of the current level int64_t length = skipStream[0]->readVLong(); // the start pointer of the current level skipPointer[i] = skipStream[0]->getFilePointer(); if (toBuffer > 0) { // buffer this level skipStream[i] = newLucene(skipStream[0], (int32_t)length); --toBuffer; } else { // clone this stream, it is already at the start of the current level skipStream[i] = boost::dynamic_pointer_cast(skipStream[0]->clone()); if (inputIsBuffered && length < BufferedIndexInput::BUFFER_SIZE) { boost::dynamic_pointer_cast(skipStream[i])->setBufferSize((int32_t)length); } // move base stream beyond the current level skipStream[0]->seek(skipStream[0]->getFilePointer() + length); } } // use base stream for the lowest level skipPointer[0] = skipStream[0]->getFilePointer(); } void MultiLevelSkipListReader::setLastSkipData(int32_t level) { lastDoc = skipDoc[level]; lastChildPointer = childPointer[level]; } SkipBuffer::SkipBuffer(const IndexInputPtr& input, int32_t length) { pos = 0; data = ByteArray::newInstance(length); pointer = input->getFilePointer(); input->readBytes(data.get(), 0, length); } SkipBuffer::~SkipBuffer() { } void SkipBuffer::close() { data.reset(); } int64_t SkipBuffer::getFilePointer() { return (pointer + pos); } int64_t SkipBuffer::length() { return data.size(); } uint8_t SkipBuffer::readByte() { return data[pos++]; } void SkipBuffer::readBytes(uint8_t* b, int32_t offset, int32_t length) { MiscUtils::arrayCopy(data.get(), pos, b, offset, length); pos += length; } void SkipBuffer::seek(int64_t pos) { this->pos = (int32_t)(pos - pointer); } } 
LucenePlusPlus-rel_3.0.9/src/core/index/MultiLevelSkipListWriter.cpp000066400000000000000000000053501456444476200256270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiLevelSkipListWriter.h" #include "RAMOutputStream.h" namespace Lucene { MultiLevelSkipListWriter::MultiLevelSkipListWriter(int32_t skipInterval, int32_t maxSkipLevels, int32_t df) { this->skipInterval = skipInterval; // calculate the maximum number of skip levels for this document frequency numberOfSkipLevels = df == 0 ? 0 : (int32_t)std::floor(std::log((double)df) / std::log((double)skipInterval)); // make sure it does not exceed maxSkipLevels numberOfSkipLevels = std::max(numberOfSkipLevels, maxSkipLevels); } MultiLevelSkipListWriter::~MultiLevelSkipListWriter() { } void MultiLevelSkipListWriter::init() { skipBuffer = Collection::newInstance(numberOfSkipLevels); for (int32_t i = 0; i < numberOfSkipLevels; ++i) { skipBuffer[i] = newLucene(); } } void MultiLevelSkipListWriter::resetSkip() { // creates new buffers or empties the existing ones if (!skipBuffer) { init(); } else { for (Collection::iterator buffer = skipBuffer.begin(); buffer != skipBuffer.end(); ++buffer) { (*buffer)->reset(); } } } void MultiLevelSkipListWriter::bufferSkip(int32_t df) { int32_t numLevels = 0; // determine max level for (; (df % skipInterval) == 0 && numLevels < numberOfSkipLevels; df /= skipInterval) { ++numLevels; } int64_t childPointer = 0; for (int32_t level = 0; level < numLevels; ++level) { writeSkipData(level, skipBuffer[level]); int64_t newChildPointer = skipBuffer[level]->getFilePointer(); if (level != 0) { // store child pointers for all levels except the lowest 
skipBuffer[level]->writeVLong(childPointer); } // remember the childPointer for the next level childPointer = newChildPointer; } } int64_t MultiLevelSkipListWriter::writeSkip(const IndexOutputPtr& output) { int64_t skipPointer = output->getFilePointer(); if (!skipBuffer || skipBuffer.empty()) { return skipPointer; } for (int32_t level = numberOfSkipLevels - 1; level > 0; --level) { int64_t length = skipBuffer[level]->getFilePointer(); if (length > 0) { output->writeVLong(length); skipBuffer[level]->writeTo(output); } } skipBuffer[0]->writeTo(output); return skipPointer; } } LucenePlusPlus-rel_3.0.9/src/core/index/MultiReader.cpp000066400000000000000000000246771456444476200231170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiReader.h" #include "DirectoryReader.h" #include "DefaultSimilarity.h" #include "FieldCache.h" #include "MiscUtils.h" namespace Lucene { MultiReader::MultiReader(Collection subReaders, bool closeSubReaders) { this->normsCache = MapStringByteArray::newInstance(); this->_maxDoc = 0; this->_numDocs = -1; this->_hasDeletions = false; this->subReaders = subReaders; starts = Collection::newInstance(subReaders.size() + 1); // build starts array decrefOnClose = Collection::newInstance(subReaders.size()); for (int32_t i = 0; i < subReaders.size(); ++i) { starts[i] = _maxDoc; _maxDoc += subReaders[i]->maxDoc(); // compute maxDocs if (!closeSubReaders) { subReaders[i]->incRef(); decrefOnClose[i] = true; } else { decrefOnClose[i] = false; } if (subReaders[i]->hasDeletions()) { _hasDeletions = true; } } starts[subReaders.size()] = _maxDoc; } MultiReader::~MultiReader() { } IndexReaderPtr 
MultiReader::reopen() { SyncLock syncLock(this); return doReopen(false); } LuceneObjectPtr MultiReader::clone(const LuceneObjectPtr& other) { SyncLock syncLock(this); try { return doReopen(true); } catch (LuceneException& e) { boost::throw_exception(RuntimeException(e.getError())); } return LuceneObjectPtr(); } IndexReaderPtr MultiReader::doReopen(bool doClone) { ensureOpen(); bool reopened = false; Collection newSubReaders(Collection::newInstance(subReaders.size())); bool success = false; LuceneException finally; try { for (int32_t i = 0; i < subReaders.size(); ++i) { if (doClone) { newSubReaders[i] = boost::dynamic_pointer_cast(subReaders[i]->clone()); } else { newSubReaders[i] = subReaders[i]->reopen(); } // if at least one of the subreaders was updated we remember that and return a new MultiReader if (newSubReaders[i] != subReaders[i]) { reopened = true; } } success = true; } catch (LuceneException& e) { finally = e; } if (!success && reopened) { for (int32_t i = 0; i < newSubReaders.size(); ++i) { if (newSubReaders[i] != subReaders[i]) { try { if (newSubReaders[i]) { newSubReaders[i]->close(); } } catch (...) 
{ // keep going - we want to clean up as much as possible } } } } finally.throwException(); if (reopened) { Collection newDecrefOnClose(Collection::newInstance(subReaders.size())); for (int32_t i = 0; i < subReaders.size(); ++i) { if (newSubReaders[i] == subReaders[i]) { newSubReaders[i]->incRef(); newDecrefOnClose[i] = true; } } MultiReaderPtr mr(newLucene(newSubReaders)); mr->decrefOnClose = newDecrefOnClose; return mr; } else { return shared_from_this(); } } Collection MultiReader::getTermFreqVectors(int32_t docNumber) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num return subReaders[i]->getTermFreqVectors(docNumber - starts[i]); // dispatch to segment } TermFreqVectorPtr MultiReader::getTermFreqVector(int32_t docNumber, const String& field) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num return subReaders[i]->getTermFreqVector(docNumber - starts[i], field); } void MultiReader::getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num subReaders[i]->getTermFreqVector(docNumber - starts[i], field, mapper); } void MultiReader::getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) { ensureOpen(); int32_t i = readerIndex(docNumber); // find segment num subReaders[i]->getTermFreqVector(docNumber - starts[i], mapper); } bool MultiReader::isOptimized() { return false; } int32_t MultiReader::numDocs() { // Don't call ensureOpen() here (it could affect performance) // NOTE: multiple threads may wind up init'ing numDocs... 
but that's harmless if (_numDocs == -1) { // check cache int32_t n = 0; // cache miss - recompute for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { n += (*reader)->numDocs(); // sum from readers } _numDocs = n; } return _numDocs; } int32_t MultiReader::maxDoc() { // Don't call ensureOpen() here (it could affect performance) return _maxDoc; } DocumentPtr MultiReader::document(int32_t n, const FieldSelectorPtr& fieldSelector) { ensureOpen(); int32_t i = readerIndex(n); // find segment num return subReaders[i]->document(n - starts[i], fieldSelector); // dispatch to segment reader } bool MultiReader::isDeleted(int32_t n) { // Don't call ensureOpen() here (it could affect performance) int32_t i = readerIndex(n); // find segment num return subReaders[i]->isDeleted(n - starts[i]); // dispatch to segment reader } bool MultiReader::hasDeletions() { // Don't call ensureOpen() here (it could affect performance) return _hasDeletions; } void MultiReader::doDelete(int32_t docNum) { _numDocs = -1; // invalidate cache int32_t i = readerIndex(docNum); // find segment num subReaders[i]->deleteDocument(docNum - starts[i]); // dispatch to segment reader _hasDeletions = true; } void MultiReader::doUndeleteAll() { for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { (*reader)->undeleteAll(); } _hasDeletions = false; _numDocs = -1; // invalidate cache } int32_t MultiReader::readerIndex(int32_t n) { return DirectoryReader::readerIndex(n, this->starts, this->subReaders.size()); } bool MultiReader::hasNorms(const String& field) { ensureOpen(); for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { if ((*reader)->hasNorms(field)) { return true; } } return false; } ByteArray MultiReader::norms(const String& field) { SyncLock syncLock(this); ensureOpen(); ByteArray bytes(normsCache.get(field)); if (bytes) { return bytes; // cache hit } if (!hasNorms(field)) { return 
ByteArray(); } bytes = ByteArray::newInstance(maxDoc()); for (int32_t i = 0; i < subReaders.size(); ++i) { subReaders[i]->norms(field, bytes, starts[i]); } normsCache.put(field, bytes); // update cache return bytes; } void MultiReader::norms(const String& field, ByteArray norms, int32_t offset) { SyncLock syncLock(this); ensureOpen(); ByteArray bytes(normsCache.get(field)); for (int32_t i = 0; i < subReaders.size(); ++i) { // read from segments subReaders[i]->norms(field, norms, offset + starts[i]); } if (!bytes && !hasNorms(field)) { MiscUtils::arrayFill(norms.get(), offset, norms.size(), DefaultSimilarity::encodeNorm(1.0)); } else if (bytes) { // cache hit MiscUtils::arrayCopy(bytes.get(), 0, norms.get(), offset, maxDoc()); } else { for (int32_t i = 0; i < subReaders.size(); ++i) { subReaders[i]->norms(field, norms, offset + starts[i]); } } } void MultiReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { { SyncLock normsLock(&normsCache); normsCache.remove(field); // clear cache } int32_t i = readerIndex(doc); // find segment num subReaders[i]->setNorm(doc - starts[i], field, value); // dispatch } TermEnumPtr MultiReader::terms() { ensureOpen(); return newLucene(shared_from_this(), subReaders, starts, TermPtr()); } TermEnumPtr MultiReader::terms(const TermPtr& t) { ensureOpen(); return newLucene(shared_from_this(), subReaders, starts, t); } int32_t MultiReader::docFreq(const TermPtr& t) { ensureOpen(); int32_t total = 0; // sum freqs in segments for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { total += (*reader)->docFreq(t); } return total; } TermDocsPtr MultiReader::termDocs() { ensureOpen(); return newLucene(shared_from_this(), subReaders, starts); } TermPositionsPtr MultiReader::termPositions() { ensureOpen(); return newLucene(shared_from_this(), subReaders, starts); } void MultiReader::doCommit(MapStringString commitUserData) { for (Collection::iterator reader = subReaders.begin(); reader != 
subReaders.end(); ++reader) { (*reader)->commit(commitUserData); } } void MultiReader::doClose() { SyncLock syncLock(this); for (int32_t i = 0; i < subReaders.size(); ++i) { if (decrefOnClose[i]) { subReaders[i]->decRef(); } else { subReaders[i]->close(); } } // NOTE: only needed in case someone had asked for FieldCache for top-level reader (which is // generally not a good idea) FieldCache::DEFAULT()->purge(shared_from_this()); } HashSet MultiReader::getFieldNames(FieldOption fieldOption) { ensureOpen(); return DirectoryReader::getFieldNames(fieldOption, this->subReaders); } bool MultiReader::isCurrent() { for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { if (!(*reader)->isCurrent()) { return false; } } // all subreaders are up to date return true; } int64_t MultiReader::getVersion() { boost::throw_exception(UnsupportedOperationException()); return 0; } Collection MultiReader::getSequentialSubReaders() { return subReaders; } } LucenePlusPlus-rel_3.0.9/src/core/index/MultipleTermPositions.cpp000066400000000000000000000104641456444476200252220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultipleTermPositions.h" #include "_MultipleTermPositions.h" #include "IndexReader.h" #include "Term.h" namespace Lucene { MultipleTermPositions::MultipleTermPositions(const IndexReaderPtr& indexReader, Collection terms) { Collection termPositions(Collection::newInstance()); for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { termPositions.add(indexReader->termPositions(*term)); } termPositionsQueue = newLucene(termPositions); posList = newLucene(); _doc = 0; _freq = 0; } MultipleTermPositions::~MultipleTermPositions() { } bool MultipleTermPositions::next() { if (termPositionsQueue->empty()) { return false; } posList->clear(); _doc = termPositionsQueue->top()->doc(); TermPositionsPtr tp; do { tp = termPositionsQueue->top(); for (int32_t i = 0; i < tp->freq(); ++i) { posList->add(tp->nextPosition()); } if (tp->next()) { termPositionsQueue->updateTop(); } else { termPositionsQueue->pop(); tp->close(); } } while (!termPositionsQueue->empty() && termPositionsQueue->top()->doc() == _doc); posList->sort(); _freq = posList->size(); return true; } int32_t MultipleTermPositions::nextPosition() { return posList->next(); } bool MultipleTermPositions::skipTo(int32_t target) { while (termPositionsQueue->top() && target > termPositionsQueue->top()->doc()) { TermPositionsPtr tp(termPositionsQueue->top()); termPositionsQueue->pop(); if (tp->skipTo(target)) { termPositionsQueue->add(tp); } else { tp->close(); } } return next(); } int32_t MultipleTermPositions::doc() { return _doc; } int32_t MultipleTermPositions::freq() { return _freq; } void MultipleTermPositions::close() { while (!termPositionsQueue->empty()) { termPositionsQueue->pop()->close(); } } void MultipleTermPositions::seek(const TermPtr& term) { boost::throw_exception(UnsupportedOperationException()); } void MultipleTermPositions::seek(const TermEnumPtr& termEnum) { 
boost::throw_exception(UnsupportedOperationException()); } int32_t MultipleTermPositions::read(Collection& docs, Collection& freqs) { boost::throw_exception(UnsupportedOperationException()); return 0; } ByteArray MultipleTermPositions::getPayload(ByteArray data, int32_t offset) { boost::throw_exception(UnsupportedOperationException()); return ByteArray(); } bool MultipleTermPositions::isPayloadAvailable() { return false; } TermPositionsQueue::TermPositionsQueue(Collection termPositions) : PriorityQueue(termPositions.size()) { this->termPositions = termPositions; } TermPositionsQueue::~TermPositionsQueue() { } void TermPositionsQueue::initialize() { PriorityQueue::initialize(); for (Collection::iterator tp = termPositions.begin(); tp != termPositions.end(); ++tp) { if ((*tp)->next()) { add(*tp); } } } bool TermPositionsQueue::lessThan(const TermPositionsPtr& first, const TermPositionsPtr& second) { return (first->doc() < second->doc()); } IntQueue::IntQueue() { arraySize = 16; index = 0; lastIndex = 0; array = Collection::newInstance(arraySize); } IntQueue::~IntQueue() { } void IntQueue::add(int32_t i) { if (lastIndex == arraySize) { growArray(); } array[lastIndex++] = i; } int32_t IntQueue::next() { return array[index++]; } void IntQueue::sort() { std::sort(array.begin() + index, array.begin() + lastIndex); } void IntQueue::clear() { index = 0; lastIndex = 0; } int32_t IntQueue::size() { return (lastIndex - index); } void IntQueue::growArray() { array.resize(arraySize * 2); arraySize *= 2; } } LucenePlusPlus-rel_3.0.9/src/core/index/NormsWriter.cpp000066400000000000000000000140011456444476200231510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NormsWriter.h" #include "NormsWriterPerThread.h" #include "NormsWriterPerField.h" #include "Similarity.h" #include "IndexFileNames.h" #include "IndexOutput.h" #include "SegmentMerger.h" #include "SegmentWriteState.h" #include "InvertedDocEndConsumerPerField.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "Directory.h" namespace Lucene { NormsWriter::NormsWriter() { } NormsWriter::~NormsWriter() { } uint8_t NormsWriter::getDefaultNorm() { static uint8_t defaultNorm = 0; LUCENE_RUN_ONCE( defaultNorm = Similarity::encodeNorm(1.0); ); return defaultNorm; } InvertedDocEndConsumerPerThreadPtr NormsWriter::addThread(const DocInverterPerThreadPtr& docInverterPerThread) { return newLucene(docInverterPerThread, shared_from_this()); } void NormsWriter::abort() { } void NormsWriter::files(HashSet files) { } void NormsWriter::setFieldInfos(const FieldInfosPtr& fieldInfos) { this->fieldInfos = fieldInfos; } void NormsWriter::flush(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) { MapFieldInfoCollectionNormsWriterPerField byField(MapFieldInfoCollectionNormsWriterPerField::newInstance()); // Typically, each thread will have encountered the same field. 
So first we collate by field, ie all // per-thread field instances that correspond to the same FieldInfo for (MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end();) { NormsWriterPerFieldPtr normsPerField(boost::static_pointer_cast(*perField)); if (normsPerField->upto > 0) { // It has some norms Collection l = byField.get(normsPerField->fieldInfo); if (!l) { l = Collection::newInstance(); byField.put(normsPerField->fieldInfo, l); } l.add(normsPerField); ++perField; } else { // Remove this field since we haven't seen it since the previous flush perField = entry->second.remove(perField); } } } String normsFileName(state->segmentName + L"." + IndexFileNames::NORMS_EXTENSION()); state->flushedFiles.add(normsFileName); IndexOutputPtr normsOut(state->directory->createOutput(normsFileName)); LuceneException finally; try { normsOut->writeBytes(SegmentMerger::NORMS_HEADER, 0, SegmentMerger::NORMS_HEADER_LENGTH); int32_t numField = fieldInfos->size(); int32_t normCount = 0; for (int32_t fieldNumber = 0; fieldNumber < numField; ++fieldNumber) { FieldInfoPtr fieldInfo(fieldInfos->fieldInfo(fieldNumber)); Collection toMerge = byField.get(fieldInfo); int32_t upto = 0; if (toMerge) { int32_t numFields = toMerge.size(); ++normCount; Collection fields(Collection::newInstance(numFields)); Collection uptos(Collection::newInstance(numFields)); for (int32_t j = 0; j < numFields; ++j) { fields[j] = toMerge[j]; } int32_t numLeft = numFields; while (numLeft > 0) { BOOST_ASSERT(uptos[0] < fields[0]->docIDs.size()); int32_t minLoc = 0; int32_t minDocID = fields[0]->docIDs[uptos[0]]; for (int32_t j = 1; j < numLeft; ++j) { int32_t docID = fields[j]->docIDs[uptos[j]]; if (docID < minDocID) { minDocID = docID; minLoc = j; } } BOOST_ASSERT(minDocID < state->numDocs); // Fill hole for (; upto < 
minDocID; ++upto) { normsOut->writeByte(getDefaultNorm()); } normsOut->writeByte(fields[minLoc]->norms[uptos[minLoc]]); ++(uptos[minLoc]); ++upto; if (uptos[minLoc] == fields[minLoc]->upto) { fields[minLoc]->reset(); if (minLoc != numLeft - 1) { fields[minLoc] = fields[numLeft - 1]; uptos[minLoc] = uptos[numLeft - 1]; } --numLeft; } } // Fill final hole with defaultNorm for (; upto < state->numDocs; ++upto) { normsOut->writeByte(getDefaultNorm()); } } else if (fieldInfo->isIndexed && !fieldInfo->omitNorms) { ++normCount; // Fill entire field with default norm for (; upto < state->numDocs; ++upto) { normsOut->writeByte(getDefaultNorm()); } } BOOST_ASSERT(4 + normCount * state->numDocs == normsOut->getFilePointer()); // .nrm file size mismatch? } } catch (LuceneException& e) { finally = e; } normsOut->close(); finally.throwException(); } void NormsWriter::closeDocStore(const SegmentWriteStatePtr& state) { } } LucenePlusPlus-rel_3.0.9/src/core/index/NormsWriterPerField.cpp000066400000000000000000000040521456444476200245710ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NormsWriterPerField.h" #include "NormsWriterPerThread.h" #include "Similarity.h" #include "DocInverterPerField.h" #include "DocumentsWriter.h" #include "FieldInfo.h" #include "MiscUtils.h" namespace Lucene { NormsWriterPerField::NormsWriterPerField(const DocInverterPerFieldPtr& docInverterPerField, const NormsWriterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo) { docIDs = Collection::newInstance(1); norms = ByteArray::newInstance(1); upto = 0; this->_perThread = perThread; this->fieldInfo = fieldInfo; docState = perThread->docState; fieldState = docInverterPerField->fieldState; } NormsWriterPerField::~NormsWriterPerField() { } void NormsWriterPerField::reset() { // Shrink back if we are over allocated now docIDs.resize(MiscUtils::getShrinkSize(docIDs.size(), upto)); norms.resize(MiscUtils::getShrinkSize(norms.size(), upto)); upto = 0; } void NormsWriterPerField::abort() { upto = 0; } int32_t NormsWriterPerField::compareTo(const LuceneObjectPtr& other) { return fieldInfo->name.compare(boost::static_pointer_cast(other)->fieldInfo->name); } void NormsWriterPerField::finish() { BOOST_ASSERT(docIDs.size() == norms.size()); if (fieldInfo->isIndexed && !fieldInfo->omitNorms) { if (docIDs.size() <= upto) { BOOST_ASSERT(docIDs.size() == upto); docIDs.resize(MiscUtils::getNextSize(1 + upto)); norms.resize(MiscUtils::getNextSize(1 + upto)); } double norm = docState->similarity->computeNorm(fieldInfo->name, fieldState); norms[upto] = Similarity::encodeNorm(norm); docIDs[upto] = docState->docID; ++upto; } } } LucenePlusPlus-rel_3.0.9/src/core/index/NormsWriterPerThread.cpp000066400000000000000000000022621456444476200247560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NormsWriterPerThread.h" #include "NormsWriterPerField.h" #include "DocInverterPerThread.h" namespace Lucene { NormsWriterPerThread::NormsWriterPerThread(const DocInverterPerThreadPtr& docInverterPerThread, const NormsWriterPtr& normsWriter) { this->_normsWriter = normsWriter; docState = docInverterPerThread->docState; } NormsWriterPerThread::~NormsWriterPerThread() { } InvertedDocEndConsumerPerFieldPtr NormsWriterPerThread::addField(const DocInverterPerFieldPtr& docInverterPerField, const FieldInfoPtr& fieldInfo) { return newLucene(docInverterPerField, shared_from_this(), fieldInfo); } void NormsWriterPerThread::abort() { } void NormsWriterPerThread::startDocument() { } void NormsWriterPerThread::finishDocument() { } bool NormsWriterPerThread::freeRAM() { return false; } } LucenePlusPlus-rel_3.0.9/src/core/index/ParallelReader.cpp000066400000000000000000000425121456444476200235450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ParallelReader.h" #include "_ParallelReader.h" #include "Document.h" #include "FieldSelector.h" #include "Term.h" #include "FieldCache.h" #include "StringUtils.h" namespace Lucene { ParallelReader::ParallelReader(bool closeSubReaders) { this->readers = Collection::newInstance(); this->decrefOnClose = Collection::newInstance(); this->fieldToReader = MapStringIndexReader::newInstance(); this->readerToFields = MapIndexReaderSetString::newInstance(); this->storedFieldReaders = Collection::newInstance(); this->_maxDoc = 0; this->_numDocs = 0; this->_hasDeletions = false; this->incRefReaders = !closeSubReaders; } ParallelReader::~ParallelReader() { } void ParallelReader::add(const IndexReaderPtr& reader) { ensureOpen(); add(reader, false); } void ParallelReader::add(const IndexReaderPtr& reader, bool ignoreStoredFields) { ensureOpen(); if (readers.empty()) { this->_maxDoc = reader->maxDoc(); this->_numDocs = reader->numDocs(); this->_hasDeletions = reader->hasDeletions(); } if (reader->maxDoc() != _maxDoc) { // check compatibility boost::throw_exception(IllegalArgumentException(L"All readers must have same maxDoc: " + StringUtils::toString(_maxDoc) + L" != " + StringUtils::toString(reader->maxDoc()))); } if (reader->numDocs() != _numDocs) { boost::throw_exception(IllegalArgumentException(L"All readers must have same numDocs: " + StringUtils::toString(_numDocs) + L" != " + StringUtils::toString(reader->numDocs()))); } HashSet fields(reader->getFieldNames(IndexReader::FIELD_OPTION_ALL)); readerToFields.put(reader, fields); for (HashSet::iterator field = fields.begin(); field != fields.end(); ++field) { // update fieldToReader map if (!fieldToReader.contains(*field)) { fieldToReader.put(*field, reader); } } if (!ignoreStoredFields) { storedFieldReaders.add(reader); // add to storedFieldReaders } readers.add(reader); if (incRefReaders) { reader->incRef(); } 
decrefOnClose.add(incRefReaders); } LuceneObjectPtr ParallelReader::clone(const LuceneObjectPtr& other) { SyncLock syncLock(this); try { return doReopen(true); } catch (LuceneException& e) { boost::throw_exception(RuntimeException(e.getError())); } return LuceneObjectPtr(); } IndexReaderPtr ParallelReader::reopen() { SyncLock syncLock(this); return doReopen(false); } IndexReaderPtr ParallelReader::doReopen(bool doClone) { ensureOpen(); bool reopened = false; Collection newReaders(Collection::newInstance()); bool success = false; LuceneException finally; try { for (Collection::iterator oldReader = readers.begin(); oldReader != readers.end(); ++oldReader) { IndexReaderPtr newReader; if (doClone) { newReader = boost::dynamic_pointer_cast((*oldReader)->clone()); } else { newReader = (*oldReader)->reopen(); } newReaders.add(newReader); // if at least one of the subreaders was updated we remember that and return a new ParallelReader if (newReader != *oldReader) { reopened = true; } } success = true; } catch (LuceneException& e) { finally = e; } if (!success && reopened) { for (int32_t i = 0; i < newReaders.size(); ++i) { if (newReaders[i] != readers[i]) { try { if (newReaders[i]) { newReaders[i]->close(); } } catch (...) 
{ // keep going - we want to clean up as much as possible } } } } finally.throwException(); if (reopened) { Collection newDecrefOnClose(Collection::newInstance()); ParallelReaderPtr pr(newLucene()); for (int32_t i = 0; i < readers.size(); ++i) { IndexReaderPtr oldReader(readers[i]); IndexReaderPtr newReader(newReaders[i]); if (newReader == oldReader) { newDecrefOnClose.add(true); newReader->incRef(); } else { // this is a new subreader instance, so on close() we don't decRef but close it newDecrefOnClose.add(false); } pr->add(newReader, !storedFieldReaders.contains(oldReader)); } pr->decrefOnClose = newDecrefOnClose; pr->incRefReaders = incRefReaders; return pr; } else { // No subreader was refreshed return shared_from_this(); } } int32_t ParallelReader::numDocs() { // Don't call ensureOpen() here (it could affect performance) return _numDocs; } int32_t ParallelReader::maxDoc() { // Don't call ensureOpen() here (it could affect performance) return _maxDoc; } bool ParallelReader::hasDeletions() { // Don't call ensureOpen() here (it could affect performance) return _hasDeletions; } bool ParallelReader::isDeleted(int32_t n) { // Don't call ensureOpen() here (it could affect performance) return !readers.empty() ? 
readers[0]->isDeleted(n) : false; // check first reader } void ParallelReader::doDelete(int32_t docNum) { // delete in all readers for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { (*reader)->deleteDocument(docNum); } _hasDeletions = true; } void ParallelReader::doUndeleteAll() { // undeleteAll in all readers for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { (*reader)->undeleteAll(); } _hasDeletions = false; } DocumentPtr ParallelReader::document(int32_t n, const FieldSelectorPtr& fieldSelector) { ensureOpen(); DocumentPtr result(newLucene()); // append fields from storedFieldReaders for (Collection::iterator reader = storedFieldReaders.begin(); reader != storedFieldReaders.end(); ++reader) { bool include = !fieldSelector; if (!include) { HashSet fields = readerToFields.get(*reader); for (HashSet::iterator field = fields.begin(); field != fields.end(); ++field) { if (fieldSelector->accept(*field) != FieldSelector::SELECTOR_NO_LOAD) { include = true; break; } } } if (include) { Collection fields((*reader)->document(n, fieldSelector)->getFields()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { result->add(*field); } } } return result; } Collection ParallelReader::getTermFreqVectors(int32_t docNumber) { ensureOpen(); Collection results(Collection::newInstance()); // get all vectors for (MapStringIndexReader::iterator entry = fieldToReader.begin(); entry != fieldToReader.end(); ++entry) { TermFreqVectorPtr vector(entry->second->getTermFreqVector(docNumber, entry->first)); if (vector) { results.add(vector); } } return results; } TermFreqVectorPtr ParallelReader::getTermFreqVector(int32_t docNumber, const String& field) { ensureOpen(); MapStringIndexReader::iterator reader = fieldToReader.find(field); return reader == fieldToReader.end() ? 
TermFreqVectorPtr() : reader->second->getTermFreqVector(docNumber, field); } void ParallelReader::getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) { ensureOpen(); MapStringIndexReader::iterator reader = fieldToReader.find(field); if (reader != fieldToReader.end()) { reader->second->getTermFreqVector(docNumber, field, mapper); } } void ParallelReader::getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) { ensureOpen(); for (MapStringIndexReader::iterator entry = fieldToReader.begin(); entry != fieldToReader.end(); ++entry) { entry->second->getTermFreqVector(docNumber, entry->first, mapper); } } bool ParallelReader::hasNorms(const String& field) { ensureOpen(); MapStringIndexReader::iterator reader = fieldToReader.find(field); return reader == fieldToReader.end() ? false : reader->second->hasNorms(field); } ByteArray ParallelReader::norms(const String& field) { ensureOpen(); MapStringIndexReader::iterator reader = fieldToReader.find(field); return reader == fieldToReader.end() ? ByteArray() : reader->second->norms(field); } void ParallelReader::norms(const String& field, ByteArray norms, int32_t offset) { ensureOpen(); MapStringIndexReader::iterator reader = fieldToReader.find(field); if (reader != fieldToReader.end()) { reader->second->norms(field, norms, offset); } } void ParallelReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { ensureOpen(); MapStringIndexReader::iterator reader = fieldToReader.find(field); if (reader != fieldToReader.end()) { reader->second->doSetNorm(doc, field, value); } } TermEnumPtr ParallelReader::terms() { ensureOpen(); return newLucene(shared_from_this()); } TermEnumPtr ParallelReader::terms(const TermPtr& t) { ensureOpen(); return newLucene(shared_from_this(), t); } int32_t ParallelReader::docFreq(const TermPtr& t) { ensureOpen(); MapStringIndexReader::iterator reader = fieldToReader.find(t->field()); return reader == fieldToReader.end() ? 
0 : reader->second->docFreq(t); } TermDocsPtr ParallelReader::termDocs(const TermPtr& term) { ensureOpen(); return newLucene(shared_from_this(), term); } TermDocsPtr ParallelReader::termDocs() { ensureOpen(); return newLucene(shared_from_this()); } TermPositionsPtr ParallelReader::termPositions(const TermPtr& term) { ensureOpen(); return newLucene(shared_from_this(), term); } TermPositionsPtr ParallelReader::termPositions() { ensureOpen(); return newLucene(shared_from_this()); } bool ParallelReader::isCurrent() { for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { if (!(*reader)->isCurrent()) { return false; } } // all subreaders are up to date return true; } bool ParallelReader::isOptimized() { for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { if (!(*reader)->isOptimized()) { return false; } } // all subindexes are optimized return true; } int64_t ParallelReader::getVersion() { boost::throw_exception(UnsupportedOperationException(L"ParallelReader does not support this method.")); return 0; } Collection ParallelReader::getSubReaders() { return readers; } void ParallelReader::doCommit(MapStringString commitUserData) { for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { (*reader)->commit(commitUserData); } } void ParallelReader::doClose() { SyncLock syncLock(this); for (int32_t i = 0; i < readers.size(); ++i) { if (decrefOnClose[i]) { readers[i]->decRef(); } else { readers[i]->close(); } } FieldCache::DEFAULT()->purge(shared_from_this()); } HashSet ParallelReader::getFieldNames(FieldOption fieldOption) { ensureOpen(); HashSet fieldSet(HashSet::newInstance()); for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { HashSet names((*reader)->getFieldNames(fieldOption)); fieldSet.addAll(names.begin(), names.end()); } return fieldSet; } ParallelTermEnum::ParallelTermEnum(const ParallelReaderPtr& reader) { this->setIterator = 
false; this->_reader = reader; MapStringIndexReader::iterator indexReader = reader->fieldToReader.begin(); if (indexReader != reader->fieldToReader.end()) { this->field = indexReader->first; } if (!field.empty()) { this->termEnum = reader->fieldToReader[field]->terms(); } } ParallelTermEnum::ParallelTermEnum(const ParallelReaderPtr& reader, const TermPtr& term) { this->setIterator = false; this->_reader = reader; this->field = term->field(); MapStringIndexReader::iterator indexReader = reader->fieldToReader.find(field); if (indexReader != reader->fieldToReader.end()) { this->termEnum = indexReader->second->terms(term); } } ParallelTermEnum::~ParallelTermEnum() { } bool ParallelTermEnum::next() { if (!termEnum) { return false; } // another term in this field? if (termEnum->next() && termEnum->term()->field() == field) { return true; // yes, keep going } termEnum->close(); // close old termEnum ParallelReaderPtr reader(_reader); // find the next field with terms, if any if (!setIterator) { fieldIterator = reader->fieldToReader.find(field); ++fieldIterator; // Skip field to get next one setIterator = false; } while (fieldIterator != reader->fieldToReader.end()) { field = fieldIterator->first; termEnum = fieldIterator->second->terms(newLucene(field)); ++fieldIterator; TermPtr term(termEnum->term()); if (term && term->field() == field) { return true; } else { termEnum->close(); } } return false; // no more fields } TermPtr ParallelTermEnum::term() { return termEnum ? termEnum->term() : TermPtr(); } int32_t ParallelTermEnum::docFreq() { return termEnum ? termEnum->docFreq() : 0; } void ParallelTermEnum::close() { if (termEnum) { termEnum->close(); } } ParallelTermDocs::ParallelTermDocs(const ParallelReaderPtr& reader) { this->_reader = reader; } ParallelTermDocs::ParallelTermDocs(const ParallelReaderPtr& reader, const TermPtr& term) { this->_reader = reader; if (!term) { termDocs = reader->readers.empty() ? 
TermDocsPtr() : reader->readers[0]->termDocs(TermPtr()); } else { seek(term); } } ParallelTermDocs::~ParallelTermDocs() { } int32_t ParallelTermDocs::doc() { return termDocs->doc(); } int32_t ParallelTermDocs::freq() { return termDocs->freq(); } void ParallelTermDocs::seek(const TermPtr& term) { ParallelReaderPtr reader(_reader); MapStringIndexReader::iterator indexReader = reader->fieldToReader.find(term->field()); termDocs = indexReader != reader->fieldToReader.end() ? indexReader->second->termDocs(term) : TermDocsPtr(); } void ParallelTermDocs::seek(const TermEnumPtr& termEnum) { seek(termEnum->term()); } bool ParallelTermDocs::next() { return termDocs ? termDocs->next() : false; } int32_t ParallelTermDocs::read(Collection& docs, Collection& freqs) { return termDocs ? termDocs->read(docs, freqs) : 0; } bool ParallelTermDocs::skipTo(int32_t target) { return termDocs ? termDocs->skipTo(target) : false; } void ParallelTermDocs::close() { if (termDocs) { termDocs->close(); } } ParallelTermPositions::ParallelTermPositions(const ParallelReaderPtr& reader) : ParallelTermDocs(reader) { } ParallelTermPositions::ParallelTermPositions(const ParallelReaderPtr& reader, const TermPtr& term) : ParallelTermDocs(reader) { seek(term); } ParallelTermPositions::~ParallelTermPositions() { } void ParallelTermPositions::seek(const TermPtr& term) { ParallelReaderPtr reader(_reader); MapStringIndexReader::iterator indexReader = reader->fieldToReader.find(term->field()); termDocs = indexReader != reader->fieldToReader.end() ? indexReader->second->termPositions(term) : TermDocsPtr(); } int32_t ParallelTermPositions::nextPosition() { // It is an error to call this if there is no next position, eg. if termDocs==null return boost::static_pointer_cast(termDocs)->nextPosition(); } int32_t ParallelTermPositions::getPayloadLength() { // It is an error to call this if there is no next position, eg. 
if termDocs==null return boost::static_pointer_cast(termDocs)->getPayloadLength(); } ByteArray ParallelTermPositions::getPayload(ByteArray data, int32_t offset) { // It is an error to call this if there is no next position, eg. if termDocs==null return boost::static_pointer_cast(termDocs)->getPayload(data, offset); } bool ParallelTermPositions::isPayloadAvailable() { // It is an error to call this if there is no next position, eg. if termDocs==null return boost::static_pointer_cast(termDocs)->isPayloadAvailable(); } } LucenePlusPlus-rel_3.0.9/src/core/index/Payload.cpp000066400000000000000000000064621456444476200222630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Payload.h" #include "MiscUtils.h" namespace Lucene { Payload::Payload() { this->offset = 0; this->_length = 0; } Payload::Payload(ByteArray data) { this->data = data; this->offset = 0; this->_length = data.size(); } Payload::Payload(ByteArray data, int32_t offset, int32_t length) { if (offset < 0 || offset + length > data.size()) { boost::throw_exception(IllegalArgumentException()); } this->data = data; this->offset = offset; this->_length = length; } Payload::~Payload() { } void Payload::setData(ByteArray data) { setData(data, 0, data.size()); } void Payload::setData(ByteArray data, int32_t offset, int32_t length) { this->data = data; this->offset = offset; this->_length = length; } ByteArray Payload::getData() { return this->data; } int32_t Payload::getOffset() { return this->offset; } int32_t Payload::length() { return this->_length; } uint8_t Payload::byteAt(int32_t index) { if (0 <= index && index < this->_length) { return this->data[this->offset + 
index]; } boost::throw_exception(IndexOutOfBoundsException()); return 0; } ByteArray Payload::toByteArray() { ByteArray retArray(ByteArray::newInstance(this->_length)); MiscUtils::arrayCopy(this->data.get(), this->offset, retArray.get(), 0, this->_length); return retArray; } void Payload::copyTo(ByteArray target, int32_t targetOffset) { if (this->_length > target.size() + targetOffset) { boost::throw_exception(IndexOutOfBoundsException()); } MiscUtils::arrayCopy(this->data.get(), this->offset, target.get(), targetOffset, this->_length); } LuceneObjectPtr Payload::clone(const LuceneObjectPtr& other) { // Start with a shallow copy of data LuceneObjectPtr clone = LuceneObject::clone(other ? other : newLucene()); PayloadPtr clonePayload(boost::dynamic_pointer_cast(clone)); clonePayload->offset = offset; clonePayload->_length = _length; // Only copy the part of data that belongs to this Payload if (offset == 0 && _length == data.size()) { // It is the whole thing, so just clone it. clonePayload->data = ByteArray::newInstance(data.size()); MiscUtils::arrayCopy(data.get(), 0, clonePayload->data.get(), 0, data.size()); } else { // Just get the part clonePayload->data = toByteArray(); clonePayload->offset = 0; } return clonePayload; } bool Payload::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } PayloadPtr otherPayload(boost::dynamic_pointer_cast(other)); if (otherPayload) { if (_length == otherPayload->_length) { return (std::memcmp(data.get(), otherPayload->data.get(), _length) == 0); } else { return false; } } return false; } int32_t Payload::hashCode() { return MiscUtils::hashCode(data.get(), offset, offset + _length); } } LucenePlusPlus-rel_3.0.9/src/core/index/PositionBasedTermVectorMapper.cpp000066400000000000000000000053221456444476200266070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PositionBasedTermVectorMapper.h" namespace Lucene { PositionBasedTermVectorMapper::PositionBasedTermVectorMapper(bool ignoringOffsets) : TermVectorMapper(false, ignoringOffsets) { storeOffsets = false; } PositionBasedTermVectorMapper::~PositionBasedTermVectorMapper() { } bool PositionBasedTermVectorMapper::isIgnoringPositions() { return false; } void PositionBasedTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) { for (int32_t i = 0; i < positions.size(); ++i) { TermVectorsPositionInfoPtr pos(currentPositions.get(positions[i])); if (!pos) { pos = newLucene(positions[i], storeOffsets); currentPositions.put(positions[i], pos); } pos->addTerm(term, offsets ? offsets[i] : TermVectorOffsetInfoPtr()); } } void PositionBasedTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) { if (storePositions == false) { boost::throw_exception(RuntimeException(L"You must store positions in order to use this Mapper")); } if (storeOffsets == true) { // ignoring offsets } this->fieldToTerms = MapStringMapIntTermVectorsPositionInfo::newInstance(); this->storeOffsets = storeOffsets; currentField = field; this->currentPositions = MapIntTermVectorsPositionInfo::newInstance(); fieldToTerms.put(currentField, currentPositions); } MapStringMapIntTermVectorsPositionInfo PositionBasedTermVectorMapper::getFieldToTerms() { return fieldToTerms; } TermVectorsPositionInfo::TermVectorsPositionInfo(int32_t position, bool storeOffsets) { this->position = position; this->terms = Collection::newInstance(); if (storeOffsets) { offsets = Collection::newInstance(); } } TermVectorsPositionInfo::~TermVectorsPositionInfo() { } void TermVectorsPositionInfo::addTerm(const 
String& term, const TermVectorOffsetInfoPtr& info) { terms.add(term); if (offsets) { offsets.add(info); } } int32_t TermVectorsPositionInfo::getPosition() { return position; } Collection TermVectorsPositionInfo::getTerms() { return terms; } Collection TermVectorsPositionInfo::getOffsets() { return offsets; } } LucenePlusPlus-rel_3.0.9/src/core/index/RawPostingList.cpp000066400000000000000000000012651456444476200236170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "RawPostingList.h" #include "DocumentsWriter.h" namespace Lucene { const int32_t RawPostingList::BYTES_SIZE = DocumentsWriter::OBJECT_HEADER_BYTES + 3 * DocumentsWriter::INT_NUM_BYTE; RawPostingList::RawPostingList() { textStart = 0; intStart = 0; byteStart = 0; } RawPostingList::~RawPostingList() { } } LucenePlusPlus-rel_3.0.9/src/core/index/ReadOnlyDirectoryReader.cpp000066400000000000000000000027321456444476200254130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ReadOnlyDirectoryReader.h" #include "ReadOnlySegmentReader.h" namespace Lucene { ReadOnlyDirectoryReader::ReadOnlyDirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& sis, const IndexDeletionPolicyPtr& deletionPolicy, int32_t termInfosIndexDivisor) : DirectoryReader(directory, sis, deletionPolicy, true, termInfosIndexDivisor) { } ReadOnlyDirectoryReader::ReadOnlyDirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& infos, Collection oldReaders, Collection oldStarts, MapStringByteArray oldNormsCache, bool doClone, int32_t termInfosIndexDivisor) : DirectoryReader(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor) { } ReadOnlyDirectoryReader::ReadOnlyDirectoryReader(const IndexWriterPtr& writer, const SegmentInfosPtr& infos, int32_t termInfosIndexDivisor) : DirectoryReader(writer, infos, termInfosIndexDivisor) { } ReadOnlyDirectoryReader::~ReadOnlyDirectoryReader() { } void ReadOnlyDirectoryReader::acquireWriteLock() { ReadOnlySegmentReader::noWrite(); } } LucenePlusPlus-rel_3.0.9/src/core/index/ReadOnlySegmentReader.cpp000066400000000000000000000015371456444476200250530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ReadOnlySegmentReader.h" #include "BitVector.h" namespace Lucene { ReadOnlySegmentReader::~ReadOnlySegmentReader() { } void ReadOnlySegmentReader::noWrite() { boost::throw_exception(UnsupportedOperationException(L"This IndexReader cannot make any changes to the index (it was opened with readOnly = true)")); } void ReadOnlySegmentReader::acquireWriteLock() { noWrite(); } bool ReadOnlySegmentReader::isDeleted(int32_t n) { return (deletedDocs && deletedDocs->get(n)); } } LucenePlusPlus-rel_3.0.9/src/core/index/ReusableStringReader.cpp000066400000000000000000000023101456444476200247320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ReusableStringReader.h" #include "MiscUtils.h" namespace Lucene { ReusableStringReader::ReusableStringReader() { upto = 0; left = 0; } ReusableStringReader::~ReusableStringReader() { } void ReusableStringReader::init(const String& s) { this->s = s; left = s.length(); this->upto = 0; } int32_t ReusableStringReader::read(wchar_t* buffer, int32_t offset, int32_t length) { if (left > length) { MiscUtils::arrayCopy(s.begin(), upto, buffer, offset, length); upto += length; left -= length; return length; } else if (left == 0) { s.clear(); return -1; } else { MiscUtils::arrayCopy(s.begin(), upto, buffer, offset, left); int32_t r = left; left = 0; upto = s.length(); return r; } } void ReusableStringReader::close() { } } 
LucenePlusPlus-rel_3.0.9/src/core/index/SegmentInfo.cpp000066400000000000000000000453051456444476200231070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "SegmentInfo.h" #include "SegmentInfos.h" #include "Directory.h" #include "IndexInput.h" #include "IndexOutput.h" #include "IndexFileNames.h" #include "IndexFileNameFilter.h" #include "BitVector.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t SegmentInfo::NO = -1; // no norms; no deletes; const int32_t SegmentInfo::YES = 1; // have norms; have deletes; const int32_t SegmentInfo::CHECK_DIR = 0; // must check dir to see if there are norms/deletions const int32_t SegmentInfo::WITHOUT_GEN = 0; // a file name that has no GEN in it. SegmentInfo::SegmentInfo(const String& name, int32_t docCount, const DirectoryPtr& dir) { _sizeInBytes = -1; this->name = name; this->docCount = docCount; this->dir = dir; delGen = NO; this->isCompoundFile = CHECK_DIR; preLockless = true; hasSingleNormFile = false; docStoreOffset = -1; docStoreSegment = name; docStoreIsCompoundFile = false; delCount = 0; hasProx = true; } SegmentInfo::SegmentInfo(const String& name, int32_t docCount, const DirectoryPtr& dir, bool isCompoundFile, bool hasSingleNormFile) { _sizeInBytes = -1; this->name = name; this->docCount = docCount; this->dir = dir; delGen = NO; this->isCompoundFile = (uint8_t)(isCompoundFile ? 
YES : NO); this->hasSingleNormFile = hasSingleNormFile; preLockless = false; docStoreOffset = -1; docStoreIsCompoundFile = false; delCount = 0; hasProx = true; } SegmentInfo::SegmentInfo(const String& name, int32_t docCount, const DirectoryPtr& dir, bool isCompoundFile, bool hasSingleNormFile, int32_t docStoreOffset, const String& docStoreSegment, bool docStoreIsCompoundFile, bool hasProx) { _sizeInBytes = -1; this->name = name; this->docCount = docCount; this->dir = dir; delGen = NO; this->isCompoundFile = (uint8_t)(isCompoundFile ? YES : NO); this->hasSingleNormFile = hasSingleNormFile; preLockless = false; this->docStoreOffset = docStoreOffset; this->docStoreSegment = docStoreSegment; this->docStoreIsCompoundFile = docStoreIsCompoundFile; delCount = 0; this->hasProx = hasProx; } SegmentInfo::SegmentInfo(const DirectoryPtr& dir, int32_t format, const IndexInputPtr& input) { _sizeInBytes = -1; this->dir = dir; name = input->readString(); docCount = input->readInt(); if (format <= SegmentInfos::FORMAT_LOCKLESS) { delGen = input->readLong(); if (format <= SegmentInfos::FORMAT_SHARED_DOC_STORE) { docStoreOffset = input->readInt(); if (docStoreOffset != -1) { docStoreSegment = input->readString(); docStoreIsCompoundFile = (input->readByte() == 1); } else { docStoreSegment = name; docStoreIsCompoundFile = false; } } else { docStoreOffset = -1; docStoreSegment = name; docStoreIsCompoundFile = false; } if (format <= SegmentInfos::FORMAT_SINGLE_NORM_FILE) { hasSingleNormFile = (input->readByte() == 1); } else { hasSingleNormFile = false; } int32_t numNormGen = input->readInt(); if (numNormGen != NO) { normGen = Collection::newInstance(numNormGen); for (int32_t j = 0; j < numNormGen; ++j) { normGen[j] = input->readLong(); } } isCompoundFile = input->readByte(); preLockless = (isCompoundFile == CHECK_DIR); if (format <= SegmentInfos::FORMAT_DEL_COUNT) { delCount = input->readInt(); BOOST_ASSERT(delCount <= docCount); } else { delCount = -1; } if (format <= 
SegmentInfos::FORMAT_HAS_PROX) { hasProx = (input->readByte() == 1); } else { hasProx = true; } if (format <= SegmentInfos::FORMAT_DIAGNOSTICS) { diagnostics = input->readStringStringMap(); } else { diagnostics = MapStringString::newInstance(); } } else { delGen = CHECK_DIR; isCompoundFile = CHECK_DIR; preLockless = true; hasSingleNormFile = false; docStoreOffset = -1; docStoreIsCompoundFile = false; delCount = -1; hasProx = true; diagnostics = MapStringString::newInstance(); } } SegmentInfo::~SegmentInfo() { } void SegmentInfo::reset(const SegmentInfoPtr& src) { clearFiles(); name = src->name; docCount = src->docCount; dir = src->dir; preLockless = src->preLockless; delGen = src->delGen; docStoreOffset = src->docStoreOffset; docStoreIsCompoundFile = src->docStoreIsCompoundFile; if (!src->normGen) { normGen = src->normGen; } else { normGen = Collection::newInstance(src->normGen.begin(), src->normGen.end()); } isCompoundFile = src->isCompoundFile; hasSingleNormFile = src->hasSingleNormFile; delCount = src->delCount; } void SegmentInfo::setDiagnostics(MapStringString diagnostics) { this->diagnostics = diagnostics; } MapStringString SegmentInfo::getDiagnostics() { return diagnostics; } void SegmentInfo::setNumFields(int32_t numFields) { if (!normGen) { // normGen is null if we loaded a pre-2.1 segment file, or, if this segments file hasn't had any // norms set against it yet normGen = Collection::newInstance(numFields); if (!preLockless) { // Do nothing: thus leaving normGen[k] == CHECK_DIR (==0), so that later we know } // we have to check filesystem for norm files, because this is prelockless. 
else { // This is a FORMAT_LOCKLESS segment, which means there are no separate norms for (int32_t i = 0; i < numFields; ++i) { normGen[i] = NO; } } } } int64_t SegmentInfo::sizeInBytes() { if (_sizeInBytes == -1) { HashSet _files(files()); _sizeInBytes = 0; for (HashSet::iterator fileName = _files.begin(); fileName != _files.end(); ++fileName) { // we don't count bytes used by a shared doc store against this segment if (docStoreOffset == -1 || !IndexFileNames::isDocStoreFile(*fileName)) { _sizeInBytes += dir->fileLength(*fileName); } } } return _sizeInBytes; } bool SegmentInfo::hasDeletions() { if (delGen == NO) { return false; } else if (delGen >= YES) { return true; } else { return dir->fileExists(getDelFileName()); } } void SegmentInfo::advanceDelGen() { // delGen 0 is reserved for pre-LOCKLESS format if (delGen == NO) { delGen = YES; } else { delGen++; } clearFiles(); } void SegmentInfo::clearDelGen() { delGen = NO; clearFiles(); } LuceneObjectPtr SegmentInfo::clone(const LuceneObjectPtr& other) { SegmentInfoPtr si(newLucene(name, docCount, dir)); si->isCompoundFile = isCompoundFile; si->delGen = delGen; si->delCount = delCount; si->hasProx = hasProx; si->preLockless = preLockless; si->hasSingleNormFile = hasSingleNormFile; si->diagnostics = MapStringString::newInstance(); si->diagnostics.putAll(diagnostics.begin(), diagnostics.end()); if (normGen) { si->normGen = Collection::newInstance(normGen.begin(), normGen.end()); } si->docStoreOffset = docStoreOffset; si->docStoreSegment = docStoreSegment; si->docStoreIsCompoundFile = docStoreIsCompoundFile; return si; } String SegmentInfo::getDelFileName() { if (delGen == NO) { // in this case we know there is no deletion filename against this segment return L""; } else { // if delgen is check_dir, it's the pre-lockless-commit file format return IndexFileNames::fileNameFromGeneration(name, String(L".") + IndexFileNames::DELETES_EXTENSION(), delGen); } } bool SegmentInfo::hasSeparateNorms(int32_t fieldNumber) { if 
((!normGen && preLockless) || (normGen && normGen[fieldNumber] == CHECK_DIR)) { // must fallback to directory file exists check return dir->fileExists(name + L".s" + StringUtils::toString(fieldNumber)); } else if (!normGen || normGen[fieldNumber] == NO) { return false; } else { return true; } } bool SegmentInfo::hasSeparateNorms() { if (!normGen) { if (!preLockless) { // this means we were created with lockless code and no norms are written yet return false; } else { HashSet result(dir->listAll()); if (!result) { boost::throw_exception(IOException(L"Cannot read directory " + dir->toString() + L": listAll() returned null")); } String pattern(name + L".s"); int32_t patternLength = pattern.length(); for (HashSet::iterator fileName = result.begin(); fileName != result.end(); ++fileName) { if (IndexFileNameFilter::accept(L"", *fileName) && boost::starts_with(*fileName, pattern) && UnicodeUtil::isDigit((*fileName)[patternLength])) { return true; } } return false; } } else { // This means this segment was saved with LOCKLESS code so we first check whether any normGen's are >= 1 // (meaning they definitely have separate norms) for (Collection::iterator gen = normGen.begin(); gen != normGen.end(); ++gen) { if (*gen >= YES) { return true; } } // Next we look for any == 0. These cases were pre-LOCKLESS and must be checked in directory for (int32_t gen = 0; gen < normGen.size(); ++gen) { if (normGen[gen] == CHECK_DIR && hasSeparateNorms(gen)) { return true; } } } return false; } void SegmentInfo::advanceNormGen(int32_t fieldIndex) { if (normGen[fieldIndex] == NO) { normGen[fieldIndex] = YES; } else { normGen[fieldIndex]++; } clearFiles(); } String SegmentInfo::getNormFileName(int32_t number) { String prefix; int64_t gen = !normGen ? 
CHECK_DIR : normGen[number]; if (hasSeparateNorms(number)) { // case 1: separate norm prefix = L".s"; return IndexFileNames::fileNameFromGeneration(name, prefix + StringUtils::toString(number), gen); } if (hasSingleNormFile) { // case 2: lockless (or nrm file exists) - single file for all norms prefix = String(L".") + IndexFileNames::NORMS_EXTENSION(); return IndexFileNames::fileNameFromGeneration(name, prefix, WITHOUT_GEN); } // case 3: norm file for each field prefix = L".f"; return IndexFileNames::fileNameFromGeneration(name, prefix + StringUtils::toString(number), WITHOUT_GEN); } void SegmentInfo::setUseCompoundFile(bool isCompoundFile) { this->isCompoundFile = (uint8_t)(isCompoundFile ? YES : NO); clearFiles(); } bool SegmentInfo::getUseCompoundFile() { if (isCompoundFile == (uint8_t)NO) { return false; } else if (isCompoundFile == (uint8_t)YES) { return true; } else { return dir->fileExists(name + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION()); } } int32_t SegmentInfo::getDelCount() { if (delCount == -1) { delCount = hasDeletions() ? 
BitVector(dir, getDelFileName()).count() : 0; } BOOST_ASSERT(delCount <= docCount); return delCount; } void SegmentInfo::setDelCount(int32_t delCount) { this->delCount = delCount; BOOST_ASSERT(delCount <= docCount); } int32_t SegmentInfo::getDocStoreOffset() { return docStoreOffset; } bool SegmentInfo::getDocStoreIsCompoundFile() { return docStoreIsCompoundFile; } void SegmentInfo::setDocStoreIsCompoundFile(bool v) { docStoreIsCompoundFile = v; clearFiles(); } String SegmentInfo::getDocStoreSegment() { return docStoreSegment; } void SegmentInfo::setDocStoreOffset(int32_t offset) { docStoreOffset = offset; clearFiles(); } void SegmentInfo::setDocStore(int32_t offset, const String& segment, bool isCompoundFile) { docStoreOffset = offset; docStoreSegment = segment; docStoreIsCompoundFile = isCompoundFile; } void SegmentInfo::write(const IndexOutputPtr& output) { output->writeString(name); output->writeInt(docCount); output->writeLong(delGen); output->writeInt(docStoreOffset); if (docStoreOffset != -1) { output->writeString(docStoreSegment); output->writeByte((uint8_t)(docStoreIsCompoundFile ? 1 : 0)); } output->writeByte((uint8_t)(hasSingleNormFile ? 1 : 0)); if (!normGen) { output->writeInt(NO); } else { output->writeInt(normGen.size()); for (Collection::iterator gen = normGen.begin(); gen != normGen.end(); ++gen) { output->writeLong(*gen); } } output->writeByte(isCompoundFile); output->writeInt(delCount); output->writeByte((uint8_t)(hasProx ? 1 : 0)); output->writeStringStringMap(diagnostics); } void SegmentInfo::setHasProx(bool hasProx) { this->hasProx = hasProx; clearFiles(); } bool SegmentInfo::getHasProx() { return hasProx; } void SegmentInfo::addIfExists(HashSet files, const String& fileName) { if (dir->fileExists(fileName)) { files.add(fileName); } } HashSet SegmentInfo::files() { if (_files) { // already cached return _files; } _files = HashSet::newInstance(); bool useCompoundFile = getUseCompoundFile(); if (useCompoundFile) { _files.add(name + L"." 
+ IndexFileNames::COMPOUND_FILE_EXTENSION()); } else { for (HashSet::iterator ext = IndexFileNames::NON_STORE_INDEX_EXTENSIONS().begin(); ext != IndexFileNames::NON_STORE_INDEX_EXTENSIONS().end(); ++ext) { addIfExists(_files, name + L"." + *ext); } } if (docStoreOffset != -1) { // we are sharing doc stores (stored fields, term vectors) with other segments BOOST_ASSERT(!docStoreSegment.empty()); if (docStoreIsCompoundFile) { _files.add(docStoreSegment + L"." + IndexFileNames::COMPOUND_FILE_STORE_EXTENSION()); } else { for (HashSet::iterator ext = IndexFileNames::STORE_INDEX_EXTENSIONS().begin(); ext != IndexFileNames::STORE_INDEX_EXTENSIONS().end(); ++ext) { addIfExists(_files, docStoreSegment + L"." + *ext); } } } else if (!useCompoundFile) { // we are not sharing, and, these files were not included in the compound file for (HashSet::iterator ext = IndexFileNames::STORE_INDEX_EXTENSIONS().begin(); ext != IndexFileNames::STORE_INDEX_EXTENSIONS().end(); ++ext) { addIfExists(_files, name + L"." + *ext); } } String delFileName(IndexFileNames::fileNameFromGeneration(name, String(L".") + IndexFileNames::DELETES_EXTENSION(), delGen)); if (!delFileName.empty() && (delGen >= YES || dir->fileExists(delFileName))) { _files.add(delFileName); } // careful logic for norms files if (normGen) { for (int32_t gen = 0; gen < normGen.size(); ++gen) { if (normGen[gen] >= YES) { // definitely a separate norm file, with generation _files.add(IndexFileNames::fileNameFromGeneration(name, String(L".") + IndexFileNames::SEPARATE_NORMS_EXTENSION() + StringUtils::toString(gen), normGen[gen])); } else if (normGen[gen] == NO) { // no separate norms but maybe plain norms in the non compound file case if (!hasSingleNormFile && !useCompoundFile) { String fileName(name + L"." 
+ IndexFileNames::PLAIN_NORMS_EXTENSION() + StringUtils::toString(gen)); if (dir->fileExists(fileName)) { _files.add(fileName); } } } else if (normGen[gen] == CHECK_DIR) { // pre-2.1: we have to check file existence String fileName; if (useCompoundFile) { fileName = name + L"." + IndexFileNames::SEPARATE_NORMS_EXTENSION() + StringUtils::toString(gen); } else if (!hasSingleNormFile) { fileName = name + L"." + IndexFileNames::PLAIN_NORMS_EXTENSION() + StringUtils::toString(gen); } if (!fileName.empty() && dir->fileExists(fileName)) { _files.add(fileName); } } } } else if (preLockless || (!hasSingleNormFile && !useCompoundFile)) { // pre-2.1: we have to scan the dir to find all matching _x.sn/_x.fn files for our segment String prefix; if (useCompoundFile) { prefix = name + L"." + IndexFileNames::SEPARATE_NORMS_EXTENSION(); } else { prefix = name + L"." + IndexFileNames::PLAIN_NORMS_EXTENSION(); } int32_t prefixLength = prefix.length(); HashSet allFiles(dir->listAll()); for (HashSet::iterator fileName = allFiles.begin(); fileName != allFiles.end(); ++fileName) { if (IndexFileNameFilter::accept(L"", *fileName) && (int32_t)fileName->length() > prefixLength && UnicodeUtil::isDigit((*fileName)[prefixLength]) && boost::starts_with(*fileName, prefix)) { _files.add(*fileName); } } } return _files; } void SegmentInfo::clearFiles() { _files.reset(); _sizeInBytes = -1; } String SegmentInfo::segString(const DirectoryPtr& dir) { String cfs; try { cfs = getUseCompoundFile() ? L"c" : L"C"; } catch (LuceneException&) { cfs = L"?"; } String docStore; if (docStoreOffset != -1) { docStore = L"->" + docStoreSegment; } return name + L":" + cfs + (this->dir == dir ? 
L"" : L"x") + StringUtils::toString(docCount) + docStore; } bool SegmentInfo::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } SegmentInfoPtr otherSegmentInfo(boost::dynamic_pointer_cast(other)); if (!otherSegmentInfo) { return false; } return (otherSegmentInfo->dir == dir && otherSegmentInfo->name == name); } int32_t SegmentInfo::hashCode() { return dir->hashCode() + StringUtils::hashCode(name); } } LucenePlusPlus-rel_3.0.9/src/core/index/SegmentInfoCollection.cpp000066400000000000000000000052451456444476200251220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentInfoCollection.h" #include "SegmentInfo.h" namespace Lucene { SegmentInfoCollection::SegmentInfoCollection() { segmentInfos = Collection::newInstance(); } SegmentInfoCollection::~SegmentInfoCollection() { } int32_t SegmentInfoCollection::size() { return segmentInfos.size(); } bool SegmentInfoCollection::empty() { return segmentInfos.empty(); } void SegmentInfoCollection::clear() { segmentInfos.clear(); } void SegmentInfoCollection::add(const SegmentInfoPtr& info) { segmentInfos.add(info); } void SegmentInfoCollection::add(int32_t pos, const SegmentInfoPtr& info) { segmentInfos.add(pos, info); } void SegmentInfoCollection::addAll(const SegmentInfoCollectionPtr& segmentInfos) { this->segmentInfos.addAll(segmentInfos->segmentInfos.begin(), segmentInfos->segmentInfos.end()); } bool SegmentInfoCollection::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } SegmentInfoCollectionPtr otherColl(boost::dynamic_pointer_cast(other)); if (!otherColl) { return false; } return 
segmentInfos.equals(otherColl->segmentInfos, luceneEquals()); } int32_t SegmentInfoCollection::find(const SegmentInfoPtr& info) { Collection::iterator idx = segmentInfos.find_if(luceneEqualTo(info)); return idx == segmentInfos.end() ? -1 : std::distance(segmentInfos.begin(), idx); } bool SegmentInfoCollection::contains(const SegmentInfoPtr& info) { return segmentInfos.contains_if(luceneEqualTo(info)); } void SegmentInfoCollection::remove(int32_t pos) { segmentInfos.remove(segmentInfos.begin() + pos); } void SegmentInfoCollection::remove(int32_t start, int32_t end) { segmentInfos.remove(segmentInfos.begin() + start, segmentInfos.begin() + end); } LuceneObjectPtr SegmentInfoCollection::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = LuceneObject::clone(other ? other : newLucene()); SegmentInfoCollectionPtr cloneInfos(boost::dynamic_pointer_cast(clone)); for (Collection::iterator info = segmentInfos.begin(); info != segmentInfos.end(); ++info) { cloneInfos->segmentInfos.add(*info); } return cloneInfos; } } LucenePlusPlus-rel_3.0.9/src/core/index/SegmentInfos.cpp000066400000000000000000000577401456444476200233000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "SegmentInfos.h" #include "_SegmentInfos.h" #include "SegmentInfo.h" #include "IndexFileNames.h" #include "Directory.h" #include "ChecksumIndexInput.h" #include "ChecksumIndexOutput.h" #include "IndexCommit.h" #include "LuceneThread.h" #include "InfoStream.h" #include "TestPoint.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { /// The file format version, a negative number. 
Works since counter, the old 1st entry, is always >= 0 const int32_t SegmentInfos::FORMAT = -1; /// This format adds details used for lockless commits. It differs slightly from the previous format in that file names /// are never re-used (write once). Instead, each file is written to the next generation. For example, segments_1, /// segments_2, etc. This allows us to not use a commit lock. const int32_t SegmentInfos::FORMAT_LOCKLESS = -2; /// This format adds a "hasSingleNormFile" flag into each segment info. const int32_t SegmentInfos::FORMAT_SINGLE_NORM_FILE = -3; /// This format allows multiple segments to share a single vectors and stored fields file. const int32_t SegmentInfos::FORMAT_SHARED_DOC_STORE = -4; /// This format adds a checksum at the end of the file to ensure all bytes were successfully written. const int32_t SegmentInfos::FORMAT_CHECKSUM = -5; /// This format adds the deletion count for each segment. This way IndexWriter can efficiently report numDocs(). const int32_t SegmentInfos::FORMAT_DEL_COUNT = -6; /// This format adds the boolean hasProx to record if any fields in the segment store prox information (ie, have /// omitTermFreqAndPositions == false) const int32_t SegmentInfos::FORMAT_HAS_PROX = -7; /// This format adds optional commit userData storage. const int32_t SegmentInfos::FORMAT_USER_DATA = -8; /// This format adds optional per-segment string diagnostics storage, and switches userData to Map const int32_t SegmentInfos::FORMAT_DIAGNOSTICS = -9; /// This must always point to the most recent file format. const int32_t SegmentInfos::CURRENT_FORMAT = SegmentInfos::FORMAT_DIAGNOSTICS; /// Advanced configuration of retry logic in loading segments_N file. 
int32_t SegmentInfos::defaultGenFileRetryCount = 10; int32_t SegmentInfos::defaultGenFileRetryPauseMsec = 50; int32_t SegmentInfos::defaultGenLookaheadCount = 10; MapStringString SegmentInfos::singletonUserData; InfoStreamPtr SegmentInfos::infoStream; SegmentInfos::SegmentInfos() { userData = MapStringString::newInstance(); lastGeneration = 0; generation = 0; counter = 0; version = MiscUtils::currentTimeMillis(); } SegmentInfos::~SegmentInfos() { } SegmentInfoPtr SegmentInfos::info(int32_t i) { return segmentInfos[i]; } int64_t SegmentInfos::getCurrentSegmentGeneration(HashSet files) { if (!files) { return -1; } int64_t max = -1; for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { if (boost::starts_with(*file, IndexFileNames::SEGMENTS()) && *file != IndexFileNames::SEGMENTS_GEN()) { max = std::max(generationFromSegmentsFileName(*file), max); } } return max; } int64_t SegmentInfos::getCurrentSegmentGeneration(const DirectoryPtr& directory) { try { return getCurrentSegmentGeneration(directory->listAll()); } catch (LuceneException&) { return -1; } } String SegmentInfos::getCurrentSegmentFileName(HashSet files) { return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", getCurrentSegmentGeneration(files)); } String SegmentInfos::getCurrentSegmentFileName(const DirectoryPtr& directory) { return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", getCurrentSegmentGeneration(directory)); } String SegmentInfos::getCurrentSegmentFileName() { return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", lastGeneration); } int64_t SegmentInfos::generationFromSegmentsFileName(const String& fileName) { if (fileName == IndexFileNames::SEGMENTS()) { return 0; } else if (boost::starts_with(fileName, IndexFileNames::SEGMENTS())) { return StringUtils::toLong(fileName.substr(wcslen(IndexFileNames::SEGMENTS().c_str()) + 1), StringUtils::CHARACTER_MAX_RADIX); } else { 
boost::throw_exception(IllegalArgumentException(L"FileName '" + fileName + L"' is not a segments file")); } return 0; } String SegmentInfos::getNextSegmentFileName() { return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", generation == -1 ? 1 : generation + 1); } void SegmentInfos::read(const DirectoryPtr& directory, const String& segmentFileName) { bool success = false; // clear any previous segments segmentInfos.clear(); ChecksumIndexInputPtr input(newLucene(directory->openInput(segmentFileName))); generation = generationFromSegmentsFileName(segmentFileName); lastGeneration = generation; LuceneException finally; try { int32_t format = input->readInt(); if (format < 0) { // file contains explicit format info if (format < CURRENT_FORMAT) { boost::throw_exception(CorruptIndexException(L"Unknown format version: " + StringUtils::toString(format))); } version = input->readLong(); // read version counter = input->readInt(); // read counter } else { counter = format; } for (int32_t i = input->readInt(); i > 0; --i) { // read segmentInfos segmentInfos.add(newLucene(directory, format, input)); } // in old format the version number may be at the end of the file if (format >= 0) { if (input->getFilePointer() >= input->length()) { version = MiscUtils::currentTimeMillis(); // old file format without version number } else { input->readLong(); // read version } } if (format <= FORMAT_USER_DATA) { if (format <= FORMAT_DIAGNOSTICS) { userData = input->readStringStringMap(); } else if (input->readByte() != 0) { if (!singletonUserData) { singletonUserData = MapStringString::newInstance(); } singletonUserData[String(L"userData")] = input->readString(); userData = singletonUserData; } else { userData.clear(); } } else { userData.clear(); } if (format <= FORMAT_CHECKSUM) { int64_t checksumNow = input->getChecksum(); int64_t checksumThen = input->readLong(); if (checksumNow != checksumThen) { boost::throw_exception(CorruptIndexException(L"Checksum mismatch in 
segments file")); } } success = true; } catch (LuceneException& e) { finally = e; } input->close(); // clear any segment infos we had loaded so we have a clean slate on retry if (!success) { segmentInfos.clear(); } finally.throwException(); } void SegmentInfos::read(const DirectoryPtr& directory) { lastGeneration = -1; generation = lastGeneration; newLucene(shared_from_this(), directory)->run(); } void SegmentInfos::write(const DirectoryPtr& directory) { String segmentFileName(getNextSegmentFileName()); // always advance the generation on write if (generation == -1) { generation = 1; } else { ++generation; } ChecksumIndexOutputPtr segnOutput(newLucene(directory->createOutput(segmentFileName))); bool success = false; LuceneException finally; try { segnOutput->writeInt(CURRENT_FORMAT); // write FORMAT segnOutput->writeLong(++version); // every write changes the index segnOutput->writeInt(counter); // write counter segnOutput->writeInt(segmentInfos.size()); // write infos for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) { (*seginfo)->write(segnOutput); } segnOutput->writeStringStringMap(userData); segnOutput->prepareCommit(); success = true; pendingSegnOutput = segnOutput; } catch (LuceneException& e) { finally = e; } if (!success) { // We hit an exception above; try to close the file but suppress any exception try { segnOutput->close(); } catch (...) { // Suppress so we keep throwing the original exception } try { // try not to leave a truncated segments_n file in the index directory->deleteFile(segmentFileName); } catch (...) { // Suppress so we keep throwing the original exception } } finally.throwException(); } LuceneObjectPtr SegmentInfos::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = SegmentInfoCollection::clone(other ? 
other : newLucene()); SegmentInfosPtr cloneInfos(boost::dynamic_pointer_cast(clone)); cloneInfos->counter = counter; cloneInfos->generation = generation; cloneInfos->lastGeneration = lastGeneration; cloneInfos->version = version; cloneInfos->pendingSegnOutput = pendingSegnOutput; for (int32_t i = 0; i < cloneInfos->size(); ++i) { cloneInfos->segmentInfos[i] = boost::dynamic_pointer_cast(cloneInfos->info(i)->clone()); } cloneInfos->userData = MapStringString::newInstance(); cloneInfos->userData.putAll(userData.begin(), userData.end()); return cloneInfos; } int64_t SegmentInfos::getVersion() { return version; } int64_t SegmentInfos::getGeneration() { return generation; } int64_t SegmentInfos::getLastGeneration() { return lastGeneration; } int64_t SegmentInfos::readCurrentVersion(const DirectoryPtr& directory) { // Fully read the segments file: this ensures that it's completely written so that if IndexWriter.prepareCommit has been called // (but not yet commit), then the reader will still see itself as current. 
SegmentInfosPtr sis(newLucene()); sis->read(directory); return sis->getVersion(); } MapStringString SegmentInfos::readCurrentUserData(const DirectoryPtr& directory) { SegmentInfosPtr sis(newLucene()); sis->read(directory); return sis->getUserData(); } void SegmentInfos::setInfoStream(const InfoStreamPtr& infoStream) { SegmentInfos::infoStream = infoStream; } void SegmentInfos::setDefaultGenFileRetryCount(int32_t count) { defaultGenFileRetryCount = count; } int32_t SegmentInfos::getDefaultGenFileRetryCount() { return defaultGenFileRetryCount; } void SegmentInfos::setDefaultGenFileRetryPauseMsec(int32_t msec) { defaultGenFileRetryPauseMsec = msec; } int32_t SegmentInfos::getDefaultGenFileRetryPauseMsec() { return defaultGenFileRetryPauseMsec; } void SegmentInfos::setDefaultGenLookaheadCount(int32_t count) { defaultGenLookaheadCount = count; } int32_t SegmentInfos::getDefaultGenLookahedCount() { return defaultGenLookaheadCount; } InfoStreamPtr SegmentInfos::getInfoStream() { return infoStream; } void SegmentInfos::message(const String& message) { if (infoStream) { *infoStream << L"SIS [" << message << L"]\n"; } } FindSegmentsFile::FindSegmentsFile(const SegmentInfosPtr& infos, const DirectoryPtr& directory) { this->_segmentInfos = infos; this->directory = directory; } FindSegmentsFile::~FindSegmentsFile() { } void FindSegmentsFile::doRun(const IndexCommitPtr& commit) { if (commit) { if (directory != commit->getDirectory()) { boost::throw_exception(IOException(L"The specified commit does not match the specified Directory")); } runBody(commit->getSegmentsFileName()); return; } String segmentFileName; int64_t lastGen = -1; int64_t gen = 0; int32_t genLookaheadCount = 0; bool retry = false; LuceneException exc; SegmentInfosPtr segmentInfos(_segmentInfos); int32_t method = 0; // Loop until we succeed in calling runBody() without hitting an IOException. 
An IOException most likely // means a commit was in process and has finished, in the time it took us to load the now-old infos files // (and segments files). It's also possible it's a true error (corrupt index). To distinguish these, // on each retry we must see "forward progress" on which generation we are trying to load. If we don't, // then the original error is real and we throw it. // We have three methods for determining the current generation. We try the first two in parallel, and // fall back to the third when necessary. while (true) { if (method == 0) { // Method 1: list the directory and use the highest segments_N file. This method works well as long // as there is no stale caching on the directory contents (NOTE: NFS clients often have such stale caching) HashSet files(directory->listAll()); int64_t genA = segmentInfos->getCurrentSegmentGeneration(files); segmentInfos->message(L"directory listing genA=" + StringUtils::toString(genA)); // Method 2: open segments.gen and read its contents. Then we take the larger of the two gens. This way, // if either approach is hitting a stale cache (NFS) we have a better chance of getting the right generation. 
int64_t genB = -1; for (int32_t i = 0; i < SegmentInfos::defaultGenFileRetryCount; ++i) { IndexInputPtr genInput; try { genInput = directory->openInput(IndexFileNames::SEGMENTS_GEN()); } catch (FileNotFoundException& e) { segmentInfos->message(L"Segments.gen open: FileNotFoundException " + e.getError()); break; } catch (IOException& e) { segmentInfos->message(L"Segments.gen open: IOException " + e.getError()); } if (genInput) { LuceneException finally; bool fileConsistent = false; try { int32_t version = genInput->readInt(); if (version == SegmentInfos::FORMAT_LOCKLESS) { int64_t gen0 = genInput->readLong(); int64_t gen1 = genInput->readLong(); segmentInfos->message(L"fallback check: " + StringUtils::toString(gen0) + L"; " + StringUtils::toString(gen1)); if (gen0 == gen1) { // the file is consistent genB = gen0; fileConsistent = true; } } } catch (IOException&) { // will retry } catch (LuceneException& e) { finally = e; } genInput->close(); finally.throwException(); if (fileConsistent) { break; } } LuceneThread::threadSleep(SegmentInfos::defaultGenFileRetryPauseMsec); } segmentInfos->message(String(IndexFileNames::SEGMENTS_GEN()) + L" check: genB=" + StringUtils::toString(genB)); // pick the larger of the two gen's gen = std::max(genA, genB); // neither approach found a generation if (gen == -1) { boost::throw_exception(FileNotFoundException(L"No segments* file found in directory")); } } // Third method (fallback if first & second methods are not reliable): since both directory cache and // file contents cache seem to be stale, just advance the generation. if (method == 1 || (method == 0 && lastGen == gen && retry)) { method = 1; if (genLookaheadCount < SegmentInfos::defaultGenLookaheadCount) { ++gen; ++genLookaheadCount; segmentInfos->message(L"look ahead increment gen to " + StringUtils::toString(gen)); } } if (lastGen == gen) { // This means we're about to try the same segments_N last tried. 
This is allowed, exactly once, because // writer could have been in the process of writing segments_N last time. if (retry) { // OK, we've tried the same segments_N file twice in a row, so this must be a real error. exc.throwException(); } else { retry = true; } } else if (method == 0) { // Segment file has advanced since our last loop, so reset retry retry = false; } lastGen = gen; segmentFileName = IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen); try { runBody(segmentFileName); segmentInfos->message(L"success on " + segmentFileName); return; } catch (LuceneException& err) { // Save the original root cause if (exc.isNull()) { exc = err; } segmentInfos->message(L"primary Exception on '" + segmentFileName + L"': " + err.getError() + L"'; will retry: retry=" + StringUtils::toString(retry) + L"; gen = " + StringUtils::toString(gen)); if (!retry && gen > 1) { // This is our first time trying this segments file (because retry is false), and, there is possibly a // segments_(N-1) (because gen > 1). So, check if the segments_(N-1) exists and try it if so. 
String prevSegmentFileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen - 1)); if (directory->fileExists(prevSegmentFileName)) { segmentInfos->message(L"fallback to prior segment file '" + prevSegmentFileName + L"'"); try { runBody(prevSegmentFileName); if (!exc.isNull()) { segmentInfos->message(L"success on fallback " + prevSegmentFileName); } return; } catch (LuceneException& err2) { segmentInfos->message(L"secondary Exception on '" + prevSegmentFileName + L"': " + err2.getError() + L"'; will retry"); } } } } } } FindSegmentsRead::FindSegmentsRead(const SegmentInfosPtr& infos, const DirectoryPtr& directory) : FindSegmentsFileT(infos, directory) { result = 0; } FindSegmentsRead::~FindSegmentsRead() { } int64_t FindSegmentsRead::doBody(const String& segmentFileName) { SegmentInfosPtr(_segmentInfos)->read(directory, segmentFileName); return 0; } SegmentInfosPtr SegmentInfos::range(int32_t first, int32_t last) { SegmentInfosPtr infos(newLucene()); infos->segmentInfos.addAll(segmentInfos.begin() + first, segmentInfos.begin() + last); return infos; } void SegmentInfos::updateGeneration(const SegmentInfosPtr& other) { lastGeneration = other->lastGeneration; generation = other->generation; version = other->version; } void SegmentInfos::rollbackCommit(const DirectoryPtr& dir) { if (pendingSegnOutput) { try { pendingSegnOutput->close(); } catch (...) { } // must carefully compute filename from "generation" since lastgeneration isn't incremented try { String segmentFileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", generation)); dir->deleteFile(segmentFileName); } catch (...) 
{ } pendingSegnOutput.reset(); } } void SegmentInfos::prepareCommit(const DirectoryPtr& dir) { TestScope testScope(L"SegmentInfos", L"prepareCommit"); if (pendingSegnOutput) { boost::throw_exception(IllegalStateException(L"prepareCommit was already called")); } write(dir); } HashSet SegmentInfos::files(const DirectoryPtr& dir, bool includeSegmentsFile) { HashSet files(HashSet::newInstance()); if (includeSegmentsFile) { files.add(getCurrentSegmentFileName()); } for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) { if ((*seginfo)->dir == dir) { HashSet segFiles((*seginfo)->files()); files.addAll(segFiles.begin(), segFiles.end()); } } return files; } void SegmentInfos::finishCommit(const DirectoryPtr& dir) { if (!pendingSegnOutput) { boost::throw_exception(IllegalStateException(L"prepareCommit was not called")); } bool success = false; LuceneException finally; try { pendingSegnOutput->finishCommit(); pendingSegnOutput->close(); pendingSegnOutput.reset(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { rollbackCommit(dir); } finally.throwException(); // NOTE: if we crash here, we have left a segments_N file in the directory in a possibly corrupt state (if // some bytes made it to stable storage and others didn't). But, the segments_N file includes checksum // at the end, which should catch this case. So when a reader tries to read it, it will throw a // CorruptIndexException, which should cause the retry logic in SegmentInfos to kick in and load the last // good (previous) segments_N-1 file. String fileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", generation)); success = false; try { dir->sync(fileName); success = true; } catch (...) 
{ } if (!success) { dir->deleteFile(fileName); } lastGeneration = generation; IndexOutputPtr genOutput; try { genOutput = dir->createOutput(IndexFileNames::SEGMENTS_GEN()); try { genOutput->writeInt(FORMAT_LOCKLESS); genOutput->writeLong(generation); genOutput->writeLong(generation); } catch (LuceneException& e) { finally = e; } genOutput->close(); finally.throwException(); } catch (...) { } } void SegmentInfos::commit(const DirectoryPtr& dir) { prepareCommit(dir); finishCommit(dir); } String SegmentInfos::segString(const DirectoryPtr& directory) { SyncLock syncLock(this); String buffer; for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) { if (seginfo != segmentInfos.begin()) { buffer += L' '; } buffer += (*seginfo)->segString(directory); if ((*seginfo)->dir != directory) { buffer += L"**"; } } return buffer; } MapStringString SegmentInfos::getUserData() { return userData; } void SegmentInfos::setUserData(MapStringString data) { if (!data) { userData = MapStringString::newInstance(); } else { userData = data; } } void SegmentInfos::replace(const SegmentInfosPtr& other) { segmentInfos.clear(); segmentInfos.addAll(other->segmentInfos.begin(), other->segmentInfos.end()); lastGeneration = other->lastGeneration; } bool SegmentInfos::hasExternalSegments(const DirectoryPtr& dir) { for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) { if ((*seginfo)->dir != dir) { return true; } } return false; } } LucenePlusPlus-rel_3.0.9/src/core/index/SegmentMergeInfo.cpp000066400000000000000000000035071456444476200240650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentMergeInfo.h" #include "IndexReader.h" #include "TermEnum.h" #include "TermPositions.h" namespace Lucene { SegmentMergeInfo::SegmentMergeInfo(int32_t b, const TermEnumPtr& te, const IndexReaderPtr& r) { base = b; _reader = r; termEnum = te; term = te->term(); ord = 0; delCount = 0; } SegmentMergeInfo::~SegmentMergeInfo() { } Collection SegmentMergeInfo::getDocMap() { if (!docMap) { delCount = 0; IndexReaderPtr reader(_reader); // build array which maps document numbers around deletions if (reader->hasDeletions()) { int32_t maxDoc = reader->maxDoc(); docMap = Collection::newInstance(maxDoc); int32_t j = 0; for (int32_t i = 0; i < maxDoc; ++i) { if (reader->isDeleted(i)) { ++delCount; docMap[i] = -1; } else { docMap[i] = j++; } } } } return docMap; } TermPositionsPtr SegmentMergeInfo::getPositions() { if (!postings) { postings = IndexReaderPtr(_reader)->termPositions(); } return postings; } bool SegmentMergeInfo::next() { if (termEnum->next()) { term = termEnum->term(); return true; } else { term.reset(); return false; } } void SegmentMergeInfo::close() { termEnum->close(); if (postings) { postings->close(); } } } LucenePlusPlus-rel_3.0.9/src/core/index/SegmentMergeQueue.cpp000066400000000000000000000016351456444476200242560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentMergeQueue.h" #include "SegmentMergeInfo.h" namespace Lucene { SegmentMergeQueue::SegmentMergeQueue(int32_t size) : PriorityQueue(size) { } SegmentMergeQueue::~SegmentMergeQueue() { } void SegmentMergeQueue::close() { while (top()) { pop()->close(); } } bool SegmentMergeQueue::lessThan(const SegmentMergeInfoPtr& first, const SegmentMergeInfoPtr& second) { int32_t comparison = first->term->compareTo(second->term); return comparison == 0 ? (first->base < second->base) : (comparison < 0); } } LucenePlusPlus-rel_3.0.9/src/core/index/SegmentMerger.cpp000066400000000000000000000652351456444476200234410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentMerger.h" #include "MergePolicy.h" #include "IndexWriter.h" #include "IndexOutput.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "FieldsReader.h" #include "FieldsWriter.h" #include "IndexFileNames.h" #include "CompoundFileWriter.h" #include "SegmentReader.h" #include "_SegmentReader.h" #include "Directory.h" #include "TermPositions.h" #include "TermVectorsReader.h" #include "TermVectorsWriter.h" #include "FormatPostingsDocsConsumer.h" #include "FormatPostingsFieldsWriter.h" #include "FormatPostingsPositionsConsumer.h" #include "FormatPostingsTermsConsumer.h" #include "SegmentMergeInfo.h" #include "SegmentMergeQueue.h" #include "SegmentWriteState.h" #include "TestPoint.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { /// Maximum number of contiguous documents to bulk-copy when merging stored fields const int32_t SegmentMerger::MAX_RAW_MERGE_DOCS = 4192; /// norms header placeholder const uint8_t SegmentMerger::NORMS_HEADER[] = {'N', 'R', 'M', static_cast(-1) }; const int32_t SegmentMerger::NORMS_HEADER_LENGTH = 4; SegmentMerger::SegmentMerger(const DirectoryPtr& dir, const String& name) { readers = Collection::newInstance(); termIndexInterval = IndexWriter::DEFAULT_TERM_INDEX_INTERVAL; mergedDocs = 0; mergeDocStores = false; omitTermFreqAndPositions = false; directory = dir; segment = name; checkAbort = newLucene(); } SegmentMerger::SegmentMerger(const IndexWriterPtr& writer, const String& name, const OneMergePtr& merge) { readers = Collection::newInstance(); mergedDocs = 0; mergeDocStores = false; omitTermFreqAndPositions = false; directory = writer->getDirectory(); segment = name; if (merge) { checkAbort = newLucene(merge, directory); } else { checkAbort = newLucene(); } termIndexInterval = writer->getTermIndexInterval(); } SegmentMerger::~SegmentMerger() { } bool SegmentMerger::hasProx() { return 
fieldInfos->hasProx(); } void SegmentMerger::add(const IndexReaderPtr& reader) { readers.add(reader); } IndexReaderPtr SegmentMerger::segmentReader(int32_t i) { return readers[i]; } int32_t SegmentMerger::merge() { return merge(true); } int32_t SegmentMerger::merge(bool mergeDocStores) { this->mergeDocStores = mergeDocStores; // NOTE: it's important to add calls to checkAbort.work(...) if you make any changes to this method that will spend a lot of time. // The frequency of this check impacts how long IndexWriter.close(false) takes to actually stop the threads. mergedDocs = mergeFields(); mergeTerms(); mergeNorms(); if (mergeDocStores && fieldInfos->hasVectors()) { mergeVectors(); } return mergedDocs; } void SegmentMerger::closeReaders() { for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { (*reader)->close(); } } HashSet SegmentMerger::getMergedFiles() { HashSet fileSet(HashSet::newInstance()); // Basic files for (HashSet::iterator ext = IndexFileNames::COMPOUND_EXTENSIONS().begin(); ext != IndexFileNames::COMPOUND_EXTENSIONS().end(); ++ext) { if (*ext == IndexFileNames::PROX_EXTENSION() && !hasProx()) { continue; } if (mergeDocStores || (*ext != IndexFileNames::FIELDS_EXTENSION() && *ext != IndexFileNames::FIELDS_INDEX_EXTENSION())) { fileSet.add(segment + L"." + *ext); } } // Fieldable norm files for (int32_t i = 0; i < fieldInfos->size(); ++i) { FieldInfoPtr fi(fieldInfos->fieldInfo(i)); if (fi->isIndexed && !fi->omitNorms) { fileSet.add(segment + L"." + IndexFileNames::NORMS_EXTENSION()); break; } } // Vector files if (fieldInfos->hasVectors() && mergeDocStores) { for (HashSet::iterator ext = IndexFileNames::VECTOR_EXTENSIONS().begin(); ext != IndexFileNames::VECTOR_EXTENSIONS().end(); ++ext) { fileSet.add(segment + L"." 
+ *ext); } } return fileSet; } HashSet SegmentMerger::createCompoundFile(const String& fileName) { HashSet files(getMergedFiles()); CompoundFileWriterPtr cfsWriter(newLucene(directory, fileName, checkAbort)); // Now merge all added files for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { cfsWriter->addFile(*file); } // Perform the merge cfsWriter->close(); return files; } void SegmentMerger::addIndexed(const IndexReaderPtr& reader, const FieldInfosPtr& fInfos, HashSet names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions) { for (HashSet::iterator field = names.begin(); field != names.end(); ++field) { fInfos->add(*field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader->hasNorms(*field), storePayloads, omitTFAndPositions); } } void SegmentMerger::setMatchingSegmentReaders() { // If the i'th reader is a SegmentReader and has identical fieldName -> number mapping, then // this array will be non-null at position i int32_t numReaders = readers.size(); matchingSegmentReaders = Collection::newInstance(numReaders); // If this reader is a SegmentReader, and all of its field name -> number mappings match the // "merged" FieldInfos, then we can do a bulk copy of the stored fields for (int32_t i = 0; i < numReaders; ++i) { IndexReaderPtr reader(readers[i]); SegmentReaderPtr segmentReader(boost::dynamic_pointer_cast(reader)); if (segmentReader) { bool same = true; FieldInfosPtr segmentFieldInfos(segmentReader->fieldInfos()); int32_t numFieldInfos = segmentFieldInfos->size(); for (int32_t j = 0; same && j < numFieldInfos; ++j) { same = (fieldInfos->fieldName(j) == segmentFieldInfos->fieldName(j)); } if (same) { matchingSegmentReaders[i] = segmentReader; } } } // Used for bulk-reading raw bytes for stored fields rawDocLengths = Collection::newInstance(MAX_RAW_MERGE_DOCS); rawDocLengths2 = 
Collection::newInstance(MAX_RAW_MERGE_DOCS); } int32_t SegmentMerger::mergeFields() { if (!mergeDocStores) { // When we are not merging by doc stores, their field name -> number mapping are the same. // So, we start with the fieldInfos of the last segment in this case, to keep that numbering fieldInfos = boost::dynamic_pointer_cast(boost::dynamic_pointer_cast(readers[readers.size() - 1])->core->fieldInfos->clone()); } else { fieldInfos = newLucene(); // merge field names } for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { SegmentReaderPtr segmentReader(boost::dynamic_pointer_cast(*reader)); if (segmentReader) { FieldInfosPtr readerFieldInfos(segmentReader->fieldInfos()); int32_t numReaderFieldInfos = readerFieldInfos->size(); for (int32_t j = 0; j < numReaderFieldInfos; ++j) { FieldInfoPtr fi(readerFieldInfos->fieldInfo(j)); fieldInfos->add(fi->name, fi->isIndexed, fi->storeTermVector, fi->storePositionWithTermVector, fi->storeOffsetWithTermVector, !(*reader)->hasNorms(fi->name), fi->storePayloads, fi->omitTermFreqAndPositions); } } else { addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_POSITION), true, true, false, false, false); addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_OFFSET), true, false, true, false, false); addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR), true, false, false, false, false); addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true); addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_STORES_PAYLOADS), false, false, false, true, false); addIndexed(*reader, fieldInfos, 
(*reader)->getFieldNames(IndexReader::FIELD_OPTION_INDEXED), false, false, false, false, false); fieldInfos->add((*reader)->getFieldNames(IndexReader::FIELD_OPTION_UNINDEXED), false); } } fieldInfos->write(directory, segment + L".fnm"); int32_t docCount = 0; setMatchingSegmentReaders(); if (mergeDocStores) { // merge field values FieldsWriterPtr fieldsWriter(newLucene(directory, segment, fieldInfos)); LuceneException finally; try { int32_t idx = 0; for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { SegmentReaderPtr matchingSegmentReader(matchingSegmentReaders[idx++]); FieldsReaderPtr matchingFieldsReader; if (matchingSegmentReader) { FieldsReaderPtr fieldsReader(matchingSegmentReader->getFieldsReader()); if (fieldsReader && fieldsReader->canReadRawDocs()) { matchingFieldsReader = fieldsReader; } } if ((*reader)->hasDeletions()) { docCount += copyFieldsWithDeletions(fieldsWriter, *reader, matchingFieldsReader); } else { docCount += copyFieldsNoDeletions(fieldsWriter, *reader, matchingFieldsReader); } } } catch (LuceneException& e) { finally = e; } fieldsWriter->close(); finally.throwException(); String fileName(segment + L"." 
+ IndexFileNames::FIELDS_INDEX_EXTENSION()); int64_t fdxFileLength = directory->fileLength(fileName); if (4 + ((int64_t)docCount) * 8 != fdxFileLength) { boost::throw_exception(RuntimeException(L"mergeFields produced an invalid result: docCount is " + StringUtils::toString(docCount) + L" but fdx file size is " + StringUtils::toString(fdxFileLength) + L" file=" + fileName + L" file exists?=" + StringUtils::toString(directory->fileExists(fileName)) + L"; now aborting this merge to prevent index corruption")); } } else { // If we are skipping the doc stores, that means there are no deletions in any of these segments, // so we just sum numDocs() of each segment to get total docCount for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { docCount += (*reader)->numDocs(); } } return docCount; } int32_t SegmentMerger::copyFieldsWithDeletions(const FieldsWriterPtr& fieldsWriter, const IndexReaderPtr& reader, const FieldsReaderPtr& matchingFieldsReader) { int32_t docCount = 0; int32_t maxDoc = reader->maxDoc(); if (matchingFieldsReader) { // We can bulk-copy because the fieldInfos are "congruent" for (int32_t j = 0; j < maxDoc;) { if (reader->isDeleted(j)) { // skip deleted docs ++j; continue; } // We can optimize this case (doing a bulk byte copy) since the field numbers are identical int32_t start = j; int32_t numDocs = 0; do { ++j; ++numDocs; if (j >= maxDoc) { break; } if (reader->isDeleted(j)) { ++j; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); IndexInputPtr stream(matchingFieldsReader->rawDocs(rawDocLengths, start, numDocs)); fieldsWriter->addRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; checkAbort->work(300 * numDocs); } } else { for (int32_t j = 0; j < maxDoc; ++j) { if (reader->isDeleted(j)) { // skip deleted docs continue; } // NOTE: it's very important to first assign to doc then pass it to termVectorsWriter.addAllDocVectors fieldsWriter->addDocument(reader->document(j)); ++docCount; 
checkAbort->work(300); } } return docCount; } int32_t SegmentMerger::copyFieldsNoDeletions(const FieldsWriterPtr& fieldsWriter, const IndexReaderPtr& reader, const FieldsReaderPtr& matchingFieldsReader) { int32_t docCount = 0; int32_t maxDoc = reader->maxDoc(); if (matchingFieldsReader) { // We can bulk-copy because the fieldInfos are "congruent" while (docCount < maxDoc) { int32_t len = std::min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); IndexInputPtr stream(matchingFieldsReader->rawDocs(rawDocLengths, docCount, len)); fieldsWriter->addRawDocuments(stream, rawDocLengths, len); docCount += len; checkAbort->work(300 * len); } } else { for (; docCount < maxDoc; ++docCount) { // NOTE: it's very important to first assign to doc then pass it to termVectorsWriter.addAllDocVectors fieldsWriter->addDocument(reader->document(docCount)); checkAbort->work(300); } } return docCount; } void SegmentMerger::mergeVectors() { TermVectorsWriterPtr termVectorsWriter(newLucene(directory, segment, fieldInfos)); LuceneException finally; try { int32_t idx = 0; for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { SegmentReaderPtr matchingSegmentReader(matchingSegmentReaders[idx++]); TermVectorsReaderPtr matchingVectorsReader; if (matchingSegmentReader) { TermVectorsReaderPtr vectorsReader(matchingSegmentReader->getTermVectorsReaderOrig()); // If the TV* files are an older format then they cannot read raw docs if (vectorsReader && vectorsReader->canReadRawDocs()) { matchingVectorsReader = vectorsReader; } } if ((*reader)->hasDeletions()) { copyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, *reader); } else { copyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, *reader); } } } catch (LuceneException& e) { finally = e; } termVectorsWriter->close(); finally.throwException(); String fileName(segment + L"." 
+ IndexFileNames::VECTORS_INDEX_EXTENSION()); int64_t tvxSize = directory->fileLength(fileName); if (4 + ((int64_t)mergedDocs) * 16 != tvxSize) { boost::throw_exception(RuntimeException(L"mergeVectors produced an invalid result: mergedDocs is " + StringUtils::toString(mergedDocs) + L" but tvx size is " + StringUtils::toString(tvxSize) + L" file=" + fileName + L" file exists?=" + StringUtils::toString(directory->fileExists(fileName)) + L"; now aborting this merge to prevent index corruption")); } } void SegmentMerger::copyVectorsWithDeletions(const TermVectorsWriterPtr& termVectorsWriter, const TermVectorsReaderPtr& matchingVectorsReader, const IndexReaderPtr& reader) { int32_t maxDoc = reader->maxDoc(); if (matchingVectorsReader) { // We can bulk-copy because the fieldInfos are "congruent" for (int32_t docNum = 0; docNum < maxDoc;) { if (reader->isDeleted(docNum)) { // skip deleted docs ++docNum; continue; } // We can optimize this case (doing a bulk byte copy) since the field numbers are identical int32_t start = docNum; int32_t numDocs = 0; do { ++docNum; ++numDocs; if (docNum >= maxDoc) { break; } if (reader->isDeleted(docNum)) { ++docNum; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); matchingVectorsReader->rawDocs(rawDocLengths, rawDocLengths2, start, numDocs); termVectorsWriter->addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); checkAbort->work(300 * numDocs); } } else { for (int32_t docNum = 0; docNum < maxDoc; ++docNum) { if (reader->isDeleted(docNum)) { // skip deleted docs continue; } // NOTE: it's very important to first assign to vectors then pass it to termVectorsWriter.addAllDocVectors termVectorsWriter->addAllDocVectors(reader->getTermFreqVectors(docNum)); checkAbort->work(300); } } } void SegmentMerger::copyVectorsNoDeletions(const TermVectorsWriterPtr& termVectorsWriter, const TermVectorsReaderPtr& matchingVectorsReader, const IndexReaderPtr& reader) { int32_t maxDoc = reader->maxDoc(); if (matchingVectorsReader) { 
// We can bulk-copy because the fieldInfos are "congruent" int32_t docCount = 0; while (docCount < maxDoc) { int32_t len = std::min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); matchingVectorsReader->rawDocs(rawDocLengths, rawDocLengths2, docCount, len); termVectorsWriter->addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len); docCount += len; checkAbort->work(300 * len); } } else { for (int32_t docNum = 0; docNum < maxDoc; ++docNum) { // NOTE: it's very important to first assign to vectors then pass it to termVectorsWriter.addAllDocVectors termVectorsWriter->addAllDocVectors(reader->getTermFreqVectors(docNum)); checkAbort->work(300); } } } void SegmentMerger::mergeTerms() { TestScope testScope(L"SegmentMerger", L"mergeTerms"); SegmentWriteStatePtr state(newLucene(DocumentsWriterPtr(), directory, segment, L"", mergedDocs, 0, termIndexInterval)); FormatPostingsFieldsConsumerPtr consumer(newLucene(state, fieldInfos)); LuceneException finally; try { queue = newLucene(readers.size()); mergeTermInfos(consumer); } catch (LuceneException& e) { finally = e; } consumer->finish(); if (queue) { queue->close(); } finally.throwException(); } void SegmentMerger::mergeTermInfos(const FormatPostingsFieldsConsumerPtr& consumer) { int32_t base = 0; int32_t readerCount = readers.size(); for (int32_t i = 0; i < readerCount; ++i) { IndexReaderPtr reader(readers[i]); TermEnumPtr termEnum(reader->terms()); SegmentMergeInfoPtr smi(newLucene(base, termEnum, reader)); Collection docMap(smi->getDocMap()); if (docMap) { if (!docMaps) { docMaps = Collection< Collection >::newInstance(readerCount); delCounts = Collection::newInstance(readerCount); } docMaps[i] = docMap; IndexReaderPtr segmentMergeReader(smi->_reader); delCounts[i] = segmentMergeReader->maxDoc() - segmentMergeReader->numDocs(); } base += reader->numDocs(); BOOST_ASSERT(reader->numDocs() == reader->maxDoc() - smi->delCount); if (smi->next()) { queue->add(smi); // initialize queue } else { smi->close(); } } 
Collection match(Collection::newInstance(readers.size())); String currentField; FormatPostingsTermsConsumerPtr termsConsumer; while (!queue->empty()) { int32_t matchSize = 0; // pop matching terms match[matchSize++] = queue->pop(); TermPtr term(match[0]->term); SegmentMergeInfoPtr top(queue->empty() ? SegmentMergeInfoPtr() : queue->top()); while (top && term->compareTo(top->term) == 0) { match[matchSize++] = queue->pop(); top = queue->top(); } if (currentField != term->_field) { currentField = term->_field; if (termsConsumer) { termsConsumer->finish(); } FieldInfoPtr fieldInfo(fieldInfos->fieldInfo(currentField)); termsConsumer = consumer->addField(fieldInfo); omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; } int32_t df = appendPostings(termsConsumer, match, matchSize); // add new TermInfo checkAbort->work(df / 3.0); while (matchSize > 0) { SegmentMergeInfoPtr smi(match[--matchSize]); if (smi->next()) { queue->add(smi); // restore queue } else { smi->close(); // done with a segment } } } } Collection< Collection > SegmentMerger::getDocMaps() { return docMaps; } Collection SegmentMerger::getDelCounts() { return delCounts; } int32_t SegmentMerger::appendPostings(const FormatPostingsTermsConsumerPtr& termsConsumer, Collection smis, int32_t n) { FormatPostingsDocsConsumerPtr docConsumer(termsConsumer->addTerm(smis[0]->term->_text)); int32_t df = 0; for (int32_t i = 0; i < n; ++i) { SegmentMergeInfoPtr smi(smis[i]); TermPositionsPtr postings(smi->getPositions()); BOOST_ASSERT(postings); int32_t base = smi->base; Collection docMap(smi->getDocMap()); postings->seek(smi->termEnum); while (postings->next()) { ++df; int32_t doc = postings->doc(); if (docMap) { doc = docMap[doc]; // map around deletions } doc += base; // convert to merged space int32_t freq = postings->freq(); FormatPostingsPositionsConsumerPtr posConsumer(docConsumer->addDoc(doc, freq)); if (!omitTermFreqAndPositions) { for (int32_t j = 0; j < freq; ++j) { int32_t position = 
postings->nextPosition(); int32_t payloadLength = postings->getPayloadLength(); if (payloadLength > 0) { if (!payloadBuffer) { payloadBuffer = ByteArray::newInstance(payloadLength); } if (payloadBuffer.size() < payloadLength) { payloadBuffer.resize(payloadLength); } postings->getPayload(payloadBuffer, 0); } posConsumer->addPosition(position, payloadBuffer, 0, payloadLength); } posConsumer->finish(); } } } docConsumer->finish(); return df; } void SegmentMerger::mergeNorms() { ByteArray normBuffer; IndexOutputPtr output; LuceneException finally; try { int32_t numFieldInfos = fieldInfos->size(); for (int32_t i = 0; i < numFieldInfos; ++i) { FieldInfoPtr fi(fieldInfos->fieldInfo(i)); if (fi->isIndexed && !fi->omitNorms) { if (!output) { output = directory->createOutput(segment + L"." + IndexFileNames::NORMS_EXTENSION()); output->writeBytes(NORMS_HEADER, SIZEOF_ARRAY(NORMS_HEADER)); } for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { int32_t maxDoc = (*reader)->maxDoc(); if (!normBuffer) { normBuffer = ByteArray::newInstance(maxDoc); } if (normBuffer.size() < maxDoc) { // the buffer is too small for the current segment normBuffer.resize(maxDoc); } MiscUtils::arrayFill(normBuffer.get(), 0, normBuffer.size(), 0); (*reader)->norms(fi->name, normBuffer, 0); if (!(*reader)->hasDeletions()) { // optimized case for segments without deleted docs output->writeBytes(normBuffer.get(), maxDoc); } else { // this segment has deleted docs, so we have to check for every doc if it is deleted or not for (int32_t k = 0; k < maxDoc; ++k) { if (!(*reader)->isDeleted(k)) { output->writeByte(normBuffer[k]); } } } checkAbort->work(maxDoc); } } } } catch (LuceneException& e) { finally = e; } if (output) { output->close(); } finally.throwException(); } CheckAbort::CheckAbort(const OneMergePtr& merge, const DirectoryPtr& dir) { workCount = 0; this->merge = merge; this->_dir = dir; } CheckAbort::~CheckAbort() { } void CheckAbort::work(double units) { 
workCount += units; if (workCount >= 10000.0) { merge->checkAborted(DirectoryPtr(_dir)); workCount = 0; } } CheckAbortNull::CheckAbortNull() : CheckAbort(OneMergePtr(), DirectoryPtr()) { } CheckAbortNull::~CheckAbortNull() { } void CheckAbortNull::work(double units) { // do nothing } } LucenePlusPlus-rel_3.0.9/src/core/index/SegmentReader.cpp000066400000000000000000001132371456444476200234160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "SegmentReader.h" #include "_SegmentReader.h" #include "IndexFileNames.h" #include "DirectoryReader.h" #include "CompoundFileReader.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "FieldsReader.h" #include "TermInfo.h" #include "TermInfosReader.h" #include "TermVectorsReader.h" #include "IndexOutput.h" #include "ReadOnlySegmentReader.h" #include "BitVector.h" #include "SegmentTermEnum.h" #include "SegmentTermPositions.h" #include "SegmentInfo.h" #include "SegmentMerger.h" #include "AllTermDocs.h" #include "DefaultSimilarity.h" #include "FieldCache.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { SegmentReader::SegmentReader() { _norms = MapStringNorm::newInstance(); readOnly = false; deletedDocsDirty = false; normsDirty = false; rollbackHasChanges = false; rollbackDeletedDocsDirty = false; rollbackNormsDirty = false; readBufferSize = 0; pendingDeleteCount = 0; rollbackPendingDeleteCount = 0; } SegmentReader::~SegmentReader() { } void SegmentReader::initialize() { fieldsReaderLocal = newLucene(shared_from_this()); } SegmentReaderPtr SegmentReader::get(bool readOnly, const SegmentInfoPtr& si, int32_t termInfosIndexDivisor) { return get(readOnly, 
si->dir, si, BufferedIndexInput::BUFFER_SIZE, true, termInfosIndexDivisor); } SegmentReaderPtr SegmentReader::get(bool readOnly, const DirectoryPtr& dir, const SegmentInfoPtr& si, int32_t readBufferSize, bool doOpenStores, int32_t termInfosIndexDivisor) { SegmentReaderPtr instance(readOnly ? newLucene() : newLucene()); instance->readOnly = readOnly; instance->si = si; instance->readBufferSize = readBufferSize; bool success = false; LuceneException finally; try { instance->core = newLucene(instance, dir, si, readBufferSize, termInfosIndexDivisor); if (doOpenStores) { instance->core->openDocStores(si); } instance->loadDeletedDocs(); instance->openNorms(instance->core->cfsDir, readBufferSize); success = true; } catch (LuceneException& e) { finally = e; } // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception above. // In this case, we want to explicitly close any subset of things that were opened if (!success) { instance->doClose(); } finally.throwException(); return instance; } void SegmentReader::openDocStores() { core->openDocStores(si); } bool SegmentReader::checkDeletedCounts() { int32_t recomputedCount = deletedDocs->getRecomputedCount(); BOOST_ASSERT(deletedDocs->count() == recomputedCount); BOOST_ASSERT(si->getDelCount() == recomputedCount); // Verify # deletes does not exceed maxDoc for this segment BOOST_ASSERT(si->getDelCount() <= maxDoc()); return true; } void SegmentReader::loadDeletedDocs() { // NOTE: the bitvector is stored using the regular directory, not cfs if (hasDeletions(si)) { deletedDocs = newLucene(directory(), si->getDelFileName()); deletedDocsRef = newLucene(); BOOST_ASSERT(checkDeletedCounts()); } else { BOOST_ASSERT(si->getDelCount() == 0); } } ByteArray SegmentReader::cloneNormBytes(ByteArray bytes) { ByteArray cloneBytes(ByteArray::newInstance(bytes.size())); MiscUtils::arrayCopy(bytes.get(), 0, cloneBytes.get(), 0, bytes.size()); return cloneBytes; } BitVectorPtr SegmentReader::cloneDeletedDocs(const 
BitVectorPtr& bv) { return boost::dynamic_pointer_cast(bv->clone()); } LuceneObjectPtr SegmentReader::clone(const LuceneObjectPtr& other) { try { return SegmentReader::clone(readOnly, other); // Preserve current readOnly } catch (...) { boost::throw_exception(RuntimeException()); } return LuceneObjectPtr(); } LuceneObjectPtr SegmentReader::clone(bool openReadOnly, const LuceneObjectPtr& other) { SyncLock syncLock(this); return reopenSegment(si, true, openReadOnly); } SegmentReaderPtr SegmentReader::reopenSegment(const SegmentInfoPtr& si, bool doClone, bool openReadOnly) { SyncLock syncLock(this); bool deletionsUpToDate = (this->si->hasDeletions() == si->hasDeletions() && (!si->hasDeletions() || this->si->getDelFileName() == si->getDelFileName())); bool normsUpToDate = true; int32_t fieldCount = core->fieldInfos->size(); Collection fieldNormsChanged(Collection::newInstance(fieldCount)); for (int32_t i = 0; i < fieldCount; ++i) { if (this->si->getNormFileName(i) != si->getNormFileName(i)) { normsUpToDate = false; fieldNormsChanged[i] = true; } } // if we're cloning we need to run through the reopenSegment logic also if both old and new readers // aren't readonly, we clone to avoid sharing modifications if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly) { return shared_from_this(); } // When cloning, the incoming SegmentInfos should not have any changes in it BOOST_ASSERT(!doClone || (normsUpToDate && deletionsUpToDate)); // clone reader SegmentReaderPtr clone(openReadOnly ? 
newLucene() : newLucene()); bool success = false; LuceneException finally; try { core->incRef(); clone->core = core; clone->readOnly = openReadOnly; clone->si = si; clone->readBufferSize = readBufferSize; if (!openReadOnly && _hasChanges) { // My pending changes transfer to the new reader clone->pendingDeleteCount = pendingDeleteCount; clone->deletedDocsDirty = deletedDocsDirty; clone->normsDirty = normsDirty; clone->_hasChanges = _hasChanges; _hasChanges = false; } if (doClone) { if (deletedDocs) { deletedDocsRef->incRef(); clone->deletedDocs = deletedDocs; clone->deletedDocsRef = deletedDocsRef; } } else { if (!deletionsUpToDate) { // load deleted docs BOOST_ASSERT(!clone->deletedDocs); clone->loadDeletedDocs(); } else if (deletedDocs) { deletedDocsRef->incRef(); clone->deletedDocs = deletedDocs; clone->deletedDocsRef = deletedDocsRef; } } clone->_norms = MapStringNorm::newInstance(); // Clone norms for (int32_t i = 0; i < fieldNormsChanged.size(); ++i) { // Clone unchanged norms to the cloned reader if (doClone || !fieldNormsChanged[i]) { String curField(core->fieldInfos->fieldInfo(i)->name); NormPtr norm(this->_norms.get(curField)); if (norm) { NormPtr cloneNorm(boost::dynamic_pointer_cast(norm->clone())); cloneNorm->_reader = clone; clone->_norms.put(curField, cloneNorm); } } } // If we are not cloning, then this will open anew any norms that have changed clone->openNorms(si->getUseCompoundFile() ? 
core->getCFSReader() : directory(), readBufferSize); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { // An exception occurred during reopen, we have to decRef the norms that we incRef'ed already // and close singleNormsStream and FieldsReader clone->decRef(); } finally.throwException(); return clone; } void SegmentReader::doCommit(MapStringString commitUserData) { if (_hasChanges) { startCommit(); bool success = false; LuceneException finally; try { commitChanges(commitUserData); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { rollbackCommit(); } finally.throwException(); } } void SegmentReader::commitChanges(MapStringString commitUserData) { if (deletedDocsDirty) { // re-write deleted si->advanceDelGen(); // We can write directly to the actual name (vs to a .tmp & renaming it) because the file // is not live until segments file is written String delFileName(si->getDelFileName()); bool success = false; LuceneException finally; try { deletedDocs->write(directory(), delFileName); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { try { directory()->deleteFile(delFileName); } catch (...) { // suppress this so we keep throwing the original exception } } finally.throwException(); si->setDelCount(si->getDelCount() + pendingDeleteCount); pendingDeleteCount = 0; BOOST_ASSERT(deletedDocs->count() == si->getDelCount()); // delete count mismatch during commit? 
} else { BOOST_ASSERT(pendingDeleteCount == 0); } if (normsDirty) { // re-write norms si->setNumFields(core->fieldInfos->size()); for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) { if (norm->second->dirty) { norm->second->reWrite(si); } } } deletedDocsDirty = false; normsDirty = false; _hasChanges = false; } FieldsReaderPtr SegmentReader::getFieldsReader() { return fieldsReaderLocal->get(); } void SegmentReader::doClose() { termVectorsLocal.close(); fieldsReaderLocal->close(); if (deletedDocs) { deletedDocsRef->decRef(); deletedDocs.reset(); // null so if an app hangs on to us we still free most ram } for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) { norm->second->decRef(); } if (core) { core->decRef(); } } bool SegmentReader::hasDeletions(const SegmentInfoPtr& si) { // Don't call ensureOpen() here (it could affect performance) return si->hasDeletions(); } bool SegmentReader::hasDeletions() { // Don't call ensureOpen() here (it could affect performance) return deletedDocs.get() != NULL; } bool SegmentReader::usesCompoundFile(const SegmentInfoPtr& si) { return si->getUseCompoundFile(); } bool SegmentReader::hasSeparateNorms(const SegmentInfoPtr& si) { return si->hasSeparateNorms(); } void SegmentReader::doDelete(int32_t docNum) { if (!deletedDocs) { deletedDocs = newLucene(maxDoc()); deletedDocsRef = newLucene(); } // there is more than 1 SegmentReader with a reference to this deletedDocs BitVector so decRef // the current deletedDocsRef, clone the BitVector, create a new deletedDocsRef if (deletedDocsRef->refCount() > 1) { SegmentReaderRefPtr oldRef(deletedDocsRef); deletedDocs = cloneDeletedDocs(deletedDocs); deletedDocsRef = newLucene(); oldRef->decRef(); } deletedDocsDirty = true; if (!deletedDocs->getAndSet(docNum)) { ++pendingDeleteCount; } } void SegmentReader::doUndeleteAll() { deletedDocsDirty = false; if (deletedDocs) { BOOST_ASSERT(deletedDocsRef); deletedDocsRef->decRef(); 
deletedDocs.reset(); deletedDocsRef.reset(); pendingDeleteCount = 0; si->clearDelGen(); si->setDelCount(0); } else { BOOST_ASSERT(!deletedDocsRef); BOOST_ASSERT(pendingDeleteCount == 0); } } HashSet SegmentReader::files() { return si->files(); } TermEnumPtr SegmentReader::terms() { ensureOpen(); return core->getTermsReader()->terms(); } TermEnumPtr SegmentReader::terms(const TermPtr& t) { ensureOpen(); return core->getTermsReader()->terms(t); } FieldInfosPtr SegmentReader::fieldInfos() { return core->fieldInfos; } DocumentPtr SegmentReader::document(int32_t n, const FieldSelectorPtr& fieldSelector) { ensureOpen(); return getFieldsReader()->doc(n, fieldSelector); } bool SegmentReader::isDeleted(int32_t n) { SyncLock syncLock(this); return (deletedDocs && deletedDocs->get(n)); } TermDocsPtr SegmentReader::termDocs(const TermPtr& term) { if (!term) { return newLucene(shared_from_this()); } else { return IndexReader::termDocs(term); } } TermDocsPtr SegmentReader::termDocs() { ensureOpen(); return newLucene(shared_from_this()); } TermPositionsPtr SegmentReader::termPositions() { ensureOpen(); return newLucene(shared_from_this()); } int32_t SegmentReader::docFreq(const TermPtr& t) { ensureOpen(); TermInfoPtr ti(core->getTermsReader()->get(t)); return ti ? 
ti->docFreq : 0; } int32_t SegmentReader::numDocs() { // Don't call ensureOpen() here (it could affect performance) int32_t n = maxDoc(); if (deletedDocs) { n -= deletedDocs->count(); } return n; } int32_t SegmentReader::maxDoc() { // Don't call ensureOpen() here (it could affect performance) return si->docCount; } HashSet SegmentReader::getFieldNames(FieldOption fieldOption) { ensureOpen(); HashSet fieldSet(HashSet::newInstance()); for (int32_t i = 0; i < core->fieldInfos->size(); ++i) { FieldInfoPtr fi(core->fieldInfos->fieldInfo(i)); if (fieldOption == FIELD_OPTION_ALL) { fieldSet.add(fi->name); } else if (!fi->isIndexed && fieldOption == FIELD_OPTION_UNINDEXED) { fieldSet.add(fi->name); } else if (fi->omitTermFreqAndPositions && fieldOption == FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS) { fieldSet.add(fi->name); } else if (fi->storePayloads && fieldOption == FIELD_OPTION_STORES_PAYLOADS) { fieldSet.add(fi->name); } else if (fi->isIndexed && fieldOption == FIELD_OPTION_INDEXED) { fieldSet.add(fi->name); } else if (fi->isIndexed && !fi->storeTermVector && fieldOption == FIELD_OPTION_INDEXED_NO_TERMVECTOR) { fieldSet.add(fi->name); } else if (fi->storeTermVector && !fi->storePositionWithTermVector && !fi->storeOffsetWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR) { fieldSet.add(fi->name); } else if (fi->isIndexed && fi->storeTermVector && fieldOption == FIELD_OPTION_INDEXED_WITH_TERMVECTOR) { fieldSet.add(fi->name); } else if (fi->storePositionWithTermVector && !fi->storeOffsetWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR_WITH_POSITION) { fieldSet.add(fi->name); } else if (fi->storeOffsetWithTermVector && !fi->storePositionWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR_WITH_OFFSET) { fieldSet.add(fi->name); } else if (fi->storeOffsetWithTermVector && fi->storePositionWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET) { fieldSet.add(fi->name); } } return fieldSet; } bool SegmentReader::hasNorms(const String& 
field) { SyncLock syncLock(this); ensureOpen(); return _norms.contains(field); } ByteArray SegmentReader::getNorms(const String& field) { SyncLock syncLock(this); NormPtr norm(_norms.get(field)); return norm ? norm->bytes() : ByteArray(); } ByteArray SegmentReader::norms(const String& field) { SyncLock syncLock(this); ensureOpen(); return getNorms(field); } void SegmentReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { NormPtr norm(_norms.get(field)); if (!norm) { // not an indexed field return; } normsDirty = true; ByteArray bytes(norm->copyOnWrite()); if (doc < 0 || doc >= bytes.size()) { boost::throw_exception(IndexOutOfBoundsException()); } bytes[doc] = value; // set the value } void SegmentReader::norms(const String& field, ByteArray norms, int32_t offset) { SyncLock syncLock(this); ensureOpen(); NormPtr norm(_norms.get(field)); if (!norm) { MiscUtils::arrayFill(norms.get(), offset, norms.size(), DefaultSimilarity::encodeNorm(1.0)); return; } norm->bytes(norms.get(), offset, maxDoc()); } void SegmentReader::openNorms(const DirectoryPtr& cfsDir, int32_t readBufferSize) { int64_t nextNormSeek = SegmentMerger::NORMS_HEADER_LENGTH; // skip header (header unused for now) int32_t _maxDoc = maxDoc(); for (int32_t i = 0; i < core->fieldInfos->size(); ++i) { FieldInfoPtr fi(core->fieldInfos->fieldInfo(i)); if (_norms.contains(fi->name)) { // in case this SegmentReader is being re-opened, we might be able to reuse some norm // instances and skip loading them here continue; } if (fi->isIndexed && !fi->omitNorms) { DirectoryPtr d(directory()); String fileName(si->getNormFileName(fi->number)); if (!si->hasSeparateNorms(fi->number)) { d = cfsDir; } // singleNormFile means multiple norms share this file bool singleNormFile = boost::ends_with(fileName, String(L".") + IndexFileNames::NORMS_EXTENSION()); IndexInputPtr normInput; int64_t normSeek; if (singleNormFile) { normSeek = nextNormSeek; if (!singleNormStream) { singleNormStream = d->openInput(fileName, 
readBufferSize); singleNormRef = newLucene(); } else { singleNormRef->incRef(); } // All norms in the .nrm file can share a single IndexInput since they are only used in // a synchronized context. If this were to change in the future, a clone could be done here. normInput = singleNormStream; } else { normSeek = 0; normInput = d->openInput(fileName); } _norms.put(fi->name, newLucene(shared_from_this(), normInput, fi->number, normSeek)); nextNormSeek += _maxDoc; // increment also if some norms are separate } } } bool SegmentReader::termsIndexLoaded() { return core->termsIndexIsLoaded(); } void SegmentReader::loadTermsIndex(int32_t termsIndexDivisor) { core->loadTermsIndex(si, termsIndexDivisor); } bool SegmentReader::normsClosed() { if (singleNormStream) { return false; } for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) { if (norm->second->refCount > 0) { return false; } } return true; } bool SegmentReader::normsClosed(const String& field) { return (_norms.get(field)->refCount == 0); } TermVectorsReaderPtr SegmentReader::getTermVectorsReader() { TermVectorsReaderPtr tvReader(termVectorsLocal.get()); if (!tvReader) { TermVectorsReaderPtr orig(core->getTermVectorsReaderOrig()); if (!orig) { return TermVectorsReaderPtr(); } else { try { tvReader = boost::dynamic_pointer_cast(orig->clone()); } catch (...) 
{ return TermVectorsReaderPtr(); } } termVectorsLocal.set(tvReader); } return tvReader; } TermVectorsReaderPtr SegmentReader::getTermVectorsReaderOrig() { return core->getTermVectorsReaderOrig(); } TermFreqVectorPtr SegmentReader::getTermFreqVector(int32_t docNumber, const String& field) { // Check if this field is invalid or has no stored term vector ensureOpen(); FieldInfoPtr fi(core->fieldInfos->fieldInfo(field)); if (!fi || !fi->storeTermVector) { return TermFreqVectorPtr(); } TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); if (!termVectorsReader) { return TermFreqVectorPtr(); } return termVectorsReader->get(docNumber, field); } void SegmentReader::getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) { ensureOpen(); FieldInfoPtr fi(core->fieldInfos->fieldInfo(field)); if (!fi || !fi->storeTermVector) { return; } TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); if (!termVectorsReader) { return; } termVectorsReader->get(docNumber, field, mapper); } void SegmentReader::getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) { ensureOpen(); TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); if (!termVectorsReader) { return; } termVectorsReader->get(docNumber, mapper); } Collection SegmentReader::getTermFreqVectors(int32_t docNumber) { ensureOpen(); TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); if (!termVectorsReader) { return Collection(); } return termVectorsReader->get(docNumber); } String SegmentReader::getSegmentName() { return core->segment; } SegmentInfoPtr SegmentReader::getSegmentInfo() { return si; } void SegmentReader::setSegmentInfo(const SegmentInfoPtr& info) { si = info; } void SegmentReader::startCommit() { rollbackSegmentInfo = boost::dynamic_pointer_cast(si->clone()); rollbackHasChanges = _hasChanges; rollbackDeletedDocsDirty = deletedDocsDirty; rollbackNormsDirty = normsDirty; rollbackPendingDeleteCount = pendingDeleteCount; for 
(MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) { norm->second->rollbackDirty = norm->second->dirty; } } void SegmentReader::rollbackCommit() { si->reset(rollbackSegmentInfo); _hasChanges = rollbackHasChanges; deletedDocsDirty = rollbackDeletedDocsDirty; normsDirty = rollbackNormsDirty; pendingDeleteCount = rollbackPendingDeleteCount; for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) { norm->second->dirty = norm->second->rollbackDirty; } } DirectoryPtr SegmentReader::directory() { // Don't ensureOpen here - in certain cases, when a cloned/reopened reader needs to commit, // it may call this method on the closed original reader return core->dir; } LuceneObjectPtr SegmentReader::getFieldCacheKey() { return core->freqStream; } LuceneObjectPtr SegmentReader::getDeletesCacheKey() { return deletedDocs; } int64_t SegmentReader::getUniqueTermCount() { return core->getTermsReader()->size(); } SegmentReaderPtr SegmentReader::getOnlySegmentReader(const DirectoryPtr& dir) { return getOnlySegmentReader(IndexReader::open(dir, false)); } SegmentReaderPtr SegmentReader::getOnlySegmentReader(const IndexReaderPtr& reader) { SegmentReaderPtr segmentReader(boost::dynamic_pointer_cast(reader)); if (segmentReader) { return segmentReader; } DirectoryReaderPtr directoryReader(boost::dynamic_pointer_cast(reader)); if (directoryReader) { Collection subReaders(directoryReader->getSequentialSubReaders()); if (subReaders.size() != 1) { boost::throw_exception(IllegalArgumentException(L"reader has " + StringUtils::toString(subReaders.size()) + L" segments instead of exactly one")); } return boost::dynamic_pointer_cast(subReaders[0]); } boost::throw_exception(IllegalArgumentException(L"reader is not a SegmentReader or a single-segment DirectoryReader")); return SegmentReaderPtr(); } int32_t SegmentReader::getTermInfosIndexDivisor() { return core->termsIndexDivisor; } CoreReaders::CoreReaders(const SegmentReaderPtr& origInstance, const 
DirectoryPtr& dir, const SegmentInfoPtr& si, int32_t readBufferSize, int32_t termsIndexDivisor) { ref = newLucene(); segment = si->name; this->readBufferSize = readBufferSize; this->dir = dir; bool success = false; LuceneException finally; try { DirectoryPtr dir0(dir); if (si->getUseCompoundFile()) { cfsReader = newLucene(dir, segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION(), readBufferSize); dir0 = cfsReader; } cfsDir = dir0; fieldInfos = newLucene(cfsDir, segment + L"." + IndexFileNames::FIELD_INFOS_EXTENSION()); this->termsIndexDivisor = termsIndexDivisor; TermInfosReaderPtr reader(newLucene(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor)); if (termsIndexDivisor == -1) { tisNoIndex = reader; } else { tis = reader; } // make sure that all index files have been read or are kept open so that if an index // update removes them we'll still have them freqStream = cfsDir->openInput(segment + L"." + IndexFileNames::FREQ_EXTENSION(), readBufferSize); if (fieldInfos->hasProx()) { proxStream = cfsDir->openInput(segment + L"." + IndexFileNames::PROX_EXTENSION(), readBufferSize); } success = true; } catch (LuceneException& e) { finally = e; } if (!success) { decRef(); } finally.throwException(); // Must assign this at the end -- if we hit an exception above core, we don't want to attempt to // purge the FieldCache (will hit NPE because core is not assigned yet). _origInstance = origInstance; } CoreReaders::~CoreReaders() { } TermVectorsReaderPtr CoreReaders::getTermVectorsReaderOrig() { SyncLock syncLock(this); return termVectorsReaderOrig; } FieldsReaderPtr CoreReaders::getFieldsReaderOrig() { SyncLock syncLock(this); return fieldsReaderOrig; } void CoreReaders::incRef() { SyncLock syncLock(this); ref->incRef(); } DirectoryPtr CoreReaders::getCFSReader() { SyncLock syncLock(this); return cfsReader; } TermInfosReaderPtr CoreReaders::getTermsReader() { SyncLock syncLock(this); return tis ? 
tis : tisNoIndex; } bool CoreReaders::termsIndexIsLoaded() { SyncLock syncLock(this); return tis.get() != NULL; } void CoreReaders::loadTermsIndex(const SegmentInfoPtr& si, int32_t termsIndexDivisor) { SyncLock syncLock(this); if (!tis) { DirectoryPtr dir0; if (si->getUseCompoundFile()) { // In some cases, we were originally opened when CFS was not used, but then we are asked // to open the terms reader with index, the segment has switched to CFS if (!cfsReader) { cfsReader = newLucene(dir, segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION(), readBufferSize); } dir0 = cfsReader; } else { dir0 = dir; } tis = newLucene(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor); } } void CoreReaders::decRef() { SyncLock syncLock(this); if (ref->decRef() == 0) { // close everything, nothing is shared anymore with other readers if (tis) { tis->close(); tis.reset(); // null so if an app hangs on to us we still free most ram } if (tisNoIndex) { tisNoIndex->close(); } if (freqStream) { freqStream->close(); } if (proxStream) { proxStream->close(); } if (termVectorsReaderOrig) { termVectorsReaderOrig->close(); } if (fieldsReaderOrig) { fieldsReaderOrig->close(); } if (cfsReader) { cfsReader->close(); } if (storeCFSReader) { storeCFSReader->close(); } // Force FieldCache to evict our entries at this point SegmentReaderPtr origInstance(_origInstance.lock()); if (origInstance) { FieldCache::DEFAULT()->purge(origInstance); } } } void CoreReaders::openDocStores(const SegmentInfoPtr& si) { SyncLock syncLock(this); BOOST_ASSERT(si->name == segment); if (!fieldsReaderOrig) { DirectoryPtr storeDir; if (si->getDocStoreOffset() != -1) { if (si->getDocStoreIsCompoundFile()) { BOOST_ASSERT(!storeCFSReader); storeCFSReader = newLucene(dir, si->getDocStoreSegment() + L"." 
+ IndexFileNames::COMPOUND_FILE_STORE_EXTENSION(), readBufferSize); storeDir = storeCFSReader; BOOST_ASSERT(storeDir); } else { storeDir = dir; BOOST_ASSERT(storeDir); } } else if (si->getUseCompoundFile()) { // In some cases, we were originally opened when CFS was not used, but then we are asked to open doc // stores after the segment has switched to CFS if (!cfsReader) { cfsReader = newLucene(dir, segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION(), readBufferSize); } storeDir = cfsReader; BOOST_ASSERT(storeDir); } else { storeDir = dir; BOOST_ASSERT(storeDir); } String storesSegment(si->getDocStoreOffset() != -1 ? si->getDocStoreSegment() : segment); fieldsReaderOrig = newLucene(storeDir, storesSegment, fieldInfos, readBufferSize, si->getDocStoreOffset(), si->docCount); // Verify two sources of "maxDoc" agree if (si->getDocStoreOffset() == -1 && fieldsReaderOrig->size() != si->docCount) { boost::throw_exception(CorruptIndexException(L"doc counts differ for segment " + segment + L": fieldsReader shows " + StringUtils::toString(fieldsReaderOrig->size()) + L" but segmentInfo shows " + StringUtils::toString(si->docCount))); } if (fieldInfos->hasVectors()) { // open term vector files only as needed termVectorsReaderOrig = newLucene(storeDir, storesSegment, fieldInfos, readBufferSize, si->getDocStoreOffset(), si->docCount); } } } FieldsReaderLocal::FieldsReaderLocal(const SegmentReaderPtr& reader) { this->_reader = reader; } FieldsReaderPtr FieldsReaderLocal::initialValue() { return boost::dynamic_pointer_cast(SegmentReaderPtr(_reader)->core->getFieldsReaderOrig()->clone()); } SegmentReaderRef::SegmentReaderRef() { _refCount = 1; } SegmentReaderRef::~SegmentReaderRef() { } String SegmentReaderRef::toString() { StringStream buffer; buffer << L"refcount: " << _refCount; return buffer.str(); } int32_t SegmentReaderRef::refCount() { SyncLock syncLock(this); return _refCount; } int32_t SegmentReaderRef::incRef() { SyncLock syncLock(this); BOOST_ASSERT(_refCount > 
0); return ++_refCount; } int32_t SegmentReaderRef::decRef() { SyncLock syncLock(this); BOOST_ASSERT(_refCount > 0); return --_refCount; } Norm::Norm() { this->refCount = 1; this->normSeek = 0; this->dirty = false; this->rollbackDirty = false; this->number = 0; } Norm::Norm(const SegmentReaderPtr& reader, const IndexInputPtr& in, int32_t number, int64_t normSeek) { this->_reader = reader; this->refCount = 1; this->dirty = false; this->rollbackDirty = false; this->in = in; this->number = number; this->normSeek = normSeek; } Norm::~Norm() { } void Norm::incRef() { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); ++refCount; } void Norm::closeInput() { SegmentReaderPtr reader(_reader.lock()); if (in && reader) { if (in != reader->singleNormStream) { // It's private to us -- just close it in->close(); } else { // We are sharing this with others -- decRef and maybe close the shared norm stream if (reader->singleNormRef->decRef() == 0) { reader->singleNormStream->close(); reader->singleNormStream.reset(); } } in.reset(); } } void Norm::decRef() { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); if (--refCount == 0) { if (origNorm) { origNorm->decRef(); origNorm.reset(); } else { closeInput(); } if (origReader) { origReader.reset(); } if (_bytes) { BOOST_ASSERT(_bytesRef); _bytesRef->decRef(); _bytes.reset(); _bytesRef.reset(); } else { BOOST_ASSERT(!_bytesRef); } } } void Norm::bytes(uint8_t* bytesOut, int32_t offset, int32_t length) { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); if (_bytes) { // Already cached - copy from cache BOOST_ASSERT(length <= SegmentReaderPtr(_reader)->maxDoc()); MiscUtils::arrayCopy(_bytes.get(), 0, bytesOut, offset, length); } else { // Not cached if (origNorm) { // Ask origNorm to load origNorm->bytes(bytesOut, offset, length); } else { // We are orig - read ourselves from disk SyncLock instancesLock(in); 
in->seek(normSeek); in->readBytes(bytesOut, offset, length, false); } } } ByteArray Norm::bytes() { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); if (!_bytes) { // value not yet read BOOST_ASSERT(!_bytesRef); if (origNorm) { // Ask origNorm to load so that for a series of reopened readers we share a single read-only byte[] _bytes = origNorm->bytes(); _bytesRef = origNorm->_bytesRef; _bytesRef->incRef(); // Once we've loaded the bytes we no longer need origNorm origNorm->decRef(); origNorm.reset(); origReader.reset(); } else { // We are the origNorm, so load the bytes for real ourself int32_t count = SegmentReaderPtr(_reader)->maxDoc(); _bytes = ByteArray::newInstance(count); // Since we are orig, in must not be null BOOST_ASSERT(in); // Read from disk. { SyncLock instancesLock(in); in->seek(normSeek); in->readBytes(_bytes.get(), 0, count, false); } _bytesRef = newLucene(); closeInput(); } } return _bytes; } SegmentReaderRefPtr Norm::bytesRef() { return _bytesRef; } ByteArray Norm::copyOnWrite() { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); bytes(); BOOST_ASSERT(_bytes); BOOST_ASSERT(_bytesRef); if (_bytesRef->refCount() > 1) { // I cannot be the origNorm for another norm instance if I'm being changed. // ie, only the "head Norm" can be changed BOOST_ASSERT(refCount == 1); SegmentReaderRefPtr oldRef(_bytesRef); _bytes = SegmentReaderPtr(_reader)->cloneNormBytes(_bytes); _bytesRef = newLucene(); oldRef->decRef(); } dirty = true; return _bytes; } LuceneObjectPtr Norm::clone(const LuceneObjectPtr& other) { SyncLock syncLock(this); BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); LuceneObjectPtr clone = other ? 
other : newLucene(); NormPtr cloneNorm(boost::dynamic_pointer_cast(clone)); cloneNorm->_reader = _reader; cloneNorm->origNorm = origNorm; cloneNorm->origReader = origReader; cloneNorm->normSeek = normSeek; cloneNorm->_bytesRef = _bytesRef; cloneNorm->_bytes = _bytes; cloneNorm->dirty = dirty; cloneNorm->number = number; cloneNorm->rollbackDirty = rollbackDirty; cloneNorm->refCount = 1; if (_bytes) { BOOST_ASSERT(_bytesRef); BOOST_ASSERT(!origNorm); // Clone holds a reference to my bytes cloneNorm->_bytesRef->incRef(); } else { BOOST_ASSERT(!_bytesRef); if (!origNorm) { // I become the origNorm for the clone cloneNorm->origNorm = shared_from_this(); cloneNorm->origReader = SegmentReaderPtr(_reader); } cloneNorm->origNorm->incRef(); } // Only the origNorm will actually readBytes from in cloneNorm->in.reset(); return cloneNorm; } void Norm::reWrite(const SegmentInfoPtr& si) { BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); // NOTE: norms are re-written in regular directory, not cfs si->advanceNormGen(this->number); String normFileName(si->getNormFileName(this->number)); SegmentReaderPtr reader(_reader); IndexOutputPtr out(reader->directory()->createOutput(normFileName)); bool success = false; LuceneException finally; try { try { out->writeBytes(_bytes.get(), reader->maxDoc()); } catch (LuceneException& e) { finally = e; } out->close(); finally.throwException(); success = true; } catch (LuceneException& e) { finally = e; } if (!success) { try { reader->directory()->deleteFile(normFileName); } catch (...) { // suppress this so we keep throwing the original exception } } finally.throwException(); this->dirty = false; } } LucenePlusPlus-rel_3.0.9/src/core/index/SegmentTermDocs.cpp000066400000000000000000000153661456444476200237400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentTermDocs.h" #include "SegmentReader.h" #include "_SegmentReader.h" #include "SegmentTermEnum.h" #include "IndexInput.h" #include "TermInfosReader.h" #include "FieldInfos.h" #include "FieldInfo.h" #include "Term.h" #include "TermInfo.h" #include "DefaultSkipListReader.h" #include "BitVector.h" #include "MiscUtils.h" namespace Lucene { SegmentTermDocs::SegmentTermDocs(const SegmentReaderPtr& parent) { this->_parent = parent; this->count = 0; this->df = 0; this->_doc = 0; this->_freq = 0; this->freqBasePointer = 0; this->proxBasePointer = 0; this->skipPointer = 0; this->haveSkipped = false; this->currentFieldStoresPayloads = false; this->currentFieldOmitTermFreqAndPositions = false; this->_freqStream = boost::dynamic_pointer_cast(parent->core->freqStream->clone()); { SyncLock parentLock(parent); this->deletedDocs = parent->deletedDocs; this->__deletedDocs = this->deletedDocs.get(); } this->skipInterval = parent->core->getTermsReader()->getSkipInterval(); this->maxSkipLevels = parent->core->getTermsReader()->getMaxSkipLevels(); this->__parent = parent.get(); this->__freqStream = _freqStream.get(); } SegmentTermDocs::~SegmentTermDocs() { } void SegmentTermDocs::seek(const TermPtr& term) { TermInfoPtr ti(__parent->core->getTermsReader()->get(term)); seek(ti, term); } void SegmentTermDocs::seek(const TermEnumPtr& termEnum) { TermInfoPtr ti; TermPtr term; SegmentTermEnumPtr segmentTermEnum(boost::dynamic_pointer_cast(termEnum)); // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs if (segmentTermEnum && segmentTermEnum->fieldInfos == __parent->core->fieldInfos) { // optimized case term = segmentTermEnum->term(); ti = segmentTermEnum->termInfo(); } else { // punt case term = 
termEnum->term(); ti = __parent->core->getTermsReader()->get(term); } seek(ti, term); } void SegmentTermDocs::seek(const TermInfoPtr& ti, const TermPtr& term) { count = 0; FieldInfoPtr fi(__parent->core->fieldInfos->fieldInfo(term->_field)); currentFieldOmitTermFreqAndPositions = fi ? fi->omitTermFreqAndPositions : false; currentFieldStoresPayloads = fi ? fi->storePayloads : false; if (!ti) { df = 0; } else { df = ti->docFreq; _doc = 0; freqBasePointer = ti->freqPointer; proxBasePointer = ti->proxPointer; skipPointer = freqBasePointer + ti->skipOffset; __freqStream->seek(freqBasePointer); haveSkipped = false; } } void SegmentTermDocs::close() { __freqStream->close(); if (skipListReader) { skipListReader->close(); } } int32_t SegmentTermDocs::doc() { return _doc; } int32_t SegmentTermDocs::freq() { return _freq; } void SegmentTermDocs::skippingDoc() { } bool SegmentTermDocs::next() { while (true) { if (count == df) { return false; } int32_t docCode = __freqStream->readVInt(); if (currentFieldOmitTermFreqAndPositions) { _doc += docCode; _freq = 1; } else { _doc += MiscUtils::unsignedShift(docCode, 1); // shift off low bit if ((docCode & 1) != 0) { // if low bit is set _freq = 1; // freq is one } else { _freq = __freqStream->readVInt(); // else read freq } } ++count; if (!__deletedDocs || !__deletedDocs->get(_doc)) { break; } skippingDoc(); } return true; } int32_t SegmentTermDocs::read(Collection& docs, Collection& freqs) { auto* __docs = docs.get(); auto* __freqs = freqs.get(); int32_t length = __docs->size(); if (currentFieldOmitTermFreqAndPositions) { return readNoTf(docs, freqs, length); } else { int32_t i = 0; while (i < length && count < df) { // manually inlined call to next() for speed int32_t docCode = __freqStream->readVInt(); _doc += MiscUtils::unsignedShift(docCode, 1); // shift off low bit if ((docCode & 1) != 0) { // if low bit is set _freq = 1; // freq is one } else { _freq = __freqStream->readVInt(); // else read freq } ++count; if (!__deletedDocs || 
!__deletedDocs->get(_doc)) { (*__docs)[i] = _doc; (*__freqs)[i] = _freq; ++i; } } return i; } } int32_t SegmentTermDocs::readNoTf(Collection& docs, Collection& freqs, int32_t length) { int32_t i = 0; while (i < length && count < df) { // manually inlined call to next() for speed _doc += __freqStream->readVInt(); ++count; if (!__deletedDocs || !__deletedDocs->get(_doc)) { docs[i] = _doc; // Hardware freq to 1 when term freqs were not stored in the index freqs[i] = 1; ++i; } } return i; } void SegmentTermDocs::skipProx(int64_t proxPointer, int32_t payloadLength) { } bool SegmentTermDocs::skipTo(int32_t target) { if (df >= skipInterval) { // optimized case if (!skipListReader) { skipListReader = newLucene(boost::dynamic_pointer_cast(__freqStream->clone()), maxSkipLevels, skipInterval); // lazily clone } if (!haveSkipped) { // lazily initialize skip stream skipListReader->init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads); haveSkipped = true; } int32_t newCount = skipListReader->skipTo(target); if (newCount > count) { __freqStream->seek(skipListReader->getFreqPointer()); skipProx(skipListReader->getProxPointer(), skipListReader->getPayloadLength()); _doc = skipListReader->getDoc(); count = newCount; } } // done skipping, now just scan do { if (!next()) { return false; } } while (target > _doc); return true; } IndexInputPtr SegmentTermDocs::freqStream() { return _freqStream; } void SegmentTermDocs::freqStream(const IndexInputPtr& freqStream) { _freqStream = freqStream; __freqStream = freqStream.get(); } } LucenePlusPlus-rel_3.0.9/src/core/index/SegmentTermEnum.cpp000066400000000000000000000142411456444476200237430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentTermEnum.h" #include "TermInfosWriter.h" #include "IndexInput.h" #include "TermBuffer.h" #include "TermInfo.h" #include "StringUtils.h" namespace Lucene { SegmentTermEnum::SegmentTermEnum() { format = 0; termBuffer = newLucene(); prevBuffer = newLucene(); scanBuffer = newLucene(); _termInfo = newLucene(); formatM1SkipInterval = 0; size = 0; position = -1; indexPointer = 0; indexInterval = 0; skipInterval = 0; maxSkipLevels = 0; isIndex = false; maxSkipLevels = 0; } SegmentTermEnum::SegmentTermEnum(const IndexInputPtr& i, const FieldInfosPtr& fis, bool isi) { format = 0; termBuffer = newLucene(); prevBuffer = newLucene(); scanBuffer = newLucene(); _termInfo = newLucene(); formatM1SkipInterval = 0; size = 0; position = -1; indexPointer = 0; indexInterval = 0; skipInterval = 0; maxSkipLevels = 0; input = i; fieldInfos = fis; isIndex = isi; maxSkipLevels = 1; // use single-level skip lists for formats > -3 int32_t firstInt = input->readInt(); if (firstInt >= 0) { // original-format file, without explicit format version number format = 0; size = firstInt; // back-compatible settings indexInterval = 128; skipInterval = INT_MAX; // switch off skipTo optimization } else { // we have a format version number format = firstInt; // check that it is a format we can understand if (format < TermInfosWriter::FORMAT_CURRENT) { boost::throw_exception(CorruptIndexException(L"Unknown format version:" + StringUtils::toString(format) + L" expected " + StringUtils::toString(TermInfosWriter::FORMAT_CURRENT) + L" or higher")); } size = input->readLong(); // read the size if (format == -1) { if (!isIndex) { indexInterval = input->readInt(); formatM1SkipInterval = input->readInt(); } // switch off skipTo optimization for file format prior to 1.4rc2 skipInterval = INT_MAX; } else { indexInterval = input->readInt(); skipInterval = input->readInt(); if (format <= 
TermInfosWriter::FORMAT) { // this new format introduces multi-level skipping maxSkipLevels = input->readInt(); } } BOOST_ASSERT(indexInterval > 0); // must not be negative BOOST_ASSERT(skipInterval > 0); // must not be negative } if (format > TermInfosWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { termBuffer->setPreUTF8Strings(); scanBuffer->setPreUTF8Strings(); prevBuffer->setPreUTF8Strings(); } } SegmentTermEnum::~SegmentTermEnum() { } LuceneObjectPtr SegmentTermEnum::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? other : newLucene(); SegmentTermEnumPtr cloneEnum(boost::dynamic_pointer_cast(TermEnum::clone(clone))); cloneEnum->format = format; cloneEnum->isIndex = isIndex; cloneEnum->formatM1SkipInterval = formatM1SkipInterval; cloneEnum->fieldInfos = fieldInfos; cloneEnum->size = size; cloneEnum->position = position; cloneEnum->indexPointer = indexPointer; cloneEnum->indexInterval = indexInterval; cloneEnum->skipInterval = skipInterval; cloneEnum->maxSkipLevels = maxSkipLevels; cloneEnum->input = boost::dynamic_pointer_cast(input->clone()); cloneEnum->_termInfo = newLucene(_termInfo); cloneEnum->termBuffer = boost::dynamic_pointer_cast(termBuffer->clone()); cloneEnum->prevBuffer = boost::dynamic_pointer_cast(prevBuffer->clone()); cloneEnum->scanBuffer = newLucene(); return cloneEnum; } void SegmentTermEnum::seek(int64_t pointer, int64_t p, const TermPtr& t, const TermInfoPtr& ti) { input->seek(pointer); position = p; termBuffer->set(t); prevBuffer->reset(); _termInfo->set(ti); } bool SegmentTermEnum::next() { if (position++ >= size - 1) { prevBuffer->set(termBuffer); termBuffer->reset(); return false; } prevBuffer->set(termBuffer); termBuffer->read(input, fieldInfos); _termInfo->docFreq = input->readVInt(); // read doc freq _termInfo->freqPointer += input->readVLong(); // read freq pointer _termInfo->proxPointer += input->readVLong(); // read prox pointer if (format == -1) { // just read skipOffset in order to increment file pointer; 
value is never used // since skipTo is switched off if (!isIndex && _termInfo->docFreq > formatM1SkipInterval) { _termInfo->skipOffset = input->readVInt(); } } else if (_termInfo->docFreq >= skipInterval) { _termInfo->skipOffset = input->readVInt(); } if (isIndex) { indexPointer += input->readVLong(); // read index pointer } return true; } int32_t SegmentTermEnum::scanTo(const TermPtr& term) { scanBuffer->set(term); int32_t count = 0; while (scanBuffer->compareTo(termBuffer) > 0 && next()) { ++count; } return count; } TermPtr SegmentTermEnum::term() { return termBuffer->toTerm(); } TermPtr SegmentTermEnum::prev() { return prevBuffer->toTerm(); } TermInfoPtr SegmentTermEnum::termInfo() { return newLucene(_termInfo); } void SegmentTermEnum::termInfo(const TermInfoPtr& ti) { ti->set(_termInfo); } int32_t SegmentTermEnum::docFreq() { return _termInfo->docFreq; } int64_t SegmentTermEnum::freqPointer() { return _termInfo->freqPointer; } int64_t SegmentTermEnum::proxPointer() { return _termInfo->proxPointer; } void SegmentTermEnum::close() { input->close(); } } LucenePlusPlus-rel_3.0.9/src/core/index/SegmentTermPositionVector.cpp000066400000000000000000000034171456444476200260310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentTermPositionVector.h" #include "TermVectorOffsetInfo.h" namespace Lucene { SegmentTermPositionVector::SegmentTermPositionVector(const String& field, Collection terms, Collection termFreqs, Collection< Collection > positions, Collection< Collection > offsets) : SegmentTermVector(field, terms, termFreqs) { this->offsets = offsets; this->positions = positions; } SegmentTermPositionVector::~SegmentTermPositionVector() { } const Collection SegmentTermPositionVector::EMPTY_TERM_POS() { static Collection _EMPTY_TERM_POS; LUCENE_RUN_ONCE( _EMPTY_TERM_POS = Collection::newInstance(); ); return _EMPTY_TERM_POS; } Collection SegmentTermPositionVector::getOffsets(int32_t index) { Collection result(TermVectorOffsetInfo::EMPTY_OFFSET_INFO()); if (!offsets) { return Collection(); } if (index >=0 && index < offsets.size()) { result = offsets[index]; } return result; } Collection SegmentTermPositionVector::getTermPositions(int32_t index) { Collection result(EMPTY_TERM_POS()); if (!positions) { return Collection(); } if (index >= 0 && index < positions.size()) { result = positions[index]; } return result; } } LucenePlusPlus-rel_3.0.9/src/core/index/SegmentTermPositions.cpp000066400000000000000000000117071456444476200250320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentTermPositions.h" #include "SegmentReader.h" #include "_SegmentReader.h" #include "TermInfo.h" #include "IndexInput.h" #include "MiscUtils.h" namespace Lucene { SegmentTermPositions::SegmentTermPositions(const SegmentReaderPtr& parent) : SegmentTermDocs(parent) { this->proxCount = 0; this->position = 0; this->payloadLength = 0; this->needToLoadPayload = false; this->lazySkipPointer = -1; this->lazySkipProxCount = 0; } SegmentTermPositions::~SegmentTermPositions() { } void SegmentTermPositions::seek(const TermInfoPtr& ti, const TermPtr& term) { SegmentTermDocs::seek(ti, term); if (ti) { lazySkipPointer = ti->proxPointer; } lazySkipProxCount = 0; proxCount = 0; payloadLength = 0; needToLoadPayload = false; } void SegmentTermPositions::close() { SegmentTermDocs::close(); if (proxStream) { proxStream->close(); } } int32_t SegmentTermPositions::nextPosition() { if (currentFieldOmitTermFreqAndPositions) { // This field does not store term freq, positions, payloads return 0; } // perform lazy skips if necessary lazySkip(); --proxCount; position += readDeltaPosition(); return position; } int32_t SegmentTermPositions::readDeltaPosition() { int32_t delta = proxStream->readVInt(); if (currentFieldStoresPayloads) { // if the current field stores payloads then the position delta is shifted one bit to the left. 
// if the LSB is set, then we have to read the current payload length if ((delta & 1) != 0) { payloadLength = proxStream->readVInt(); } delta = MiscUtils::unsignedShift(delta, 1); needToLoadPayload = true; } return delta; } void SegmentTermPositions::skippingDoc() { // we remember to skip a document lazily lazySkipProxCount += _freq; } bool SegmentTermPositions::next() { // we remember to skip the remaining positions of the current document lazily lazySkipProxCount += proxCount; if (SegmentTermDocs::next()) { proxCount = _freq; // note frequency position = 0; // reset position return true; } return false; } int32_t SegmentTermPositions::read(Collection& docs, Collection& freqs) { boost::throw_exception(UnsupportedOperationException(L"TermPositions does not support processing multiple documents in one call. Use TermDocs instead.")); return 0; } void SegmentTermPositions::skipProx(int64_t proxPointer, int32_t payloadLength) { // we save the pointer, we might have to skip there lazily lazySkipPointer = proxPointer; lazySkipProxCount = 0; proxCount = 0; this->payloadLength = payloadLength; needToLoadPayload = false; } void SegmentTermPositions::skipPositions(int32_t n) { BOOST_ASSERT(!currentFieldOmitTermFreqAndPositions); for (int32_t i = n; i > 0; --i) { // skip unread positions readDeltaPosition(); skipPayload(); } } void SegmentTermPositions::skipPayload() { if (needToLoadPayload && payloadLength > 0) { proxStream->seek(proxStream->getFilePointer() + payloadLength); } needToLoadPayload = false; } void SegmentTermPositions::lazySkip() { if (!proxStream) { // clone lazily proxStream = boost::dynamic_pointer_cast(SegmentReaderPtr(_parent)->core->proxStream->clone()); } // we might have to skip the current payload if it was not read yet skipPayload(); if (lazySkipPointer != -1) { proxStream->seek(lazySkipPointer); lazySkipPointer = -1; } if (lazySkipProxCount != 0) { skipPositions(lazySkipProxCount); lazySkipProxCount = 0; } } int32_t 
SegmentTermPositions::getPayloadLength() { return payloadLength; } ByteArray SegmentTermPositions::getPayload(ByteArray data, int32_t offset) { if (!needToLoadPayload) { boost::throw_exception(IOException(L"Either no payload exists at this term position or an attempt was made to load it more than once.")); } // read payloads lazily ByteArray retArray; int32_t retOffset = 0; if (!data || data.size() - offset < payloadLength) { // the array is too small to store the payload data, so we allocate a new one retArray = ByteArray::newInstance(payloadLength); retOffset = 0; } else { retArray = data; retOffset = offset; } proxStream->readBytes(retArray.get(), retOffset, payloadLength); needToLoadPayload = false; return retArray; } bool SegmentTermPositions::isPayloadAvailable() { return (needToLoadPayload && payloadLength > 0); } } LucenePlusPlus-rel_3.0.9/src/core/index/SegmentTermVector.cpp000066400000000000000000000036421456444476200243040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentTermVector.h" namespace Lucene { SegmentTermVector::SegmentTermVector(const String& field, Collection terms, Collection termFreqs) { this->field = field; this->terms = terms; this->termFreqs = termFreqs; } SegmentTermVector::~SegmentTermVector() { } String SegmentTermVector::getField() { return field; } String SegmentTermVector::toString() { StringStream segTermVector; segTermVector << L"{" << field; if (terms) { for (int32_t i = 0; i < terms.size(); ++i) { if (i > 0) { segTermVector << L", "; } segTermVector << terms[i] << L"/" << termFreqs[i]; } } segTermVector << L"}"; return segTermVector.str(); } int32_t SegmentTermVector::size() { return terms ? terms.size() : 0; } Collection SegmentTermVector::getTerms() { return terms; } Collection SegmentTermVector::getTermFrequencies() { return termFreqs; } int32_t SegmentTermVector::indexOf(const String& term) { if (!terms) { return -1; } Collection::iterator search = std::lower_bound(terms.begin(), terms.end(), term); return (search == terms.end() || term < *search) ? -1 : std::distance(terms.begin(), search); } Collection SegmentTermVector::indexesOf(Collection termNumbers, int32_t start, int32_t length) { Collection res(Collection::newInstance(length)); for (int32_t i = 0; i < length; ++i) { res[i] = indexOf(termNumbers[start + i]); } return res; } } LucenePlusPlus-rel_3.0.9/src/core/index/SegmentWriteState.cpp000066400000000000000000000022751456444476200243060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SegmentWriteState.h" namespace Lucene { SegmentWriteState::SegmentWriteState(const DocumentsWriterPtr& docWriter, const DirectoryPtr& directory, const String& segmentName, const String& docStoreSegmentName, int32_t numDocs, int32_t numDocsInStore, int32_t termIndexInterval) { this->_docWriter = docWriter; this->directory = directory; this->segmentName = segmentName; this->docStoreSegmentName = docStoreSegmentName; this->numDocs = numDocs; this->numDocsInStore = numDocsInStore; this->termIndexInterval = termIndexInterval; this->flushedFiles = HashSet::newInstance(); } SegmentWriteState::~SegmentWriteState() { } String SegmentWriteState::segmentFileName(const String& ext) { return segmentName + L"." + ext; } } LucenePlusPlus-rel_3.0.9/src/core/index/SerialMergeScheduler.cpp000066400000000000000000000014251456444476200247220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SerialMergeScheduler.h" #include "IndexWriter.h" namespace Lucene { SerialMergeScheduler::~SerialMergeScheduler() { } void SerialMergeScheduler::merge(const IndexWriterPtr& writer) { SyncLock syncLock(this); while (true) { OneMergePtr merge(writer->getNextMerge()); if (!merge) { break; } writer->merge(merge); } } void SerialMergeScheduler::close() { } } LucenePlusPlus-rel_3.0.9/src/core/index/SnapshotDeletionPolicy.cpp000066400000000000000000000065411456444476200253330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SnapshotDeletionPolicy.h" #include "_SnapshotDeletionPolicy.h" namespace Lucene { SnapshotDeletionPolicy::SnapshotDeletionPolicy(const IndexDeletionPolicyPtr& primary) { this->primary = primary; } SnapshotDeletionPolicy::~SnapshotDeletionPolicy() { } void SnapshotDeletionPolicy::onInit(Collection commits) { SyncLock syncLock(this); primary->onInit(wrapCommits(commits)); lastCommit = commits[commits.size() - 1]; } void SnapshotDeletionPolicy::onCommit(Collection commits) { SyncLock syncLock(this); primary->onCommit(wrapCommits(commits)); lastCommit = commits[commits.size() - 1]; } IndexCommitPtr SnapshotDeletionPolicy::snapshot() { SyncLock syncLock(this); if (!lastCommit) { boost::throw_exception(IllegalStateException(L"no index commits to snapshot")); } if (_snapshot.empty()) { _snapshot = lastCommit->getSegmentsFileName(); } else { boost::throw_exception(IllegalStateException(L"snapshot is already set; please call release() first")); } return lastCommit; } void SnapshotDeletionPolicy::release() { SyncLock syncLock(this); if (!_snapshot.empty()) { _snapshot.clear(); } else { boost::throw_exception(IllegalStateException(L"snapshot was not set; please call snapshot() first")); } } Collection SnapshotDeletionPolicy::wrapCommits(Collection commits) { Collection myCommits(Collection::newInstance()); for (Collection::iterator commit = commits.begin(); commit != commits.end(); ++commit) { myCommits.add(newLucene(shared_from_this(), *commit)); } return myCommits; } MyCommitPoint::MyCommitPoint(const SnapshotDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& cp) { this->_deletionPolicy = deletionPolicy; this->cp = cp; } MyCommitPoint::~MyCommitPoint() { } String MyCommitPoint::toString() { return 
L"SnapshotDeletionPolicy.SnapshotCommitPoint(" + cp->toString() + L")"; } String MyCommitPoint::getSegmentsFileName() { return cp->getSegmentsFileName(); } HashSet MyCommitPoint::getFileNames() { return cp->getFileNames(); } DirectoryPtr MyCommitPoint::getDirectory() { return cp->getDirectory(); } void MyCommitPoint::deleteCommit() { SnapshotDeletionPolicyPtr deletionPolicy(_deletionPolicy); SyncLock policyLock(deletionPolicy); // Suppress the delete request if this commit point is our current snapshot. if (deletionPolicy->_snapshot.empty() || deletionPolicy->_snapshot != getSegmentsFileName()) { cp->deleteCommit(); } } bool MyCommitPoint::isDeleted() { return cp->isDeleted(); } int64_t MyCommitPoint::getVersion() { return cp->getVersion(); } int64_t MyCommitPoint::getGeneration() { return cp->getGeneration(); } MapStringString MyCommitPoint::getUserData() { return cp->getUserData(); } bool MyCommitPoint::isOptimized() { return cp->isOptimized(); } } LucenePlusPlus-rel_3.0.9/src/core/index/SortedTermVectorMapper.cpp000066400000000000000000000073431456444476200253110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SortedTermVectorMapper.h" #include "TermVectorEntry.h" namespace Lucene { const wchar_t* SortedTermVectorMapper::ALL = L"_ALL_"; SortedTermVectorMapper::SortedTermVectorMapper(TermVectorEntryComparator comparator) : TermVectorMapper(false, false) { this->storeOffsets = false; this->storePositions = false; this->comparator = comparator; this->currentSet = Collection::newInstance(); this->termToTVE = MapStringTermVectorEntry::newInstance(); } SortedTermVectorMapper::SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator) : TermVectorMapper(ignoringPositions, ignoringPositions) { this->storeOffsets = false; this->storePositions = false; this->comparator = comparator; this->currentSet = Collection::newInstance(); this->termToTVE = MapStringTermVectorEntry::newInstance(); } SortedTermVectorMapper::~SortedTermVectorMapper() { } void SortedTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) { // We need to combine any previous mentions of the term TermVectorEntryPtr entry(termToTVE.get(term)); if (!entry) { entry = newLucene(ALL, term, frequency, storeOffsets ? offsets : Collection(), storePositions ? positions : Collection()); termToTVE.put(term, entry); if (!currentSet.contains_if(luceneEqualTo(entry))) { currentSet.insert(std::upper_bound(currentSet.begin(), currentSet.end(), entry, comparator), entry); } } else { entry->setFrequency(entry->getFrequency() + frequency); if (storeOffsets) { Collection existingOffsets(entry->getOffsets()); // A few diff. 
cases here: offsets is null, existing offsets is null, both are null, same for positions if (existingOffsets && offsets && !offsets.empty()) { // copy over the existing offsets Collection newOffsets(Collection::newInstance(existingOffsets.begin(), existingOffsets.end())); newOffsets.addAll(offsets.begin(), offsets.end()); entry->setOffsets(newOffsets); } else if (!existingOffsets && offsets && !offsets.empty()) { entry->setOffsets(offsets); } // else leave it alone } if (storePositions) { Collection existingPositions(entry->getPositions()); if (existingPositions && positions && !positions.empty()) { Collection newPositions(existingPositions); newPositions.addAll(positions.begin(), positions.end()); entry->setPositions(newPositions); } else if (!existingPositions && positions && !positions.empty()) { entry->setPositions(positions); } // else leave it alone } } } void SortedTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) { this->storeOffsets = storeOffsets; this->storePositions = storePositions; } Collection SortedTermVectorMapper::getTermVectorEntrySet() { return currentSet; } } LucenePlusPlus-rel_3.0.9/src/core/index/StoredFieldsWriter.cpp000066400000000000000000000155771456444476200244650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StoredFieldsWriter.h" #include "StoredFieldsWriterPerThread.h" #include "RAMOutputStream.h" #include "SegmentWriteState.h" #include "FieldsWriter.h" #include "IndexFileNames.h" #include "IndexWriter.h" #include "Directory.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { StoredFieldsWriter::StoredFieldsWriter(const DocumentsWriterPtr& docWriter, const FieldInfosPtr& fieldInfos) { lastDocID = 0; docFreeList = Collection::newInstance(1); freeCount = 0; allocCount = 0; this->_docWriter = docWriter; this->fieldInfos = fieldInfos; } StoredFieldsWriter::~StoredFieldsWriter() { } StoredFieldsWriterPerThreadPtr StoredFieldsWriter::addThread(const DocStatePtr& docState) { return newLucene(docState, shared_from_this()); } void StoredFieldsWriter::flush(const SegmentWriteStatePtr& state) { SyncLock syncLock(this); if (state->numDocsInStore > 0) { // It's possible that all documents seen in this segment hit non-aborting exceptions, // in which case we will not have yet init'd the FieldsWriter initFieldsWriter(); // Fill fdx file to include any final docs that we skipped because they hit non-aborting // exceptions fill(state->numDocsInStore - DocumentsWriterPtr(_docWriter)->getDocStoreOffset()); } if (fieldsWriter) { fieldsWriter->flush(); } } void StoredFieldsWriter::initFieldsWriter() { if (!fieldsWriter) { DocumentsWriterPtr docWriter(_docWriter); String docStoreSegment(docWriter->getDocStoreSegment()); if (!docStoreSegment.empty()) { fieldsWriter = newLucene(docWriter->directory, docStoreSegment, fieldInfos); docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::FIELDS_EXTENSION()); docWriter->addOpenFile(docStoreSegment + L"." 
+ IndexFileNames::FIELDS_INDEX_EXTENSION()); lastDocID = 0; } } } void StoredFieldsWriter::closeDocStore(const SegmentWriteStatePtr& state) { SyncLock syncLock(this); int32_t inc = state->numDocsInStore - lastDocID; if (inc > 0) { initFieldsWriter(); fill(state->numDocsInStore - DocumentsWriterPtr(_docWriter)->getDocStoreOffset()); } if (fieldsWriter) { fieldsWriter->close(); fieldsWriter.reset(); lastDocID = 0; BOOST_ASSERT(!state->docStoreSegmentName.empty()); state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_EXTENSION()); state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); DocumentsWriterPtr docWriter(state->_docWriter); docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_EXTENSION()); docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); String fileName(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); if (4 + ((int64_t)state->numDocsInStore) * 8 != state->directory->fileLength(fileName)) { boost::throw_exception(RuntimeException(L"after flush: fdx size mismatch: " + StringUtils::toString(state->numDocsInStore) + L" docs vs " + StringUtils::toString(state->directory->fileLength(fileName)) + L" length in bytes of " + fileName + L" file exists?=" + StringUtils::toString(state->directory->fileExists(fileName)))); } } } StoredFieldsWriterPerDocPtr StoredFieldsWriter::getPerDoc() { SyncLock syncLock(this); if (freeCount == 0) { ++allocCount; if (allocCount > docFreeList.size()) { // Grow our free list up front to make sure we have enough space to recycle all // outstanding StoredFieldsWriterPerDoc instances BOOST_ASSERT(allocCount == docFreeList.size() + 1); docFreeList.resize(MiscUtils::getNextSize(allocCount)); } return newLucene(shared_from_this()); } else { return docFreeList[--freeCount]; } } void StoredFieldsWriter::abort() { SyncLock syncLock(this); if 
(fieldsWriter) { try { fieldsWriter->close(); } catch (...) { } fieldsWriter.reset(); lastDocID = 0; } } void StoredFieldsWriter::fill(int32_t docID) { int32_t docStoreOffset = DocumentsWriterPtr(_docWriter)->getDocStoreOffset(); // We must "catch up" for all docs before us that had no stored fields int32_t end = docID + docStoreOffset; while (lastDocID < end) { fieldsWriter->skipDocument(); ++lastDocID; } } void StoredFieldsWriter::finishDocument(const StoredFieldsWriterPerDocPtr& perDoc) { SyncLock syncLock(this); IndexWriterPtr writer(DocumentsWriterPtr(_docWriter)->_writer); BOOST_ASSERT(writer->testPoint(L"StoredFieldsWriter.finishDocument start")); initFieldsWriter(); fill(perDoc->docID); // Append stored fields to the real FieldsWriter fieldsWriter->flushDocument(perDoc->numStoredFields, perDoc->fdt); ++lastDocID; perDoc->reset(); free(perDoc); BOOST_ASSERT(writer->testPoint(L"StoredFieldsWriter.finishDocument end")); } bool StoredFieldsWriter::freeRAM() { return false; } void StoredFieldsWriter::free(const StoredFieldsWriterPerDocPtr& perDoc) { SyncLock syncLock(this); BOOST_ASSERT(freeCount < docFreeList.size()); BOOST_ASSERT(perDoc->numStoredFields == 0); BOOST_ASSERT(perDoc->fdt->length() == 0); BOOST_ASSERT(perDoc->fdt->getFilePointer() == 0); docFreeList[freeCount++] = perDoc; } StoredFieldsWriterPerDoc::StoredFieldsWriterPerDoc(const StoredFieldsWriterPtr& fieldsWriter) { this->_fieldsWriter = fieldsWriter; buffer = DocumentsWriterPtr(fieldsWriter->_docWriter)->newPerDocBuffer(); fdt = newLucene(buffer); numStoredFields = 0; } StoredFieldsWriterPerDoc::~StoredFieldsWriterPerDoc() { } void StoredFieldsWriterPerDoc::reset() { fdt->reset(); buffer->recycle(); numStoredFields = 0; } void StoredFieldsWriterPerDoc::abort() { reset(); StoredFieldsWriterPtr(_fieldsWriter)->free(shared_from_this()); } int64_t StoredFieldsWriterPerDoc::sizeInBytes() { return buffer->getSizeInBytes(); } void StoredFieldsWriterPerDoc::finish() { 
StoredFieldsWriterPtr(_fieldsWriter)->finishDocument(shared_from_this()); } } LucenePlusPlus-rel_3.0.9/src/core/index/StoredFieldsWriterPerThread.cpp000066400000000000000000000041411456444476200262450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StoredFieldsWriterPerThread.h" #include "StoredFieldsWriter.h" #include "FieldsWriter.h" #include "RAMOutputStream.h" namespace Lucene { StoredFieldsWriterPerThread::StoredFieldsWriterPerThread(const DocStatePtr& docState, const StoredFieldsWriterPtr& storedFieldsWriter) { this->_storedFieldsWriter = storedFieldsWriter; this->docState = docState; localFieldsWriter = newLucene(IndexOutputPtr(), IndexOutputPtr(), storedFieldsWriter->fieldInfos); } StoredFieldsWriterPerThread::~StoredFieldsWriterPerThread() { } void StoredFieldsWriterPerThread::startDocument() { if (doc) { // Only happens if previous document hit non-aborting exception while writing stored fields // into localFieldsWriter doc->reset(); doc->docID = docState->docID; } } void StoredFieldsWriterPerThread::addField(const FieldablePtr& field, const FieldInfoPtr& fieldInfo) { if (!doc) { doc = StoredFieldsWriterPtr(_storedFieldsWriter)->getPerDoc(); doc->docID = docState->docID; localFieldsWriter->setFieldsStream(doc->fdt); BOOST_ASSERT(doc->numStoredFields == 0); BOOST_ASSERT(doc->fdt->length() == 0); BOOST_ASSERT(doc->fdt->getFilePointer() == 0); } localFieldsWriter->writeField(fieldInfo, field); BOOST_ASSERT(docState->testPoint(L"StoredFieldsWriterPerThread.processFields.writeField")); ++doc->numStoredFields; } DocWriterPtr StoredFieldsWriterPerThread::finishDocument() { // If there were any stored fields in this 
doc, doc will be non-null; else it's null. DocWriterPtr finishDoc(doc); doc.reset(); return finishDoc; } void StoredFieldsWriterPerThread::abort() { if (doc) { doc->abort(); doc.reset(); } } } LucenePlusPlus-rel_3.0.9/src/core/index/Term.cpp000066400000000000000000000035141456444476200215740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Term.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { Term::Term(const String& fld, const String& txt) : _field(fld), _text(txt) { } Term::~Term() { } String Term::field() { return _field; } String Term::text() { return _text; } TermPtr Term::createTerm(const String& text) { return newLucene(_field, text); } bool Term::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } if (!other) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } TermPtr otherTerm(boost::dynamic_pointer_cast(other)); if (!otherTerm) { return false; } return (_field == otherTerm->_field && _text == otherTerm->_text); } int32_t Term::hashCode() { int32_t prime = 31; int32_t result = 1; result = prime * result + (_field.empty() ? 0 : StringUtils::hashCode(_field)); result = prime * result + (_text.empty() ? 
0 : StringUtils::hashCode(_text)); return result; } int32_t Term::compareTo(const LuceneObjectPtr& other) { TermPtr otherTerm(boost::static_pointer_cast(other)); if (_field == otherTerm->_field) { return _text.compare(otherTerm->_text); } else { return _field.compare(otherTerm->_field); } } void Term::set(const String& fld, const String& txt) { _field = fld; _text = txt; } String Term::toString() { return _field + L":" + _text; } } LucenePlusPlus-rel_3.0.9/src/core/index/TermBuffer.cpp000066400000000000000000000067341456444476200227350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermBuffer.h" #include "IndexInput.h" #include "FieldInfos.h" #include "Term.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { TermBuffer::TermBuffer() { preUTF8Strings = false; text = newLucene(); bytes = newLucene(); } TermBuffer::~TermBuffer() { } int32_t TermBuffer::compareTo(const LuceneObjectPtr& other) { TermBufferPtr otherTermBuffer(boost::static_pointer_cast(other)); if (field == otherTermBuffer->field) { return compareChars(text->result.get(), text->length, otherTermBuffer->text->result.get(), otherTermBuffer->text->length); } else { return field.compare(otherTermBuffer->field); } } int32_t TermBuffer::compareChars(wchar_t* chars1, int32_t len1, wchar_t* chars2, int32_t len2) { int32_t end = len1 < len2 ? 
len1 : len2; for (int32_t k = 0; k < end; ++k) { wchar_t c1 = chars1[k]; wchar_t c2 = chars2[k]; if (c1 != c2) { return c1 - c2; } } return len1 - len2; } void TermBuffer::setPreUTF8Strings() { preUTF8Strings = true; } void TermBuffer::read(const IndexInputPtr& input, const FieldInfosPtr& fieldInfos) { this->term.reset(); // invalidate cache int32_t start = input->readVInt(); int32_t length = input->readVInt(); int32_t totalLength = start + length; if (preUTF8Strings) { text->setLength(totalLength); text->setLength(start + input->readChars(text->result.get(), start, length)); } else { StringUtils::toUTF8(text->result.get(), text->length, bytes); bytes->setLength(totalLength); input->readBytes(bytes->result.get(), start, length); StringUtils::toUnicode(bytes->result.get(), totalLength, text); } this->field = fieldInfos->fieldName(input->readVInt()); } void TermBuffer::set(const TermPtr& term) { if (!term) { reset(); return; } String termText(term->text()); int32_t termLen = termText.length(); text->setLength(termLen); MiscUtils::arrayCopy(termText.begin(), 0, text->result.get(), 0, termLen); field = term->field(); this->term = term; } void TermBuffer::set(const TermBufferPtr& other) { text->copyText(other->text); field = other->field; term = other->term; } void TermBuffer::reset() { field.clear(); text->setLength(0); term.reset(); } TermPtr TermBuffer::toTerm() { if (field.empty()) { // unset return TermPtr(); } if (!term) { term = newLucene(field, String(text->result.get(), text->length)); } return term; } LuceneObjectPtr TermBuffer::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? 
other : newLucene(); TermBufferPtr cloneBuffer(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); cloneBuffer->field = field; cloneBuffer->term = term; cloneBuffer->preUTF8Strings = preUTF8Strings; cloneBuffer->bytes = newLucene(); cloneBuffer->text = newLucene(); cloneBuffer->text->copyText(text); return cloneBuffer; } } LucenePlusPlus-rel_3.0.9/src/core/index/TermDocs.cpp000066400000000000000000000022071456444476200224030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermDocs.h" namespace Lucene { TermDocs::TermDocs() { } void TermDocs::seek(const TermPtr& term) { BOOST_ASSERT(false); // override } void TermDocs::seek(const TermEnumPtr& termEnum) { BOOST_ASSERT(false); // override } int32_t TermDocs::doc() { BOOST_ASSERT(false); return 0; // override } int32_t TermDocs::freq() { BOOST_ASSERT(false); return 0; // override } bool TermDocs::next() { BOOST_ASSERT(false); return false; // override } int32_t TermDocs::read(Collection& docs, Collection& freqs) { BOOST_ASSERT(false); return 0; // override } bool TermDocs::skipTo(int32_t target) { BOOST_ASSERT(false); return false; // override } void TermDocs::close() { BOOST_ASSERT(false); // override } } LucenePlusPlus-rel_3.0.9/src/core/index/TermEnum.cpp000066400000000000000000000006621456444476200224220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermEnum.h" namespace Lucene { TermEnum::~TermEnum() { } } LucenePlusPlus-rel_3.0.9/src/core/index/TermFreqVector.cpp000066400000000000000000000022721456444476200235750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermFreqVector.h" namespace Lucene { TermFreqVector::TermFreqVector() { } TermFreqVector::~TermFreqVector() { } String TermFreqVector::getField() { BOOST_ASSERT(false); return L""; // override } int32_t TermFreqVector::size() { BOOST_ASSERT(false); return 0; // override } Collection TermFreqVector::getTerms() { BOOST_ASSERT(false); return Collection(); // override } Collection TermFreqVector::getTermFrequencies() { BOOST_ASSERT(false); return Collection(); // override } int32_t TermFreqVector::indexOf(const String& term) { BOOST_ASSERT(false); return 0; // override } Collection TermFreqVector::indexesOf(Collection terms, int32_t start, int32_t length) { BOOST_ASSERT(false); return Collection(); // override } } LucenePlusPlus-rel_3.0.9/src/core/index/TermInfo.cpp000066400000000000000000000020361456444476200224060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermInfo.h" namespace Lucene { TermInfo::TermInfo(const TermInfoPtr& ti) { set(ti); } TermInfo::TermInfo(int32_t df, int64_t fp, int64_t pp) { docFreq = df; freqPointer = fp; proxPointer = pp; skipOffset = 0; } TermInfo::~TermInfo() { } void TermInfo::set(int32_t docFreq, int64_t freqPointer, int64_t proxPointer, int32_t skipOffset) { this->docFreq = docFreq; this->freqPointer = freqPointer; this->proxPointer = proxPointer; this->skipOffset = skipOffset; } void TermInfo::set(const TermInfoPtr& ti) { docFreq = ti->docFreq; freqPointer = ti->freqPointer; proxPointer = ti->proxPointer; skipOffset = ti->skipOffset; } } LucenePlusPlus-rel_3.0.9/src/core/index/TermInfosReader.cpp000066400000000000000000000171661456444476200237260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermInfosReader.h" #include "SegmentTermEnum.h" #include "Directory.h" #include "IndexFileNames.h" #include "Term.h" #include "StringUtils.h" namespace Lucene { const int32_t TermInfosReader::DEFAULT_CACHE_SIZE = 1024; TermInfosReader::TermInfosReader(const DirectoryPtr& dir, const String& seg, const FieldInfosPtr& fis, int32_t readBufferSize, int32_t indexDivisor) { bool success = false; if (indexDivisor < 1 && indexDivisor != -1) { boost::throw_exception(IllegalArgumentException(L"indexDivisor must be -1 (don't load terms index) or greater than 0: got " + StringUtils::toString(indexDivisor))); } LuceneException finally; try { directory = dir; segment = seg; fieldInfos = fis; origEnum = newLucene(directory->openInput(segment + L"." 
+ IndexFileNames::TERMS_EXTENSION(), readBufferSize), fieldInfos, false); _size = origEnum->size; if (indexDivisor != -1) { // Load terms index totalIndexInterval = origEnum->indexInterval * indexDivisor; SegmentTermEnumPtr indexEnum(newLucene(directory->openInput(segment + L"." + IndexFileNames::TERMS_INDEX_EXTENSION(), readBufferSize), fieldInfos, true)); try { int32_t indexSize = 1 + ((int32_t)indexEnum->size - 1) / indexDivisor; // otherwise read index indexTerms = Collection::newInstance(indexSize); indexInfos = Collection::newInstance(indexSize); indexPointers = Collection::newInstance(indexSize); for (int32_t i = 0; indexEnum->next(); ++i) { indexTerms[i] = indexEnum->term(); indexInfos[i] = indexEnum->termInfo(); indexPointers[i] = indexEnum->indexPointer; for (int32_t j = 1; j < indexDivisor; ++j) { if (!indexEnum->next()) { break; } } } } catch (LuceneException& e) { finally = e; } indexEnum->close(); } else { // Do not load terms index totalIndexInterval = -1; } success = true; } catch (LuceneException& e) { finally = e; } // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception above. // In this case, we want to explicitly close any subset of things that were opened. 
if (!success) { close(); } finally.throwException(); } TermInfosReader::~TermInfosReader() { } int32_t TermInfosReader::getMaxSkipLevels() { return origEnum->maxSkipLevels; } int32_t TermInfosReader::getSkipInterval() { return origEnum->skipInterval; } void TermInfosReader::close() { if (origEnum) { origEnum->close(); } threadResources.close(); } int64_t TermInfosReader::size() { return _size; } TermInfosReaderThreadResourcesPtr TermInfosReader::getThreadResources() { TermInfosReaderThreadResourcesPtr resources(threadResources.get()); if (!resources) { resources = newLucene(); resources->termEnum = terms(); // Cache does not have to be thread-safe, it is only used by one thread at the same time resources->termInfoCache = newInstance(DEFAULT_CACHE_SIZE); threadResources.set(resources); } return resources; } int32_t TermInfosReader::getIndexOffset(const TermPtr& term) { // binary search indexTerms Collection::iterator indexTerm = std::upper_bound(indexTerms.begin(), indexTerms.end(), term, luceneCompare()); return (std::distance(indexTerms.begin(), indexTerm) - 1); } void TermInfosReader::seekEnum(const SegmentTermEnumPtr& enumerator, int32_t indexOffset) { enumerator->seek(indexPointers[indexOffset], ((int64_t)indexOffset * (int64_t)totalIndexInterval) - 1, indexTerms[indexOffset], indexInfos[indexOffset]); } TermInfoPtr TermInfosReader::get(const TermPtr& term) { return get(term, true); } TermInfoPtr TermInfosReader::get(const TermPtr& term, bool useCache) { if (_size == 0) { return TermInfoPtr(); } ensureIndexIsRead(); TermInfoPtr ti; TermInfosReaderThreadResourcesPtr resources(getThreadResources()); TermInfoCachePtr cache; if (useCache) { cache = resources->termInfoCache; // check the cache first if the term was recently looked up ti = cache->get(term); if (ti) { return ti; } } // optimize sequential access: first try scanning cached enum without seeking SegmentTermEnumPtr enumerator = resources->termEnum; if (enumerator->term() && // term is at or past current 
((enumerator->prev() && term->compareTo(enumerator->prev()) > 0) || term->compareTo(enumerator->term()) >= 0)) { int32_t enumOffset = (int32_t)(enumerator->position / totalIndexInterval ) + 1; if (indexTerms.size() == enumOffset || // but before end of block term->compareTo(indexTerms[enumOffset]) < 0) { // no need to seek int32_t numScans = enumerator->scanTo(term); if (enumerator->term() && term->compareTo(enumerator->term()) == 0) { ti = enumerator->termInfo(); if (cache && numScans > 1) { // we only want to put this TermInfo into the cache if scanEnum skipped more // than one dictionary entry. This prevents RangeQueries or WildcardQueries to // wipe out the cache when they iterate over a large numbers of terms in order. cache->put(term, ti); } } else { ti.reset(); } return ti; } } // random-access: must seek seekEnum(enumerator, getIndexOffset(term)); enumerator->scanTo(term); if (enumerator->term() && term->compareTo(enumerator->term()) == 0) { ti = enumerator->termInfo(); if (cache) { cache->put(term, ti); } } else { ti.reset(); } return ti; } void TermInfosReader::ensureIndexIsRead() { if (!indexTerms) { boost::throw_exception(IllegalStateException(L"terms index was not loaded when this reader was created")); } } int64_t TermInfosReader::getPosition(const TermPtr& term) { if (_size == 0) { return -1; } ensureIndexIsRead(); int32_t indexOffset = getIndexOffset(term); SegmentTermEnumPtr enumerator(getThreadResources()->termEnum); seekEnum(enumerator, indexOffset); while (term->compareTo(enumerator->term()) > 0 && enumerator->next()) { } return term->compareTo(enumerator->term()) == 0 ? 
enumerator->position : -1; } SegmentTermEnumPtr TermInfosReader::terms() { return boost::static_pointer_cast(origEnum->clone()); } SegmentTermEnumPtr TermInfosReader::terms(const TermPtr& term) { // don't use the cache in this call because we want to reposition the enumeration get(term, false); return boost::static_pointer_cast(getThreadResources()->termEnum->clone()); } TermInfosReaderThreadResources::~TermInfosReaderThreadResources() { } } LucenePlusPlus-rel_3.0.9/src/core/index/TermInfosWriter.cpp000066400000000000000000000144641456444476200237760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermInfosWriter.h" #include "Directory.h" #include "IndexOutput.h" #include "Term.h" #include "TermInfo.h" #include "FieldInfos.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { /// The file format version, a negative number. const int32_t TermInfosWriter::FORMAT = -3; /// Changed strings to true utf8 with length-in-bytes not length-in-chars. const int32_t TermInfosWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = -4; /// NOTE: always change this if you switch to a new format. 
const int32_t TermInfosWriter::FORMAT_CURRENT = TermInfosWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; TermInfosWriter::TermInfosWriter(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fis, int32_t interval) { initialize(directory, segment, fis, interval, false); otherWriter = newLucene(directory, segment, fis, interval, true); } TermInfosWriter::TermInfosWriter(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fis, int32_t interval, bool isIndex) { initialize(directory, segment, fis, interval, isIndex); } TermInfosWriter::~TermInfosWriter() { } void TermInfosWriter::initialize() { if (otherWriter) { _other = otherWriter; otherWriter->_other = shared_from_this(); } } void TermInfosWriter::initialize(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fis, int32_t interval, bool isi) { lastTi = newLucene(); utf8Result = newLucene(); lastTermBytes = ByteArray::newInstance(10); lastTermBytesLength = 0; lastFieldNumber = -1; skipInterval = 16; maxSkipLevels = 10; size = 0; lastIndexPointer = 0; indexInterval = interval; fieldInfos = fis; isIndex = isi; output = directory->createOutput(segment + (isIndex ? 
L".tii" : L".tis")); output->writeInt(FORMAT_CURRENT); // write format output->writeLong(0); // leave space for size output->writeInt(indexInterval); // write indexInterval output->writeInt(skipInterval); // write skipInterval output->writeInt(maxSkipLevels); // write maxSkipLevels BOOST_ASSERT(initUnicodeResults()); } void TermInfosWriter::add(const TermPtr& term, const TermInfoPtr& ti) { StringUtils::toUTF8(term->_text.c_str(), term->_text.size(), utf8Result); add(fieldInfos->fieldNumber(term->_field), utf8Result->result, utf8Result->length, ti); } bool TermInfosWriter::initUnicodeResults() { unicodeResult1 = newLucene(); unicodeResult2 = newLucene(); return true; } int32_t TermInfosWriter::compareToLastTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength) { if (lastFieldNumber != fieldNumber) { int32_t cmp = fieldInfos->fieldName(lastFieldNumber).compare(fieldInfos->fieldName(fieldNumber)); // If there is a field named "" (empty string) then we will get 0 on this comparison, yet, it's "OK". // But it's not OK if two different field numbers map to the same name. if (cmp != 0 || lastFieldNumber != -1) { return cmp; } } StringUtils::toUnicode(lastTermBytes.get(), lastTermBytesLength, unicodeResult1); StringUtils::toUnicode(termBytes.get(), termBytesLength, unicodeResult2); int32_t len = std::min(unicodeResult1->length, unicodeResult2->length); for (int32_t i = 0; i < len; ++i) { wchar_t ch1 = unicodeResult1->result[i]; wchar_t ch2 = unicodeResult2->result[i]; if (ch1 != ch2) { return (ch1 - ch2); } } return (unicodeResult1->length - unicodeResult2->length); } void TermInfosWriter::add(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength, const TermInfoPtr& ti) { // terms out of order? BOOST_ASSERT(compareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0 || (isIndex && termBytesLength == 0 && lastTermBytesLength == 0)); BOOST_ASSERT(ti->freqPointer >= lastTi->freqPointer); // freqPointer out of order? 
BOOST_ASSERT(ti->proxPointer >= lastTi->proxPointer); // proxPointer out of order? TermInfosWriterPtr other(_other); if (!isIndex && size % indexInterval == 0) { other->add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi); // add an index term } writeTerm(fieldNumber, termBytes, termBytesLength); // write term output->writeVInt(ti->docFreq); // write doc freq output->writeVLong(ti->freqPointer - lastTi->freqPointer); // write pointers output->writeVLong(ti->proxPointer - lastTi->proxPointer); if (ti->docFreq >= skipInterval) { output->writeVInt(ti->skipOffset); } if (isIndex) { output->writeVLong(other->output->getFilePointer() - lastIndexPointer); lastIndexPointer = other->output->getFilePointer(); // write pointer } lastFieldNumber = fieldNumber; lastTi->set(ti); ++size; } void TermInfosWriter::writeTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength) { // Compute prefix in common with last term int32_t start = 0; int32_t limit = std::min(termBytesLength, lastTermBytesLength); while (start < limit) { if (termBytes[start] != lastTermBytes[start]) { break; } ++start; } int32_t length = termBytesLength - start; output->writeVInt(start); // write shared prefix length output->writeVInt(length); // write delta length output->writeBytes(termBytes.get(), start, length); // write delta bytes output->writeVInt(fieldNumber); // write field num if (lastTermBytes.size() < termBytesLength) { lastTermBytes.resize((int32_t)((double)termBytesLength * 1.5)); } MiscUtils::arrayCopy(termBytes.get(), start, lastTermBytes.get(), start, length); lastTermBytesLength = termBytesLength; } void TermInfosWriter::close() { output->seek(4); // write size after format output->writeLong(size); output->close(); if (!isIndex) { TermInfosWriterPtr(_other)->close(); } } } 
LucenePlusPlus-rel_3.0.9/src/core/index/TermPositionVector.cpp000066400000000000000000000015001456444476200244750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermPositionVector.h" namespace Lucene { TermPositionVector::TermPositionVector() { } TermPositionVector::~TermPositionVector() { } Collection TermPositionVector::getTermPositions(int32_t index) { BOOST_ASSERT(false); return Collection(); // override } Collection TermPositionVector::getOffsets(int32_t index) { BOOST_ASSERT(false); return Collection(); // override } } LucenePlusPlus-rel_3.0.9/src/core/index/TermPositions.cpp000066400000000000000000000016201456444476200235000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermPositions.h" namespace Lucene { TermPositions::TermPositions() { } TermPositions::~TermPositions() { } int32_t TermPositions::nextPosition() { BOOST_ASSERT(false); return 0; // override } int32_t TermPositions::getPayloadLength() { BOOST_ASSERT(false); return 0; // override } ByteArray TermPositions::getPayload(ByteArray data, int32_t offset) { BOOST_ASSERT(false); return ByteArray(); // override } bool TermPositions::isPayloadAvailable() { BOOST_ASSERT(false); return false; // override } } LucenePlusPlus-rel_3.0.9/src/core/index/TermVectorEntry.cpp000066400000000000000000000041351456444476200240010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorEntry.h" #include "StringUtils.h" namespace Lucene { TermVectorEntry::TermVectorEntry(const String& field, const String& term, int32_t frequency, Collection offsets, Collection positions) { this->field = field; this->term = term; this->frequency = frequency; this->offsets = offsets; this->positions = positions; } TermVectorEntry::~TermVectorEntry() { } String TermVectorEntry::getField() { return field; } int32_t TermVectorEntry::getFrequency() { return frequency; } Collection TermVectorEntry::getOffsets() { return offsets; } Collection TermVectorEntry::getPositions() { return positions; } String TermVectorEntry::getTerm() { return term; } void TermVectorEntry::setFrequency(int32_t frequency) { this->frequency = frequency; } void TermVectorEntry::setOffsets(Collection offsets) { this->offsets = offsets; } void TermVectorEntry::setPositions(Collection positions) { this->positions = positions; } bool TermVectorEntry::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } TermVectorEntryPtr otherTermVectorEntry(boost::dynamic_pointer_cast(other)); if (otherTermVectorEntry) { return (term == otherTermVectorEntry->term); } return false; } int32_t TermVectorEntry::hashCode() { return StringUtils::hashCode(term); } String TermVectorEntry::toString() { StringStream buffer; buffer << L"TermVectorEntry{field='" << field; buffer << L"\', term='" << term; buffer << L"\', frequency=" << frequency << L"}"; return buffer.str(); } } LucenePlusPlus-rel_3.0.9/src/core/index/TermVectorEntryFreqSortedComparator.cpp000066400000000000000000000020411456444476200300220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorEntryFreqSortedComparator.h" #include "TermVectorEntry.h" namespace Lucene { TermVectorEntryFreqSortedComparator::~TermVectorEntryFreqSortedComparator() { } bool TermVectorEntryFreqSortedComparator::compare(const TermVectorEntryPtr& first, const TermVectorEntryPtr& second) { int32_t result = (second->getFrequency() - first->getFrequency()); if (result < 0) { return true; } if (result > 0) { return false; } result = first->getTerm().compare(second->getTerm()); if (result < 0) { return true; } if (result > 0) { return false; } return (first->getField().compare(second->getField()) < 0); } } LucenePlusPlus-rel_3.0.9/src/core/index/TermVectorMapper.cpp000066400000000000000000000015601456444476200241230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorMapper.h" namespace Lucene { TermVectorMapper::TermVectorMapper(bool ignoringPositions, bool ignoringOffsets) { this->ignoringPositions = ignoringPositions; this->ignoringOffsets = ignoringOffsets; } TermVectorMapper::~TermVectorMapper() { } bool TermVectorMapper::isIgnoringPositions() { return ignoringPositions; } bool TermVectorMapper::isIgnoringOffsets() { return ignoringOffsets; } void TermVectorMapper::setDocumentNumber(int32_t documentNumber) { // override } } LucenePlusPlus-rel_3.0.9/src/core/index/TermVectorOffsetInfo.cpp000066400000000000000000000033731456444476200247450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorOffsetInfo.h" namespace Lucene { TermVectorOffsetInfo::TermVectorOffsetInfo(int32_t startOffset, int32_t endOffset) { this->endOffset = endOffset; this->startOffset = startOffset; } TermVectorOffsetInfo::~TermVectorOffsetInfo() { } const Collection TermVectorOffsetInfo::EMPTY_OFFSET_INFO() { static Collection _EMPTY_OFFSET_INFO; LUCENE_RUN_ONCE( _EMPTY_OFFSET_INFO = Collection::newInstance(); ); return _EMPTY_OFFSET_INFO; } int32_t TermVectorOffsetInfo::getEndOffset() { return endOffset; } void TermVectorOffsetInfo::setEndOffset(int32_t endOffset) { this->endOffset = endOffset; } int32_t TermVectorOffsetInfo::getStartOffset() { return startOffset; } void TermVectorOffsetInfo::setStartOffset(int32_t startOffset) { this->startOffset = startOffset; } bool TermVectorOffsetInfo::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } 
TermVectorOffsetInfoPtr otherTermVector(boost::dynamic_pointer_cast(other)); if (!otherTermVector) { return false; } return (endOffset == otherTermVector->endOffset && startOffset == otherTermVector->startOffset); } int32_t TermVectorOffsetInfo::hashCode() { int32_t result = startOffset; return (29 * result + endOffset); } } LucenePlusPlus-rel_3.0.9/src/core/index/TermVectorsReader.cpp000066400000000000000000000462161456444476200242730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorsReader.h" #include "BufferedIndexInput.h" #include "IndexFileNames.h" #include "Directory.h" #include "FieldInfos.h" #include "SegmentTermPositionVector.h" #include "TermVectorOffsetInfo.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { /// NOTE: if you make a new format, it must be larger than the current format const int32_t TermVectorsReader::FORMAT_VERSION = 2; /// Changes to speed up bulk merging of term vectors const int32_t TermVectorsReader::FORMAT_VERSION2 = 3; /// Changed strings to UTF8 with length-in-bytes not length-in-chars const int32_t TermVectorsReader::FORMAT_UTF8_LENGTH_IN_BYTES = 4; /// NOTE: always change this if you switch to a new format const int32_t TermVectorsReader::FORMAT_CURRENT = TermVectorsReader::FORMAT_UTF8_LENGTH_IN_BYTES; /// The size in bytes that the FORMAT_VERSION will take up at the beginning of each file const int32_t TermVectorsReader::FORMAT_SIZE = 4; const uint8_t TermVectorsReader::STORE_POSITIONS_WITH_TERMVECTOR = 0x1; const uint8_t TermVectorsReader::STORE_OFFSET_WITH_TERMVECTOR = 0x2; TermVectorsReader::TermVectorsReader() { this->_size = 0; this->numTotalDocs = 0; 
this->docStoreOffset = 0; this->format = 0; } TermVectorsReader::TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos) { ConstructReader(d, segment, fieldInfos, BufferedIndexInput::BUFFER_SIZE, -1, 0); } TermVectorsReader::TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) { ConstructReader(d, segment, fieldInfos, readBufferSize, docStoreOffset, size); } TermVectorsReader::~TermVectorsReader() { } void TermVectorsReader::ConstructReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) { this->_size = 0; this->numTotalDocs = 0; this->docStoreOffset = 0; this->format = 0; bool success = false; LuceneException finally; try { if (d->fileExists(segment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION())) { tvx = d->openInput(segment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION(), readBufferSize); format = checkValidFormat(tvx); tvd = d->openInput(segment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION(), readBufferSize); int32_t tvdFormat = checkValidFormat(tvd); tvf = d->openInput(segment + L"." 
+ IndexFileNames::VECTORS_FIELDS_EXTENSION(), readBufferSize); int32_t tvfFormat = checkValidFormat(tvf); BOOST_ASSERT(format == tvdFormat); BOOST_ASSERT(format == tvfFormat); if (format >= FORMAT_VERSION2) { BOOST_ASSERT((tvx->length() - FORMAT_SIZE) % 16 == 0); numTotalDocs = (int32_t)(tvx->length() >> 4); } else { BOOST_ASSERT((tvx->length() - FORMAT_SIZE) % 8 == 0); numTotalDocs = (int32_t)(tvx->length() >> 3); } if (docStoreOffset == -1) { this->docStoreOffset = 0; this->_size = numTotalDocs; BOOST_ASSERT(size == 0 || numTotalDocs == size); } else { this->docStoreOffset = docStoreOffset; this->_size = size; // Verify the file is long enough to hold all of our docs BOOST_ASSERT(numTotalDocs >= size + docStoreOffset); } } else { // If all documents flushed in a segment had hit non-aborting exceptions, it's possible that // FieldInfos.hasVectors returns true yet the term vector files don't exist. format = 0; } this->fieldInfos = fieldInfos; success = true; } catch (LuceneException& e) { finally = e; } // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception // above. In this case, we want to explicitly close any subset of things that were opened. 
if (!success) { close(); } finally.throwException(); } IndexInputPtr TermVectorsReader::getTvdStream() { return tvd; } IndexInputPtr TermVectorsReader::getTvfStream() { return tvf; } void TermVectorsReader::seekTvx(int32_t docNum) { if (format < FORMAT_VERSION2) { tvx->seek((docNum + docStoreOffset) * 8 + FORMAT_SIZE); } else { tvx->seek((docNum + docStoreOffset) * 16 + FORMAT_SIZE); } } bool TermVectorsReader::canReadRawDocs() { return (format >= FORMAT_UTF8_LENGTH_IN_BYTES); } void TermVectorsReader::rawDocs(Collection tvdLengths, Collection tvfLengths, int32_t startDocID, int32_t numDocs) { if (!tvx) { MiscUtils::arrayFill(tvdLengths.begin(), 0, tvdLengths.size(), 0); MiscUtils::arrayFill(tvfLengths.begin(), 0, tvfLengths.size(), 0); return; } // SegmentMerger calls canReadRawDocs() first and should not call us if that returns false. if (format < FORMAT_VERSION2) { boost::throw_exception(IllegalStateException(L"cannot read raw docs with older term vector formats")); } seekTvx(startDocID); int64_t tvdPosition = tvx->readLong(); tvd->seek(tvdPosition); int64_t tvfPosition = tvx->readLong(); tvf->seek(tvfPosition); int64_t lastTvdPosition = tvdPosition; int64_t lastTvfPosition = tvfPosition; int32_t count = 0; while (count < numDocs) { int32_t docID = docStoreOffset + startDocID + count + 1; BOOST_ASSERT(docID <= numTotalDocs); if (docID < numTotalDocs) { tvdPosition = tvx->readLong(); tvfPosition = tvx->readLong(); } else { tvdPosition = tvd->length(); tvfPosition = tvf->length(); BOOST_ASSERT(count == numDocs - 1); } tvdLengths[count] = (int32_t)(tvdPosition - lastTvdPosition); tvfLengths[count] = (int32_t)(tvfPosition - lastTvfPosition); ++count; lastTvdPosition = tvdPosition; lastTvfPosition = tvfPosition; } } int32_t TermVectorsReader::checkValidFormat(const IndexInputPtr& in) { int32_t format = in->readInt(); if (format > FORMAT_CURRENT) { boost::throw_exception(CorruptIndexException(L"Incompatible format version: " + StringUtils::toString(format) + L" 
expected " + StringUtils::toString(FORMAT_CURRENT) + L" or less")); } return format; } void TermVectorsReader::close() { // make all effort to close up. Keep the first exception and throw it as a new one. LuceneException keep; if (tvx) { try { tvx->close(); } catch (LuceneException& e) { if (keep.isNull()) { keep = e; } } } if (tvd) { try { tvd->close(); } catch (LuceneException& e) { if (keep.isNull()) { keep = e; } } } if (tvf) { try { tvf->close(); } catch (LuceneException& e) { if (keep.isNull()) { keep = e; } } } keep.throwException(); } int32_t TermVectorsReader::size() { return _size; } void TermVectorsReader::get(int32_t docNum, const String& field, const TermVectorMapperPtr& mapper) { if (tvx) { int32_t fieldNumber = fieldInfos->fieldNumber(field); // We need to account for the FORMAT_SIZE at when seeking in the tvx. We don't need to do // this in other seeks because we already have the file pointer that was written in another file seekTvx(docNum); int64_t tvdPosition = tvx->readLong(); tvd->seek(tvdPosition); int32_t fieldCount = tvd->readVInt(); // There are only a few fields per document. We opt for a full scan rather then requiring that they // be ordered. We need to read through all of the fields anyway to get to the tvf pointers. 
int32_t number = 0; int32_t found = -1; for (int32_t i = 0; i < fieldCount; ++i) { if (format >= FORMAT_VERSION) { number = tvd->readVInt(); } else { number += tvd->readVInt(); } if (number == fieldNumber) { found = i; } } // This field, although valid in the segment, was not found in this document if (found != -1) { // Compute position in the tvf file int64_t position; if (format >= FORMAT_VERSION2) { position = tvx->readLong(); } else { position = tvd->readVLong(); } for (int32_t i = 1; i <= found; ++i) { position += tvd->readVLong(); } mapper->setDocumentNumber(docNum); readTermVector(field, position, mapper); } } } TermFreqVectorPtr TermVectorsReader::get(int32_t docNum, const String& field) { // Check if no term vectors are available for this segment at all ParallelArrayTermVectorMapperPtr mapper(newLucene()); get(docNum, field, mapper); return mapper->materializeVector(); } Collection TermVectorsReader::readFields(int32_t fieldCount) { int32_t number = 0; Collection fields(Collection::newInstance(fieldCount)); for (int32_t i = 0; i < fieldCount; ++i) { if (format >= FORMAT_VERSION) { number = tvd->readVInt(); } else { number += tvd->readVInt(); } fields[i] = fieldInfos->fieldName(number); } return fields; } Collection TermVectorsReader::readTvfPointers(int32_t fieldCount) { // Compute position in the tvf file int64_t position; if (format >= FORMAT_VERSION2) { position = tvx->readLong(); } else { position = tvd->readVLong(); } Collection tvfPointers(Collection::newInstance(fieldCount)); tvfPointers[0] = position; for (int32_t i = 1; i < fieldCount; ++i) { position += tvd->readVLong(); tvfPointers[i] = position; } return tvfPointers; } Collection TermVectorsReader::get(int32_t docNum) { Collection result; if (tvx) { // We need to offset by seekTvx(docNum); int64_t tvdPosition = tvx->readLong(); tvd->seek(tvdPosition); int32_t fieldCount = tvd->readVInt(); // No fields are vectorized for this document if (fieldCount != 0) { Collection 
fields(readFields(fieldCount)); Collection tvfPointers(readTvfPointers(fieldCount)); result = readTermVectors(docNum, fields, tvfPointers); } } return result; } void TermVectorsReader::get(int32_t docNumber, const TermVectorMapperPtr& mapper) { // Check if no term vectors are available for this segment at all if (tvx) { // We need to offset by seekTvx(docNumber); int64_t tvdPosition = tvx->readLong(); tvd->seek(tvdPosition); int32_t fieldCount = tvd->readVInt(); // No fields are vectorized for this document if (fieldCount != 0) { Collection fields(readFields(fieldCount)); Collection tvfPointers(readTvfPointers(fieldCount)); mapper->setDocumentNumber(docNumber); readTermVectors(fields, tvfPointers, mapper); } } } Collection TermVectorsReader::readTermVectors(int32_t docNum, Collection fields, Collection tvfPointers) { Collection res(Collection::newInstance(fields.size())); for (int32_t i = 0; i < fields.size(); ++i) { ParallelArrayTermVectorMapperPtr mapper(newLucene()); mapper->setDocumentNumber(docNum); readTermVector(fields[i], tvfPointers[i], mapper); res[i] = mapper->materializeVector(); } return res; } void TermVectorsReader::readTermVectors(Collection fields, Collection tvfPointers, const TermVectorMapperPtr& mapper) { for (int32_t i = 0; i < fields.size(); ++i) { readTermVector(fields[i], tvfPointers[i], mapper); } } void TermVectorsReader::readTermVector(const String& field, int64_t tvfPointer, const TermVectorMapperPtr& mapper) { // Now read the data from specified position. We don't need to offset by the FORMAT here since // the pointer already includes the offset tvf->seek(tvfPointer); int32_t numTerms = tvf->readVInt(); // If no terms - return a constant empty termvector. However, this should never occur! 
if (numTerms == 0) { return; } bool storePositions; bool storeOffsets; if (format >= FORMAT_VERSION) { uint8_t bits = tvf->readByte(); storePositions = ((bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0); storeOffsets = ((bits & STORE_OFFSET_WITH_TERMVECTOR) != 0); } else { tvf->readVInt(); storePositions = false; storeOffsets = false; } mapper->setExpectations(field, numTerms, storeOffsets, storePositions); int32_t start = 0; int32_t deltaLength = 0; int32_t totalLength = 0; ByteArray byteBuffer; CharArray charBuffer; bool preUTF8 = (format < FORMAT_UTF8_LENGTH_IN_BYTES); // init the buffers if (preUTF8) { charBuffer = CharArray::newInstance(10); byteBuffer.reset(); } else { charBuffer.reset(); byteBuffer = ByteArray::newInstance(20); } for (int32_t i = 0; i < numTerms; ++i) { start = tvf->readVInt(); deltaLength = tvf->readVInt(); totalLength = start + deltaLength; String term; if (preUTF8) { // Term stored as "java chars" if (charBuffer.size() < totalLength) { charBuffer.resize((int32_t)(1.5 * (double)totalLength)); } totalLength = start + tvf->readChars(charBuffer.get(), start, deltaLength); term.append(charBuffer.get(), totalLength); } else { // Term stored as utf8 bytes if (byteBuffer.size() < totalLength) { byteBuffer.resize((int32_t)(1.5 * (double)totalLength)); } tvf->readBytes(byteBuffer.get(), start, deltaLength); term = StringUtils::toUnicode(byteBuffer.get(), totalLength); } int32_t freq = tvf->readVInt(); Collection positions; if (storePositions) { // read in the positions // does the mapper even care about positions? if (!mapper->isIgnoringPositions()) { positions = Collection::newInstance(freq); int32_t prevPosition = 0; for (Collection::iterator position = positions.begin(); position != positions.end(); ++position) { *position = prevPosition + tvf->readVInt(); prevPosition = *position; } } else { // we need to skip over the positions. 
Since these are VInts, I don't believe there // is anyway to know for sure how far to skip for (int32_t j = 0; j < freq; ++j) { tvf->readVInt(); } } } Collection offsets; if (storeOffsets) { // does the mapper even care about offsets? if (!mapper->isIgnoringOffsets()) { offsets = Collection::newInstance(freq); int32_t prevOffset = 0; for (Collection::iterator offset = offsets.begin(); offset != offsets.end(); ++offset) { int32_t startOffset = prevOffset + tvf->readVInt(); int32_t endOffset = startOffset + tvf->readVInt(); *offset = newLucene(startOffset, endOffset); prevOffset = endOffset; } } else { for (int32_t j = 0; j < freq; ++j) { tvf->readVInt(); tvf->readVInt(); } } } mapper->map(term, freq, offsets, positions); } } LuceneObjectPtr TermVectorsReader::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? other : newLucene(); TermVectorsReaderPtr cloneReader(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); cloneReader->fieldInfos = fieldInfos; cloneReader->_size = _size; cloneReader->numTotalDocs = numTotalDocs; cloneReader->docStoreOffset = docStoreOffset; cloneReader->format = format; // These are null when a TermVectorsReader was created on a segment that did not have term vectors saved if (tvx && tvd && tvf) { cloneReader->tvx = boost::dynamic_pointer_cast(tvx->clone()); cloneReader->tvd = boost::dynamic_pointer_cast(tvd->clone()); cloneReader->tvf = boost::dynamic_pointer_cast(tvf->clone()); } return cloneReader; } ParallelArrayTermVectorMapper::ParallelArrayTermVectorMapper() { currentPosition = 0; storingOffsets = false; storingPositions = false; } ParallelArrayTermVectorMapper::~ParallelArrayTermVectorMapper() { } void ParallelArrayTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) { this->field = field; terms = Collection::newInstance(numTerms); termFreqs = Collection::newInstance(numTerms); this->storingOffsets = storeOffsets; this->storingPositions = 
storePositions; if (storePositions) { this->positions = Collection< Collection >::newInstance(numTerms); } if (storeOffsets) { this->offsets = Collection< Collection >::newInstance(numTerms); } } void ParallelArrayTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) { terms[currentPosition] = term; termFreqs[currentPosition] = frequency; if (storingOffsets) { this->offsets[currentPosition] = offsets; } if (storingPositions) { this->positions[currentPosition] = positions; } ++currentPosition; } TermFreqVectorPtr ParallelArrayTermVectorMapper::materializeVector() { SegmentTermVectorPtr tv; if (!field.empty() && terms) { if (storingPositions || storingOffsets) { tv = newLucene(field, terms, termFreqs, positions, offsets); } else { tv = newLucene(field, terms, termFreqs); } } return tv; } } LucenePlusPlus-rel_3.0.9/src/core/index/TermVectorsTermsWriter.cpp000066400000000000000000000260601456444476200253530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorsTermsWriter.h" #include "TermVectorsTermsWriterPerThread.h" #include "TermVectorsTermsWriterPerField.h" #include "TermVectorsReader.h" #include "TermsHashPerField.h" #include "TermsHashPerThread.h" #include "RAMOutputStream.h" #include "IndexWriter.h" #include "IndexFileNames.h" #include "SegmentWriteState.h" #include "Directory.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { TermVectorsTermsWriter::TermVectorsTermsWriter(const DocumentsWriterPtr& docWriter) { this->freeCount = 0; this->lastDocID = 0; this->allocCount = 0; this->_docWriter = docWriter; this->docFreeList = Collection::newInstance(1); } TermVectorsTermsWriter::~TermVectorsTermsWriter() { } TermsHashConsumerPerThreadPtr TermVectorsTermsWriter::addThread(const TermsHashPerThreadPtr& perThread) { return newLucene(perThread, shared_from_this()); } void TermVectorsTermsWriter::createPostings(Collection postings, int32_t start, int32_t count) { int32_t end = start + count; for (int32_t i = start; i < end; ++i) { postings[i] = newLucene(); } } void TermVectorsTermsWriter::flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) { SyncLock syncLock(this); // NOTE: it's possible that all documents seen in this segment hit non-aborting exceptions, in which case we will // not have yet init'd the TermVectorsWriter. This is actually OK (unlike in the stored fields case) because, // although IieldInfos.hasVectors() will return true, the TermVectorsReader gracefully handles non-existence of // the term vectors files. 
if (tvx) { if (state->numDocsInStore > 0) { // In case there are some final documents that we didn't see (because they hit a non-aborting exception) fill(state->numDocsInStore - DocumentsWriterPtr(_docWriter)->getDocStoreOffset()); } tvx->flush(); tvd->flush(); tvf->flush(); } for (MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { for (Collection::iterator field = entry->second.begin(); field != entry->second.end(); ++field) { TermVectorsTermsWriterPerFieldPtr perField(boost::static_pointer_cast(*field)); TermsHashPerFieldPtr(perField->_termsHashPerField)->reset(); perField->shrinkHash(); } TermVectorsTermsWriterPerThreadPtr perThread(boost::static_pointer_cast(entry->first)); TermsHashPerThreadPtr(perThread->_termsHashPerThread)->reset(true); } } void TermVectorsTermsWriter::closeDocStore(const SegmentWriteStatePtr& state) { SyncLock syncLock(this); if (tvx) { DocumentsWriterPtr docWriter(_docWriter); // At least one doc in this run had term vectors enabled fill(state->numDocsInStore - docWriter->getDocStoreOffset()); tvx->close(); tvf->close(); tvd->close(); tvx.reset(); BOOST_ASSERT(!state->docStoreSegmentName.empty()); String fileName(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); if (4 + ((int64_t)state->numDocsInStore) * 16 != state->directory->fileLength(fileName)) { boost::throw_exception(RuntimeException(L"after flush: tvx size mismatch: " + StringUtils::toString(state->numDocsInStore) + L" docs vs " + StringUtils::toString(state->directory->fileLength(fileName)) + L" length in bytes of " + fileName + L" file exists?=" + StringUtils::toString(state->directory->fileExists(fileName)))); } state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); state->flushedFiles.add(state->docStoreSegmentName + L"." 
+ IndexFileNames::VECTORS_FIELDS_EXTENSION()); state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION()); docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); lastDocID = 0; } } TermVectorsTermsWriterPerDocPtr TermVectorsTermsWriter::getPerDoc() { SyncLock syncLock(this); if (freeCount == 0) { if (++allocCount > docFreeList.size()) { // Grow our free list up front to make sure we have enough space to recycle all outstanding // PerDoc instances BOOST_ASSERT(allocCount == 1 + docFreeList.size()); docFreeList.resize(MiscUtils::getNextSize(allocCount)); } return newLucene(shared_from_this()); } else { return docFreeList[--freeCount]; } } void TermVectorsTermsWriter::fill(int32_t docID) { int32_t docStoreOffset = DocumentsWriterPtr(_docWriter)->getDocStoreOffset(); int32_t end = docID + docStoreOffset; if (lastDocID < end) { int64_t tvfPosition = tvf->getFilePointer(); while (lastDocID < end) { tvx->writeLong(tvd->getFilePointer()); tvd->writeVInt(0); tvx->writeLong(tvfPosition); ++lastDocID; } } } void TermVectorsTermsWriter::initTermVectorsWriter() { SyncLock syncLock(this); if (!tvx) { DocumentsWriterPtr docWriter(_docWriter); String docStoreSegment(docWriter->getDocStoreSegment()); if (docStoreSegment.empty()) { return; } // If we hit an exception while init'ing the term vector output files, we must abort this segment // because those files will be in an unknown state tvx = docWriter->directory->createOutput(docStoreSegment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); tvd = docWriter->directory->createOutput(docStoreSegment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); tvf = docWriter->directory->createOutput(docStoreSegment + L"." 
+ IndexFileNames::VECTORS_FIELDS_EXTENSION()); tvx->writeInt(TermVectorsReader::FORMAT_CURRENT); tvd->writeInt(TermVectorsReader::FORMAT_CURRENT); tvf->writeInt(TermVectorsReader::FORMAT_CURRENT); docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION()); docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); lastDocID = 0; } } void TermVectorsTermsWriter::finishDocument(const TermVectorsTermsWriterPerDocPtr& perDoc) { SyncLock syncLock(this); DocumentsWriterPtr docWriter(_docWriter); BOOST_ASSERT(IndexWriterPtr(docWriter->_writer)->testPoint(L"TermVectorsTermsWriter.finishDocument start")); initTermVectorsWriter(); fill(perDoc->docID); // Append term vectors to the real outputs tvx->writeLong(tvd->getFilePointer()); tvx->writeLong(tvf->getFilePointer()); tvd->writeVInt(perDoc->numVectorFields); if (perDoc->numVectorFields > 0) { for (int32_t i = 0; i < perDoc->numVectorFields; ++i) { tvd->writeVInt(perDoc->fieldNumbers[i]); } BOOST_ASSERT(perDoc->fieldPointers[0] == 0); int64_t lastPos = perDoc->fieldPointers[0]; for (int32_t i = 1; i < perDoc->numVectorFields; ++i) { int64_t pos = perDoc->fieldPointers[i]; tvd->writeVLong(pos - lastPos); lastPos = pos; } perDoc->perDocTvf->writeTo(tvf); perDoc->numVectorFields = 0; } BOOST_ASSERT(lastDocID == perDoc->docID + docWriter->getDocStoreOffset()); ++lastDocID; perDoc->reset(); free(perDoc); BOOST_ASSERT(IndexWriterPtr(docWriter->_writer)->testPoint(L"TermVectorsTermsWriter.finishDocument end")); } bool TermVectorsTermsWriter::freeRAM() { // We don't hold any state beyond one doc, so we don't free persistent RAM here return false; } void TermVectorsTermsWriter::abort() { if (tvx) { try { tvx->close(); } catch (...) { } tvx.reset(); } if (tvd) { try { tvd->close(); } catch (...) { } tvd.reset(); } if (tvf) { try { tvf->close(); } catch (...) 
{ } tvf.reset(); } lastDocID = 0; } void TermVectorsTermsWriter::free(const TermVectorsTermsWriterPerDocPtr& doc) { SyncLock syncLock(this); BOOST_ASSERT(freeCount < docFreeList.size()); docFreeList[freeCount++] = doc; } int32_t TermVectorsTermsWriter::bytesPerPosting() { return (RawPostingList::BYTES_SIZE + 3 * DocumentsWriter::INT_NUM_BYTE); } TermVectorsTermsWriterPerDoc::TermVectorsTermsWriterPerDoc(const TermVectorsTermsWriterPtr& termsWriter) { this->_termsWriter = termsWriter; buffer = DocumentsWriterPtr(termsWriter->_docWriter)->newPerDocBuffer(); perDocTvf = newLucene(buffer); numVectorFields = 0; fieldNumbers = Collection::newInstance(1); fieldPointers = Collection::newInstance(1); } TermVectorsTermsWriterPerDoc::~TermVectorsTermsWriterPerDoc() { } void TermVectorsTermsWriterPerDoc::reset() { perDocTvf->reset(); buffer->recycle(); numVectorFields = 0; } void TermVectorsTermsWriterPerDoc::abort() { reset(); TermVectorsTermsWriterPtr(_termsWriter)->free(shared_from_this()); } void TermVectorsTermsWriterPerDoc::addField(int32_t fieldNumber) { if (numVectorFields == fieldNumbers.size()) { fieldNumbers.resize(MiscUtils::getNextSize(fieldNumbers.size())); fieldPointers.resize(MiscUtils::getNextSize(fieldPointers.size())); } fieldNumbers[numVectorFields] = fieldNumber; fieldPointers[numVectorFields] = perDocTvf->getFilePointer(); ++numVectorFields; } int64_t TermVectorsTermsWriterPerDoc::sizeInBytes() { return buffer->getSizeInBytes(); } void TermVectorsTermsWriterPerDoc::finish() { TermVectorsTermsWriterPtr(_termsWriter)->finishDocument(shared_from_this()); } TermVectorsTermsWriterPostingList::TermVectorsTermsWriterPostingList() { freq = 0; lastOffset = 0; lastPosition = 0; } TermVectorsTermsWriterPostingList::~TermVectorsTermsWriterPostingList() { } } 
LucenePlusPlus-rel_3.0.9/src/core/index/TermVectorsTermsWriterPerField.cpp000066400000000000000000000220651456444476200267670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorsTermsWriterPerField.h" #include "TermVectorsTermsWriterPerThread.h" #include "TermVectorsTermsWriter.h" #include "TermsHashPerField.h" #include "TermsHashPerThread.h" #include "TermVectorsReader.h" #include "Fieldable.h" #include "FieldInfo.h" #include "FieldInvertState.h" #include "RAMOutputStream.h" #include "ByteSliceReader.h" #include "CharBlockPool.h" #include "OffsetAttribute.h" #include "AttributeSource.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { TermVectorsTermsWriterPerField::TermVectorsTermsWriterPerField(const TermsHashPerFieldPtr& termsHashPerField, const TermVectorsTermsWriterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo) { this->doVectors = false; this->doVectorPositions = false; this->doVectorOffsets = false; this->maxNumPostings = 0; this->_termsHashPerField = termsHashPerField; this->_perThread = perThread; this->_termsWriter = perThread->_termsWriter; this->fieldInfo = fieldInfo; _docState = termsHashPerField->docState; _fieldState = termsHashPerField->fieldState; } TermVectorsTermsWriterPerField::~TermVectorsTermsWriterPerField() { } int32_t TermVectorsTermsWriterPerField::getStreamCount() { return 2; } bool TermVectorsTermsWriterPerField::start(Collection fields, int32_t count) { doVectors = false; doVectorPositions = false; doVectorOffsets = false; for (int32_t i = 0; i < count; ++i) { FieldablePtr field(fields[i]); if (field->isIndexed() && 
field->isTermVectorStored()) { doVectors = true; if (field->isStorePositionWithTermVector()) { doVectorPositions = true; } if (field->isStoreOffsetWithTermVector()) { doVectorOffsets = true; } } } if (doVectors) { TermVectorsTermsWriterPerThreadPtr perThread(_perThread); DocStatePtr docState(_docState); if (!perThread->doc) { perThread->doc = TermVectorsTermsWriterPtr(_termsWriter)->getPerDoc(); perThread->doc->docID = docState->docID; BOOST_ASSERT(perThread->doc->numVectorFields == 0); BOOST_ASSERT(perThread->doc->perDocTvf->length() == 0); BOOST_ASSERT(perThread->doc->perDocTvf->getFilePointer() == 0); } BOOST_ASSERT(perThread->doc->docID == docState->docID); TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); if (termsHashPerField->numPostings != 0) { // Only necessary if previous doc hit a non-aborting exception while writing vectors // in this field termsHashPerField->reset(); TermsHashPerThreadPtr(perThread->_termsHashPerThread)->reset(false); } } return doVectors; } void TermVectorsTermsWriterPerField::abort() { } void TermVectorsTermsWriterPerField::finish() { BOOST_ASSERT(DocStatePtr(_docState)->testPoint(L"TermVectorsTermsWriterPerField.finish start")); TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); int32_t numPostings = termsHashPerField->numPostings; BOOST_ASSERT(numPostings >= 0); if (!doVectors || numPostings == 0) { return; } if (numPostings > maxNumPostings) { maxNumPostings = numPostings; } TermVectorsTermsWriterPerThreadPtr perThread(_perThread); IndexOutputPtr tvf(perThread->doc->perDocTvf); // This is called once, after inverting all occurrences of a given field in the doc. At this point we flush // our hash into the DocWriter. 
BOOST_ASSERT(fieldInfo->storeTermVector); BOOST_ASSERT(perThread->vectorFieldsInOrder(fieldInfo)); perThread->doc->addField(termsHashPerField->fieldInfo->number); Collection postings(termsHashPerField->sortPostings()); tvf->writeVInt(numPostings); uint8_t bits = 0x0; if (doVectorPositions) { bits |= TermVectorsReader::STORE_POSITIONS_WITH_TERMVECTOR; } if (doVectorOffsets) { bits |= TermVectorsReader::STORE_OFFSET_WITH_TERMVECTOR; } tvf->writeByte(bits); int32_t encoderUpto = 0; int32_t lastTermBytesCount = 0; ByteSliceReaderPtr reader(perThread->vectorSliceReader); Collection charBuffers(TermsHashPerThreadPtr(perThread->_termsHashPerThread)->charPool->buffers); for (int32_t j = 0; j < numPostings; ++j) { TermVectorsTermsWriterPostingListPtr posting(boost::static_pointer_cast(postings[j])); int32_t freq = posting->freq; CharArray text2(charBuffers[posting->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT]); int32_t start2 = (posting->textStart & DocumentsWriter::CHAR_BLOCK_MASK); // We swap between two encoders to save copying last Term's byte array UTF8ResultPtr utf8Result(perThread->utf8Results[encoderUpto]); StringUtils::toUTF8(text2.get() + start2, text2.size(), utf8Result); int32_t termBytesCount = utf8Result->length; // Compute common prefix between last term and this term int32_t prefix = 0; if (j > 0) { ByteArray lastTermBytes(perThread->utf8Results[1 - encoderUpto]->result); ByteArray termBytes(perThread->utf8Results[encoderUpto]->result); while (prefix < lastTermBytesCount && prefix < termBytesCount) { if (lastTermBytes[prefix] != termBytes[prefix]) { break; } ++prefix; } } encoderUpto = 1 - encoderUpto; lastTermBytesCount = termBytesCount; int32_t suffix = termBytesCount - prefix; tvf->writeVInt(prefix); tvf->writeVInt(suffix); tvf->writeBytes(utf8Result->result.get(), prefix, suffix); tvf->writeVInt(freq); if (doVectorPositions) { termsHashPerField->initReader(reader, posting, 0); reader->writeTo(tvf); } if (doVectorOffsets) { 
termsHashPerField->initReader(reader, posting, 1); reader->writeTo(tvf); } } termsHashPerField->reset(); // NOTE: we clear per-field at the thread level, because term vectors fully write themselves on each // field; this saves RAM (eg if large doc has two large fields with term vectors on) because we // recycle/reuse all RAM after each field TermsHashPerThreadPtr(perThread->_termsHashPerThread)->reset(false); } void TermVectorsTermsWriterPerField::shrinkHash() { TermsHashPerFieldPtr(_termsHashPerField)->shrinkHash(maxNumPostings); maxNumPostings = 0; } void TermVectorsTermsWriterPerField::start(const FieldablePtr& field) { if (doVectorOffsets) { offsetAttribute = FieldInvertStatePtr(_fieldState)->attributeSource->addAttribute(); } else { offsetAttribute.reset(); } } void TermVectorsTermsWriterPerField::newTerm(const RawPostingListPtr& p0) { BOOST_ASSERT(DocStatePtr(_docState)->testPoint(L"TermVectorsTermsWriterPerField.newTerm start")); TermVectorsTermsWriterPostingListPtr p(boost::static_pointer_cast(p0)); p->freq = 1; FieldInvertStatePtr fieldState(_fieldState); TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); if (doVectorOffsets) { int32_t startOffset = fieldState->offset + offsetAttribute->startOffset(); int32_t endOffset = fieldState->offset + offsetAttribute->endOffset(); termsHashPerField->writeVInt(1, startOffset); termsHashPerField->writeVInt(1, endOffset - startOffset); p->lastOffset = endOffset; } if (doVectorPositions) { termsHashPerField->writeVInt(0, fieldState->position); p->lastPosition = fieldState->position; } } void TermVectorsTermsWriterPerField::addTerm(const RawPostingListPtr& p0) { BOOST_ASSERT(DocStatePtr(_docState)->testPoint(L"TermVectorsTermsWriterPerField.newTerm start")); TermVectorsTermsWriterPostingListPtr p(boost::static_pointer_cast(p0)); ++p->freq; FieldInvertStatePtr fieldState(_fieldState); TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); if (doVectorOffsets) { int32_t startOffset = fieldState->offset + 
offsetAttribute->startOffset(); int32_t endOffset = fieldState->offset + offsetAttribute->endOffset(); termsHashPerField->writeVInt(1, startOffset - p->lastOffset); termsHashPerField->writeVInt(1, endOffset - startOffset); p->lastOffset = endOffset; } if (doVectorPositions) { termsHashPerField->writeVInt(0, fieldState->position - p->lastPosition); p->lastPosition = fieldState->position; } } void TermVectorsTermsWriterPerField::skippingLongTerm() { } } LucenePlusPlus-rel_3.0.9/src/core/index/TermVectorsTermsWriterPerThread.cpp000066400000000000000000000043051456444476200271500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorsTermsWriterPerThread.h" #include "TermVectorsTermsWriterPerField.h" #include "TermVectorsTermsWriter.h" #include "TermsHashPerThread.h" #include "ByteSliceReader.h" #include "FieldInfo.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { TermVectorsTermsWriterPerThread::TermVectorsTermsWriterPerThread(const TermsHashPerThreadPtr& termsHashPerThread, const TermVectorsTermsWriterPtr& termsWriter) { utf8Results = newCollection(newInstance(), newInstance()); this->vectorSliceReader = newLucene(); this->_termsWriter = termsWriter; this->_termsHashPerThread = termsHashPerThread; _docState = termsHashPerThread->docState; } TermVectorsTermsWriterPerThread::~TermVectorsTermsWriterPerThread() { } void TermVectorsTermsWriterPerThread::startDocument() { BOOST_ASSERT(clearLastVectorFieldName()); if (doc) { doc->reset(); doc->docID = DocStatePtr(_docState)->docID; } } DocWriterPtr TermVectorsTermsWriterPerThread::finishDocument() { DocWriterPtr returnDoc(doc); doc.reset(); return 
returnDoc; } TermsHashConsumerPerFieldPtr TermVectorsTermsWriterPerThread::addField(const TermsHashPerFieldPtr& termsHashPerField, const FieldInfoPtr& fieldInfo) { return newLucene(termsHashPerField, shared_from_this(), fieldInfo); } void TermVectorsTermsWriterPerThread::abort() { if (doc) { doc->abort(); doc.reset(); } } bool TermVectorsTermsWriterPerThread::clearLastVectorFieldName() { lastVectorFieldName.clear(); return true; } bool TermVectorsTermsWriterPerThread::vectorFieldsInOrder(const FieldInfoPtr& fi) { bool inOrder = lastVectorFieldName.empty() ? true : (lastVectorFieldName < fi->name); lastVectorFieldName = fi->name; return inOrder; } } LucenePlusPlus-rel_3.0.9/src/core/index/TermVectorsWriter.cpp000066400000000000000000000171041456444476200243370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermVectorsWriter.h" #include "IndexFileNames.h" #include "IndexOutput.h" #include "TermVectorsReader.h" #include "TermVectorOffsetInfo.h" #include "TermPositionVector.h" #include "Directory.h" #include "FieldInfos.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { TermVectorsWriter::TermVectorsWriter(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fieldInfos) { utf8Results = newCollection(newInstance(), newInstance()); // Open files for TermVector storage tvx = directory->createOutput(segment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); tvx->writeInt(TermVectorsReader::FORMAT_CURRENT); tvd = directory->createOutput(segment + L"." 
+ IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); tvd->writeInt(TermVectorsReader::FORMAT_CURRENT); tvf = directory->createOutput(segment + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION()); tvf->writeInt(TermVectorsReader::FORMAT_CURRENT); this->fieldInfos = fieldInfos; } TermVectorsWriter::~TermVectorsWriter() { } void TermVectorsWriter::addAllDocVectors(Collection vectors) { tvx->writeLong(tvd->getFilePointer()); tvx->writeLong(tvf->getFilePointer()); if (vectors) { int32_t numFields = vectors.size(); tvd->writeVInt(numFields); Collection fieldPointers(Collection::newInstance(numFields)); for (int32_t i = 0; i < numFields; ++i) { fieldPointers[i] = tvf->getFilePointer(); int32_t fieldNumber = fieldInfos->fieldNumber(vectors[i]->getField()); // 1st pass: write field numbers to tvd tvd->writeVInt(fieldNumber); int32_t numTerms = vectors[i]->size(); tvf->writeVInt(numTerms); TermPositionVectorPtr tpVector(boost::dynamic_pointer_cast(vectors[i])); uint8_t bits; bool storePositions; bool storeOffsets; if (tpVector) { // May have positions & offsets storePositions = (tpVector->size() > 0 && !tpVector->getTermPositions(0)); storeOffsets = (tpVector->size() > 0 && tpVector->getOffsets(0)); bits = (uint8_t)((storePositions ? TermVectorsReader::STORE_POSITIONS_WITH_TERMVECTOR : 0) + (storeOffsets ? 
TermVectorsReader::STORE_OFFSET_WITH_TERMVECTOR : 0)); } else { bits = 0; storePositions = false; storeOffsets = false; } tvf->writeVInt(bits); Collection terms(vectors[i]->getTerms()); Collection freqs(vectors[i]->getTermFrequencies()); int32_t utf8Upto = 0; utf8Results[1]->length = 0; for (int32_t j = 0; j < numTerms; ++j) { StringUtils::toUTF8(terms[j].c_str(), terms[j].length(), utf8Results[utf8Upto]); int32_t start = MiscUtils::bytesDifference(utf8Results[1 - utf8Upto]->result.get(), utf8Results[1 - utf8Upto]->length, utf8Results[utf8Upto]->result.get(), utf8Results[utf8Upto]->length); int32_t length = utf8Results[utf8Upto]->length - start; tvf->writeVInt(start); // write shared prefix length tvf->writeVInt(length); // write delta length tvf->writeBytes(utf8Results[utf8Upto]->result.get(), start, length); // write delta bytes utf8Upto = 1 - utf8Upto; int32_t termFreq = freqs[j]; tvf->writeVInt(termFreq); if (storePositions) { Collection positions(tpVector->getTermPositions(j)); if (!positions) { boost::throw_exception(IllegalStateException(L"Trying to write positions that are null!")); } BOOST_ASSERT(positions.size() == termFreq); // use delta encoding for positions int32_t lastPosition = 0; for (int32_t k = 0; k < positions.size(); ++k) { int32_t position = positions[k]; tvf->writeVInt(position - lastPosition); lastPosition = position; } } if (storeOffsets) { Collection offsets(tpVector->getOffsets(j)); if (!offsets) { boost::throw_exception(IllegalStateException(L"Trying to write offsets that are null!")); } BOOST_ASSERT(offsets.size() == termFreq); // use delta encoding for offsets int32_t lastEndOffset = 0; for (int32_t k = 0; k < offsets.size(); ++k) { int32_t startOffset = offsets[k]->getStartOffset(); int32_t endOffset = offsets[k]->getEndOffset(); tvf->writeVInt(startOffset - lastEndOffset); tvf->writeVInt(endOffset - startOffset); lastEndOffset = endOffset; } } } } // 2nd pass: write field pointers to tvd if (numFields > 1) { int64_t lastFieldPointer 
= fieldPointers[0]; for (int32_t i = 1; i < numFields; ++i) { int64_t fieldPointer = fieldPointers[i]; tvd->writeVLong(fieldPointer - lastFieldPointer); lastFieldPointer = fieldPointer; } } } else { tvd->writeVInt(0); } } void TermVectorsWriter::addRawDocuments(const TermVectorsReaderPtr& reader, Collection tvdLengths, Collection tvfLengths, int32_t numDocs) { int64_t tvdPosition = tvd->getFilePointer(); int64_t tvfPosition = tvf->getFilePointer(); int64_t tvdStart = tvdPosition; int64_t tvfStart = tvfPosition; for (int32_t i = 0; i < numDocs; ++i) { tvx->writeLong(tvdPosition); tvdPosition += tvdLengths[i]; tvx->writeLong(tvfPosition); tvfPosition += tvfLengths[i]; } tvd->copyBytes(reader->getTvdStream(), tvdPosition - tvdStart); tvf->copyBytes(reader->getTvfStream(), tvfPosition - tvfStart); BOOST_ASSERT(tvd->getFilePointer() == tvdPosition); BOOST_ASSERT(tvf->getFilePointer() == tvfPosition); } void TermVectorsWriter::close() { // make an effort to close all streams we can but remember and re-throw the first exception // encountered in this process LuceneException keep; if (tvx) { try { tvx->close(); } catch (LuceneException& e) { if (keep.isNull()) { keep = e; } } } if (tvd) { try { tvd->close(); } catch (LuceneException& e) { if (keep.isNull()) { keep = e; } } } if (tvf) { try { tvf->close(); } catch (LuceneException& e) { if (keep.isNull()) { keep = e; } } } keep.throwException(); } } LucenePlusPlus-rel_3.0.9/src/core/index/TermsHash.cpp000066400000000000000000000176641456444476200225760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermsHash.h" #include "DocumentsWriter.h" #include "TermsHashConsumer.h" #include "TermsHashPerThread.h" #include "TermsHashPerField.h" #include "TermsHashConsumerPerThread.h" #include "DocInverterPerThread.h" #include "TermsHashConsumerPerField.h" #include "IndexWriter.h" #include "MiscUtils.h" namespace Lucene { TermsHash::TermsHash(const DocumentsWriterPtr& docWriter, bool trackAllocations, const TermsHashConsumerPtr& consumer, const TermsHashPtr& nextTermsHash) { this->postingsFreeCount = 0; this->postingsAllocCount = 0; this->trackAllocations = false; this->postingsFreeList = Collection::newInstance(1); this->_docWriter = docWriter; this->consumer = consumer; this->nextTermsHash = nextTermsHash; this->trackAllocations = trackAllocations; bytesPerPosting = consumer->bytesPerPosting() + 4 * DocumentsWriter::POINTER_NUM_BYTE; postingsFreeChunk = (int32_t)((double)DocumentsWriter::BYTE_BLOCK_SIZE / (double)bytesPerPosting); } TermsHash::~TermsHash() { } InvertedDocConsumerPerThreadPtr TermsHash::addThread(const DocInverterPerThreadPtr& docInverterPerThread) { return newLucene(docInverterPerThread, shared_from_this(), nextTermsHash, TermsHashPerThreadPtr()); } TermsHashPerThreadPtr TermsHash::addThread(const DocInverterPerThreadPtr& docInverterPerThread, const TermsHashPerThreadPtr& primaryPerThread) { return newLucene(docInverterPerThread, shared_from_this(), nextTermsHash, primaryPerThread); } void TermsHash::setFieldInfos(const FieldInfosPtr& fieldInfos) { this->fieldInfos = fieldInfos; consumer->setFieldInfos(fieldInfos); } void TermsHash::abort() { consumer->abort(); if (nextTermsHash) { nextTermsHash->abort(); } } void TermsHash::shrinkFreePostings(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) { BOOST_ASSERT(postingsFreeCount == postingsAllocCount); int32_t newSize = 1; if 
(newSize != postingsFreeList.size()) { if (postingsFreeCount > newSize) { if (trackAllocations) { DocumentsWriterPtr(_docWriter)->bytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting); } postingsFreeCount = newSize; postingsAllocCount = newSize; } postingsFreeList.resize(newSize); } } void TermsHash::closeDocStore(const SegmentWriteStatePtr& state) { SyncLock syncLock(this); consumer->closeDocStore(state); if (nextTermsHash) { nextTermsHash->closeDocStore(state); } } void TermsHash::flush(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) { SyncLock syncLock(this); MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField childThreadsAndFields(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::newInstance()); MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField nextThreadsAndFields; if (nextTermsHash) { nextThreadsAndFields = MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField::newInstance(); } for (MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { Collection childFields(Collection::newInstance()); Collection nextChildFields; if (nextTermsHash) { nextChildFields = Collection::newInstance(); } for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) { childFields.add(boost::static_pointer_cast(*perField)->consumer); if (nextTermsHash) { nextChildFields.add(boost::static_pointer_cast(*perField)->nextPerField); } } childThreadsAndFields.put(boost::static_pointer_cast(entry->first)->consumer, childFields); if (nextTermsHash) { nextThreadsAndFields.put(boost::static_pointer_cast(entry->first)->nextPerThread, nextChildFields); } } consumer->flush(childThreadsAndFields, state); shrinkFreePostings(threadsAndFields, state); if (nextTermsHash) { nextTermsHash->flush(nextThreadsAndFields, 
state); } } bool TermsHash::freeRAM() { if (!trackAllocations) { return false; } bool any = false; int64_t bytesFreed = 0; { SyncLock syncLock(this); int32_t numToFree = postingsFreeCount >= postingsFreeChunk ? postingsFreeChunk : postingsFreeCount; any = (numToFree > 0); if (any) { MiscUtils::arrayFill(postingsFreeList.begin(), postingsFreeCount - numToFree, postingsFreeCount, RawPostingListPtr()); postingsFreeCount -= numToFree; postingsAllocCount -= numToFree; bytesFreed = -numToFree * bytesPerPosting; any = true; } } if (any) { DocumentsWriterPtr(_docWriter)->bytesAllocated(bytesFreed); } if (nextTermsHash && nextTermsHash->freeRAM()) { any = true; } return any; } void TermsHash::recyclePostings(Collection postings, int32_t numPostings) { SyncLock syncLock(this); BOOST_ASSERT(postings.size() >= numPostings); // Move all Postings from this ThreadState back to our free list. We pre-allocated this array while we // were creating Postings to make sure it's large enough BOOST_ASSERT(postingsFreeCount + numPostings <= postingsFreeList.size()); MiscUtils::arrayCopy(postings.begin(), 0, postingsFreeList.begin(), postingsFreeCount, numPostings); postingsFreeCount += numPostings; } void TermsHash::getPostings(Collection postings) { SyncLock syncLock(this); DocumentsWriterPtr docWriter(_docWriter); IndexWriterPtr writer(docWriter->_writer); BOOST_ASSERT(writer->testPoint(L"TermsHash.getPostings start")); BOOST_ASSERT(postingsFreeCount <= postingsFreeList.size()); BOOST_ASSERT(postingsFreeCount <= postingsAllocCount); int32_t numToCopy = postingsFreeCount < postings.size() ? 
postingsFreeCount : postings.size(); int32_t start = postingsFreeCount - numToCopy; BOOST_ASSERT(start >= 0); BOOST_ASSERT(start + numToCopy <= postingsFreeList.size()); BOOST_ASSERT(numToCopy <= postings.size()); MiscUtils::arrayCopy(postingsFreeList.begin(), start, postings.begin(), 0, numToCopy); // Directly allocate the remainder if any if (numToCopy != postings.size()) { int32_t extra = postings.size() - numToCopy; int32_t newPostingsAllocCount = postingsAllocCount + extra; consumer->createPostings(postings, numToCopy, extra); BOOST_ASSERT(writer->testPoint(L"TermsHash.getPostings after create")); postingsAllocCount += extra; if (trackAllocations) { docWriter->bytesAllocated(extra * bytesPerPosting); } if (newPostingsAllocCount > postingsFreeList.size()) { // Pre-allocate the postingsFreeList so it's large enough to hold all postings we've given out postingsFreeList = Collection::newInstance(MiscUtils::getNextSize(newPostingsAllocCount)); } } postingsFreeCount -= numToCopy; if (trackAllocations) { docWriter->bytesUsed(postings.size() * bytesPerPosting); } } } LucenePlusPlus-rel_3.0.9/src/core/index/TermsHashConsumer.cpp000066400000000000000000000010741456444476200242760ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermsHashConsumer.h" namespace Lucene { TermsHashConsumer::~TermsHashConsumer() { } void TermsHashConsumer::setFieldInfos(const FieldInfosPtr& fieldInfos) { this->fieldInfos = fieldInfos; } } LucenePlusPlus-rel_3.0.9/src/core/index/TermsHashConsumerPerField.cpp000066400000000000000000000007451456444476200257150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermsHashConsumerPerField.h" namespace Lucene { TermsHashConsumerPerField::~TermsHashConsumerPerField() { } } LucenePlusPlus-rel_3.0.9/src/core/index/TermsHashConsumerPerThread.cpp000066400000000000000000000007501456444476200260750ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermsHashConsumerPerThread.h" namespace Lucene { TermsHashConsumerPerThread::~TermsHashConsumerPerThread() { } } LucenePlusPlus-rel_3.0.9/src/core/index/TermsHashPerField.cpp000066400000000000000000000412631456444476200242010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermsHashPerField.h" #include "TermsHashPerThread.h" #include "TermsHashConsumerPerThread.h" #include "TermsHashConsumerPerField.h" #include "TermsHash.h" #include "TermAttribute.h" #include "AttributeSource.h" #include "DocInverterPerField.h" #include "DocumentsWriter.h" #include "IntBlockPool.h" #include "CharBlockPool.h" #include "ByteSliceReader.h" #include "RawPostingList.h" #include "FieldInvertState.h" #include "UTF8Stream.h" #include "MiscUtils.h" namespace Lucene { TermsHashPerField::TermsHashPerField(const DocInverterPerFieldPtr& docInverterPerField, const TermsHashPerThreadPtr& perThread, const TermsHashPerThreadPtr& nextPerThread, const FieldInfoPtr& fieldInfo) { this->_docInverterPerField = docInverterPerField; this->_perThread = perThread; this->nextPerThread = nextPerThread; this->fieldInfo = fieldInfo; } TermsHashPerField::~TermsHashPerField() { } void TermsHashPerField::initialize() { this->postingsCompacted = false; this->numPostings = 0; this->postingsHashSize = 4; this->postingsHashHalfSize = this->postingsHashSize / 2; this->postingsHashMask = this->postingsHashSize - 1; this->postingsHash = Collection::newInstance(postingsHashSize); this->doCall = false; this->doNextCall = false; this->intUptoStart = 0; TermsHashPerThreadPtr perThread(_perThread); intPool = perThread->intPool; charPool = perThread->charPool; bytePool = perThread->bytePool; docState = perThread->docState; DocInverterPerFieldPtr docInverterPerField(_docInverterPerField); fieldState = docInverterPerField->fieldState; this->consumer = perThread->consumer->addField(shared_from_this(), fieldInfo); streamCount = consumer->getStreamCount(); numPostingInt = 2 * streamCount; if (nextPerThread) { nextPerField = boost::dynamic_pointer_cast(nextPerThread->addField(docInverterPerField, fieldInfo)); } } void TermsHashPerField::shrinkHash(int32_t targetSize) { 
BOOST_ASSERT(postingsCompacted || numPostings == 0); int32_t newSize = 4; if (newSize != postingsHash.size()) { postingsHash.resize(newSize); postingsHashSize = newSize; postingsHashHalfSize = newSize / 2; postingsHashMask = newSize - 1; } MiscUtils::arrayFill(postingsHash.begin(), 0, postingsHash.size(), RawPostingListPtr()); } void TermsHashPerField::reset() { if (!postingsCompacted) { compactPostings(); } BOOST_ASSERT(numPostings <= postingsHash.size()); if (numPostings > 0) { TermsHashPtr(TermsHashPerThreadPtr(_perThread)->_termsHash)->recyclePostings(postingsHash, numPostings); MiscUtils::arrayFill(postingsHash.begin(), 0, numPostings, RawPostingListPtr()); numPostings = 0; } postingsCompacted = false; if (nextPerField) { nextPerField->reset(); } } void TermsHashPerField::abort() { SyncLock syncLock(this); reset(); if (nextPerField) { nextPerField->abort(); } } void TermsHashPerField::initReader(const ByteSliceReaderPtr& reader, const RawPostingListPtr& p, int32_t stream) { BOOST_ASSERT(stream < streamCount); IntArray ints(intPool->buffers[p->intStart >> DocumentsWriter::INT_BLOCK_SHIFT]); int32_t upto = (p->intStart & DocumentsWriter::INT_BLOCK_MASK); reader->init(bytePool, p->byteStart + stream * ByteBlockPool::FIRST_LEVEL_SIZE(), ints[upto + stream]); } void TermsHashPerField::compactPostings() { SyncLock syncLock(this); int32_t upto = 0; for (int32_t i = 0; i < postingsHashSize; ++i) { if (postingsHash[i]) { if (upto < i) { postingsHash[upto] = postingsHash[i]; postingsHash[i].reset(); } ++upto; } } BOOST_ASSERT(upto == numPostings); postingsCompacted = true; } struct comparePostings { comparePostings(Collection buffers) { this->buffers = buffers; } /// Compares term text for two Posting instance inline bool operator()(const RawPostingListPtr& first, const RawPostingListPtr& second) const { if (first == second) { return false; } wchar_t* text1 = buffers[first->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT].get(); int32_t pos1 = (first->textStart & 
DocumentsWriter::CHAR_BLOCK_MASK); wchar_t* text2 = buffers[second->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT].get(); int32_t pos2 = (second->textStart & DocumentsWriter::CHAR_BLOCK_MASK); BOOST_ASSERT(text1 != text2 || pos1 != pos2); while (true) { wchar_t c1 = text1[pos1++]; wchar_t c2 = text2[pos2++]; if (c1 != c2) { if (c2 == UTF8Base::UNICODE_TERMINATOR) { return false; } else if (c1 == UTF8Base::UNICODE_TERMINATOR) { return true; } else { return (c1 < c2); } } else { // This method should never compare equal postings unless first == second BOOST_ASSERT(c1 != UTF8Base::UNICODE_TERMINATOR); } } } Collection buffers; }; Collection TermsHashPerField::sortPostings() { compactPostings(); std::sort(postingsHash.begin(), postingsHash.begin() + numPostings, comparePostings(charPool->buffers)); return postingsHash; } bool TermsHashPerField::postingEquals(const wchar_t* tokenText, int32_t tokenTextLen) { wchar_t* text = TermsHashPerThreadPtr(_perThread)->charPool->buffers[p->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT].get(); BOOST_ASSERT(text); int32_t pos = (p->textStart & DocumentsWriter::CHAR_BLOCK_MASK); int32_t tokenPos = 0; for (; tokenPos < tokenTextLen; ++pos, ++tokenPos) { if (tokenText[tokenPos] != text[pos]) { return false; } } return (text[pos] == UTF8Base::UNICODE_TERMINATOR); } void TermsHashPerField::start(const FieldablePtr& field) { termAtt = fieldState->attributeSource->addAttribute(); consumer->start(field); if (nextPerField) { nextPerField->start(field); } } bool TermsHashPerField::start(Collection fields, int32_t count) { doCall = consumer->start(fields, count); if (nextPerField) { doNextCall = nextPerField->start(fields, count); } return (doCall || doNextCall); } void TermsHashPerField::add(int32_t textStart) { // Secondary entry point (for 2nd and subsequent TermsHash), we hash by textStart int32_t code = textStart; int32_t hashPos = (code & postingsHashMask); BOOST_ASSERT(!postingsCompacted); // Locate RawPostingList in hash p = 
postingsHash[hashPos]; if (p && p->textStart != textStart) { // Conflict: keep searching different locations in the hash table. int32_t inc = (((code >> 8) + code) | 1); do { code += inc; hashPos = (code & postingsHashMask); p = postingsHash[hashPos]; } while (p && p->textStart != textStart); } if (!p) { // First time we are seeing this token since we last flushed the hash. TermsHashPerThreadPtr perThread(_perThread); // Refill? if (perThread->freePostingsCount == 0) { perThread->morePostings(); } // Pull next free RawPostingList from free list p = perThread->freePostings[--perThread->freePostingsCount]; BOOST_ASSERT(p); p->textStart = textStart; BOOST_ASSERT(!postingsHash[hashPos]); postingsHash[hashPos] = p; ++numPostings; if (numPostings == postingsHashHalfSize) { rehashPostings(2 * postingsHashSize); } // Init stream slices if (numPostingInt + intPool->intUpto > DocumentsWriter::INT_BLOCK_SIZE) { intPool->nextBuffer(); } if (DocumentsWriter::BYTE_BLOCK_SIZE - bytePool->byteUpto < numPostingInt * ByteBlockPool::FIRST_LEVEL_SIZE()) { bytePool->nextBuffer(); } intUptos = intPool->buffer; intUptoStart = intPool->intUpto; intPool->intUpto += streamCount; p->intStart = intUptoStart + intPool->intOffset; for (int32_t i = 0; i < streamCount; ++i) { int32_t upto = bytePool->newSlice(ByteBlockPool::FIRST_LEVEL_SIZE()); intUptos[intUptoStart + i] = upto + bytePool->byteOffset; } p->byteStart = intUptos[intUptoStart]; consumer->newTerm(p); } else { intUptos = intPool->buffers[p->intStart >> DocumentsWriter::INT_BLOCK_SHIFT]; intUptoStart = (p->intStart & DocumentsWriter::INT_BLOCK_MASK); consumer->addTerm(p); } } void TermsHashPerField::add() { BOOST_ASSERT(!postingsCompacted); // Get the text of this term. 
wchar_t* tokenText = termAtt->termBufferArray(); int32_t tokenTextLen = termAtt->termLength(); // Compute hashcode and replace any invalid UTF16 sequences int32_t downto = tokenTextLen; int32_t code = 0; while (downto > 0) { wchar_t ch = tokenText[--downto]; #ifdef LPP_UNICODE_CHAR_SIZE_2 if (ch >= UTF8Base::TRAIL_SURROGATE_MIN && ch <= UTF8Base::TRAIL_SURROGATE_MAX) { if (downto == 0) { // Unpaired ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; tokenText[downto] = ch; } else { wchar_t ch2 = tokenText[downto - 1]; if (ch2 >= UTF8Base::LEAD_SURROGATE_MIN && ch2 <= UTF8Base::LEAD_SURROGATE_MAX) { // OK: high followed by low. This is a valid surrogate pair. code = ((code * 31) + ch) * 31 + ch2; --downto; continue; } else { // Unpaired ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; tokenText[downto] = ch; } } } else if (ch >= UTF8Base::LEAD_SURROGATE_MIN && (ch <= UTF8Base::LEAD_SURROGATE_MAX || ch == UTF8Base::UNICODE_TERMINATOR)) { // Unpaired or UTF8Base::UNICODE_TERMINATOR ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; tokenText[downto] = ch; } #else if (ch == UTF8Base::UNICODE_TERMINATOR) { // Unpaired or UTF8Base::UNICODE_TERMINATOR ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; tokenText[downto] = ch; } #endif code = (code * 31) + ch; } int32_t hashPos = (code & postingsHashMask); // Locate RawPostingList in hash p = postingsHash[hashPos]; if (p && !postingEquals(tokenText, tokenTextLen)) { // Conflict: keep searching different locations in the hash table. int32_t inc = (((code >> 8) + code) | 1); do { code += inc; hashPos = (code & postingsHashMask); p = postingsHash[hashPos]; } while (p && !postingEquals(tokenText, tokenTextLen)); } if (!p) { // First time we are seeing this token since we last flushed the hash. int32_t textLen1 = 1 + tokenTextLen; if (textLen1 + charPool->charUpto > DocumentsWriter::CHAR_BLOCK_SIZE) { if (textLen1 > DocumentsWriter::CHAR_BLOCK_SIZE) { // Just skip this term, to remain as robust as possible during indexing. 
A TokenFilter // can be inserted into the analyzer chain if other behavior is wanted (pruning the term // to a prefix, throwing an exception, etc). if (docState->maxTermPrefix.empty()) { docState->maxTermPrefix.append(tokenText, std::min((int32_t)30, tokenTextLen)); } consumer->skippingLongTerm(); return; } charPool->nextBuffer(); } TermsHashPerThreadPtr perThread(_perThread); // Refill? if (perThread->freePostingsCount == 0) { perThread->morePostings(); } // Pull next free RawPostingList from free list p = perThread->freePostings[--perThread->freePostingsCount]; BOOST_ASSERT(p); wchar_t* text = charPool->buffer.get(); int32_t textUpto = charPool->charUpto; p->textStart = textUpto + charPool->charOffset; charPool->charUpto += textLen1; MiscUtils::arrayCopy(tokenText, 0, text, textUpto, tokenTextLen); text[textUpto + tokenTextLen] = UTF8Base::UNICODE_TERMINATOR; BOOST_ASSERT(!postingsHash[hashPos]); postingsHash[hashPos] = p; ++numPostings; if (numPostings == postingsHashHalfSize) { rehashPostings(2 * postingsHashSize); } // Init stream slices if (numPostingInt + intPool->intUpto > DocumentsWriter::INT_BLOCK_SIZE) { intPool->nextBuffer(); } if (DocumentsWriter::BYTE_BLOCK_SIZE - bytePool->byteUpto < numPostingInt * ByteBlockPool::FIRST_LEVEL_SIZE()) { bytePool->nextBuffer(); } intUptos = intPool->buffer; intUptoStart = intPool->intUpto; intPool->intUpto += streamCount; p->intStart = intUptoStart + intPool->intOffset; for (int32_t i = 0; i < streamCount; ++i) { int32_t upto = bytePool->newSlice(ByteBlockPool::FIRST_LEVEL_SIZE()); intUptos[intUptoStart + i] = upto + bytePool->byteOffset; } p->byteStart = intUptos[intUptoStart]; consumer->newTerm(p); } else { intUptos = intPool->buffers[p->intStart >> DocumentsWriter::INT_BLOCK_SHIFT]; intUptoStart = (p->intStart & DocumentsWriter::INT_BLOCK_MASK); consumer->addTerm(p); } if (doNextCall) { nextPerField->add(p->textStart); } } void TermsHashPerField::writeByte(int32_t stream, int8_t b) { int32_t upto = 
intUptos[intUptoStart + stream]; ByteArray bytes(bytePool->buffers[upto >> DocumentsWriter::BYTE_BLOCK_SHIFT]); BOOST_ASSERT(bytes); int32_t offset = (upto & DocumentsWriter::BYTE_BLOCK_MASK); if (bytes[offset] != 0) { // End of slice; allocate a new one offset = bytePool->allocSlice(bytes, offset); bytes = bytePool->buffer; intUptos[intUptoStart + stream] = offset + bytePool->byteOffset; } bytes[offset] = b; intUptos[intUptoStart + stream]++; } void TermsHashPerField::writeBytes(int32_t stream, const uint8_t* b, int32_t offset, int32_t length) { int32_t end = offset + length; for (int32_t i = offset; i < end; ++i) { writeByte(stream, b[i]); } } void TermsHashPerField::writeVInt(int32_t stream, int32_t i) { BOOST_ASSERT(stream < streamCount); while ((i & ~0x7f) != 0) { writeByte(stream, (uint8_t)((i & 0x7f) | 0x80)); i = MiscUtils::unsignedShift(i, 7); } writeByte(stream, (uint8_t)i); } void TermsHashPerField::finish() { consumer->finish(); if (nextPerField) { nextPerField->finish(); } } void TermsHashPerField::rehashPostings(int32_t newSize) { int32_t newMask = newSize - 1; Collection newHash(Collection::newInstance(newSize)); TermsHashPerThreadPtr perThread(_perThread); for (int32_t i = 0; i < postingsHashSize; ++i) { RawPostingListPtr p0(postingsHash[i]); if (p0) { int32_t code; if (perThread->primary) { int32_t start = (p0->textStart & DocumentsWriter::CHAR_BLOCK_MASK); CharArray text = charPool->buffers[p0->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT]; int32_t pos = start; while (text[pos] != UTF8Base::UNICODE_TERMINATOR) { ++pos; } code = 0; while (pos > start) { code = (code * 31) + text[--pos]; } } else { code = p0->textStart; } int32_t hashPos = (code & newMask); BOOST_ASSERT(hashPos >= 0); if (newHash[hashPos]) { int32_t inc = (((code >> 8) + code) | 1); do { code += inc; hashPos = (code & newMask); } while (newHash[hashPos]); } newHash[hashPos] = p0; } } postingsHashMask = newMask; postingsHash = newHash; postingsHashSize = newSize; 
postingsHashHalfSize = (newSize >> 1); } } LucenePlusPlus-rel_3.0.9/src/core/index/TermsHashPerThread.cpp000066400000000000000000000074221456444476200243640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermsHashPerThread.h" #include "TermsHashPerField.h" #include "DocInverterPerThread.h" #include "TermsHash.h" #include "TermsHashConsumer.h" #include "TermsHashConsumerPerThread.h" #include "CharBlockPool.h" #include "IntBlockPool.h" #include "DocumentsWriter.h" namespace Lucene { TermsHashPerThread::TermsHashPerThread(const DocInverterPerThreadPtr& docInverterPerThread, const TermsHashPtr& termsHash, const TermsHashPtr& nextTermsHash, const TermsHashPerThreadPtr& primaryPerThread) { this->freePostings = Collection::newInstance(256); this->freePostingsCount = 0; this->primary = false; this->_docInverterPerThread = docInverterPerThread; this->_termsHash = termsHash; this->nextTermsHash = nextTermsHash; this->_primaryPerThread = primaryPerThread; } TermsHashPerThread::~TermsHashPerThread() { } void TermsHashPerThread::initialize() { DocInverterPerThreadPtr docInverterPerThread(_docInverterPerThread); TermsHashPtr termsHash(_termsHash); docState = docInverterPerThread->docState; consumer = termsHash->consumer->addThread(shared_from_this()); if (nextTermsHash) { // We are primary charPool = newLucene(DocumentsWriterPtr(termsHash->_docWriter)); primary = true; } else { charPool = TermsHashPerThreadPtr(_primaryPerThread)->charPool; primary = false; } intPool = newLucene(DocumentsWriterPtr(termsHash->_docWriter), termsHash->trackAllocations); bytePool = newLucene(DocumentsWriterPtr(termsHash->_docWriter)->byteBlockAllocator, 
termsHash->trackAllocations); if (nextTermsHash) { nextPerThread = nextTermsHash->addThread(docInverterPerThread, shared_from_this()); } } InvertedDocConsumerPerFieldPtr TermsHashPerThread::addField(const DocInverterPerFieldPtr& docInverterPerField, const FieldInfoPtr& fieldInfo) { return newLucene(docInverterPerField, shared_from_this(), nextPerThread, fieldInfo); } void TermsHashPerThread::abort() { SyncLock syncLock(this); reset(true); consumer->abort(); if (nextPerThread) { nextPerThread->abort(); } } void TermsHashPerThread::morePostings() { BOOST_ASSERT(freePostingsCount == 0); TermsHashPtr(_termsHash)->getPostings(freePostings); freePostingsCount = freePostings.size(); BOOST_ASSERT(noNullPostings(freePostings, freePostingsCount, L"consumer=" + consumer->toString())); } bool TermsHashPerThread::noNullPostings(Collection postings, int32_t count, const String& details) { for (int32_t i = 0; i < count; ++i) { BOOST_ASSERT(postings[i]); } return true; } void TermsHashPerThread::startDocument() { consumer->startDocument(); if (nextPerThread) { nextPerThread->consumer->startDocument(); } } DocWriterPtr TermsHashPerThread::finishDocument() { DocWriterPtr doc(consumer->finishDocument()); DocWriterPtr doc2(nextPerThread ? nextPerThread->consumer->finishDocument() : DocWriterPtr()); if (!doc) { return doc2; } else { doc->setNext(doc2); return doc; } } void TermsHashPerThread::reset(bool recyclePostings) { intPool->reset(); bytePool->reset(); if (primary) { charPool->reset(); } if (recyclePostings) { TermsHashPtr(_termsHash)->recyclePostings(freePostings, freePostingsCount); freePostingsCount = 0; } } } LucenePlusPlus-rel_3.0.9/src/core/msvc/000077500000000000000000000000001456444476200200175ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/msvc/LuceneInc.cpp000066400000000000000000000005521456444476200223720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" LucenePlusPlus-rel_3.0.9/src/core/msvc/dllmain.cpp000066400000000000000000000012621456444476200221440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #if defined(_WIN32) && defined(LPP_HAVE_DLL) BOOL APIENTRY DllMain(HMODULE module, DWORD ul_reason_for_call, LPVOID lpReserved) { switch (ul_reason_for_call) { case DLL_PROCESS_ATTACH: case DLL_THREAD_ATTACH: case DLL_THREAD_DETACH: case DLL_PROCESS_DETACH: break; } return TRUE; } #endif LucenePlusPlus-rel_3.0.9/src/core/msvc/lucene++.vcproj000066400000000000000000002270441456444476200226560ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.9/src/core/msvc/lucene++.vcxproj000066400000000000000000001767431456444476200230570ustar00rootroot00000000000000 Debug DLL Win32 Debug Static Win32 Release DLL Win32 Release Static Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A} lucene++ Win32Proj StaticLibrary Unicode true StaticLibrary Unicode DynamicLibrary Unicode true DynamicLibrary Unicode <_ProjectFileVersion>10.0.40219.1 $(SolutionDir)$(Configuration)\ $(Configuration)\ true $(SolutionDir)$(Configuration)\ $(Configuration)\ false $(SolutionDir)$(Configuration)\ $(Configuration)\ $(SolutionDir)$(Configuration)\ $(Configuration)\ Disabled ..\..\..\include;..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;_DEBUG;_WINDOWS;_USRDLL;LPP_HAVE_DLL;LPP_BUILDING_LIB;%(PreprocessorDefinitions) true Async EnableFastChecks MultiThreadedDebugDLL Use 
LuceneInc.h Level3 EditAndContinue 4996;%(DisableSpecificWarnings) false $(BOOST_ROOT)\lib32-msvc-10.0;%(AdditionalLibraryDirectories) true Windows MachineX86 if not exist "..\..\..\lib" mkdir "..\..\..\lib" if not exist "..\..\..\bin" mkdir "..\..\..\bin" copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." copy "$(OutDir)$(ProjectName).dll" "..\..\..\bin\." MaxSpeed AnySuitable true Speed true ..\..\..\include;..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;NDEBUG;_WINDOWS;_USRDLL;LPP_HAVE_DLL;LPP_BUILDING_LIB;%(PreprocessorDefinitions) Async MultiThreadedDLL true Use LuceneInc.h Level3 ProgramDatabase 4996;%(DisableSpecificWarnings) false $(BOOST_ROOT)\stage\lib;%(AdditionalLibraryDirectories) true Windows true true MachineX86 if not exist "..\..\..\lib" mkdir "..\..\..\lib" if not exist "..\..\..\bin" mkdir "..\..\..\bin" copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." copy "$(OutDir)$(ProjectName).dll" "..\..\..\bin\." Disabled ..\..\..\include;..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;_DEBUG;_LIB;LPP_BUILDING_LIB;%(PreprocessorDefinitions) true EnableFastChecks MultiThreadedDebugDLL Use LuceneInc.h Level3 EditAndContinue 4996;%(DisableSpecificWarnings) false /IGNORE:4221 %(AdditionalOptions) $(BOOST_ROOT)\stage\lib;%(AdditionalLibraryDirectories) if not exist "..\..\..\lib" mkdir "..\..\..\lib" copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." MaxSpeed AnySuitable true Speed true ..\..\..\include;..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;NDEBUG;_LIB;LPP_BUILDING_LIB;%(PreprocessorDefinitions) MultiThreadedDLL true Use LuceneInc.h Level3 ProgramDatabase 4996;%(DisableSpecificWarnings) false /IGNORE:4221 %(AdditionalOptions) $(BOOST_ROOT)\stage\lib;%(AdditionalLibraryDirectories) if not exist "..\..\..\lib" mkdir "..\..\..\lib" copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." 
false false false false Create Create Create Create LucenePlusPlus-rel_3.0.9/src/core/msvc/lucene++.vcxproj.filters000066400000000000000000002703321456444476200245130ustar00rootroot00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hpp;hxx;hm;inl;inc;xsd {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav {48502038-53e1-4765-991b-c97b4be11da4} {a684d11c-6040-49c1-b5e5-0615984a8c2c} {b61dc7e6-fb87-4b3f-ba4c-226a4361b847} {c2e416ff-52fb-4d36-9be1-e8610beefd4d} {88cac9c3-83c6-42df-a5eb-a1f0d741b627} {3e5e8bae-b0a3-498d-89bb-c4d511c21f4f} {9a837aa2-bfff-4729-b020-bece9d615183} {174ad8b8-0c6c-4ff7-8a6f-84f5627420ea} {f6b6a77f-cc4a-406e-9b54-cb4af439c1e1} {d4777928-6836-4755-9087-b4025ec3fc78} {88de8393-eac2-4c2b-b102-0aa83c3ff01b} {eb586280-ac06-4447-a2f7-404725729407} {cc88a9b9-9490-48e9-a2d2-c77a8e370f45} {81e1de0d-e621-451a-94dd-97e69cff08e7} {ce3c5a86-a7c4-4f59-9c67-92d38e9dc50a} source files source files index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index store store store store store store store store store store store store store store store store store store store store store store analysis analysis analysis analysis 
analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis\tokenattributes analysis\tokenattributes analysis\tokenattributes analysis\tokenattributes analysis\tokenattributes analysis\tokenattributes analysis\standard analysis\standard analysis\standard analysis\standard util util util util util util util util util util util util util util util search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\payloads search\payloads search\payloads search\payloads search\payloads search\payloads search\payloads search\function search\function search\function search\function search\function search\function search\function search\function search\function search\function search\function search\function document document document document document document document document document document document document document query parser query parser query parser query parser query parser query parser query parser query parser platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform 
platform platform platform platform platform platform platform platform platform platform platform platform\md5 platform\unicode header files header files header files header files header files header files index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index index store store store store store store store store store store store store store store store store store store store store store store analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis analysis\tokenattributes analysis\tokenattributes analysis\tokenattributes analysis\tokenattributes analysis\tokenattributes analysis\tokenattributes analysis\standard analysis\standard analysis\standard analysis\standard analysis\standard util util util util util util util util util util util util util util util util util util util util util util util search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search 
search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\spans search\payloads search\payloads search\payloads search\payloads search\payloads search\payloads search\payloads search\payloads search\function search\function search\function search\function search\function search\function search\function search\function search\function search\function search\function search\function search\function search\function search\function search\function search\function search\function document document document document document document document document document document document document document query parser query parser query parser query parser query parser query parser query parser query parser platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform platform\md5 platform\unicode platform\unicode index index index index index index index index index index store store store store store store 
LucenePlusPlus-rel_3.0.9/src/core/queryparser/000077500000000000000000000000001456444476200214315ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/queryparser/FastCharStream.cpp000066400000000000000000000052541456444476200250120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FastCharStream.h" #include "Reader.h" #include "MiscUtils.h" namespace Lucene { FastCharStream::FastCharStream(const ReaderPtr& reader) { input = reader; bufferLength = 0; bufferPosition = 0; tokenStart = 0; bufferStart = 0; } FastCharStream::~FastCharStream() { } wchar_t FastCharStream::readChar() { if (bufferPosition >= bufferLength) { refill(); } return buffer[bufferPosition++]; } void FastCharStream::refill() { int32_t newPosition = bufferLength - tokenStart; if (tokenStart == 0) { // token won't fit in buffer if (!buffer) { buffer = CharArray::newInstance(2048); } else if (bufferLength == buffer.size()) { // grow buffer buffer.resize(buffer.size() * 2); } } else { // shift token to front MiscUtils::arrayCopy(buffer.get(), tokenStart, buffer.get(), 0, newPosition); } bufferLength = newPosition; // update state bufferPosition = newPosition; bufferStart += tokenStart; tokenStart = 0; int32_t charsRead = input->read(buffer.get(), newPosition, buffer.size() - newPosition); // fill space in buffer if (charsRead == -1) { boost::throw_exception(IOException(L"read past eof")); } else { bufferLength += charsRead; } } wchar_t FastCharStream::BeginToken() { tokenStart = bufferPosition; return readChar(); } void FastCharStream::backup(int32_t amount) { bufferPosition -= amount; } String FastCharStream::GetImage() { return String(buffer.get() + 
tokenStart, bufferPosition - tokenStart); } CharArray FastCharStream::GetSuffix(int32_t length) { CharArray value(CharArray::newInstance(length)); MiscUtils::arrayCopy(buffer.get(), bufferPosition - length, value.get(), 0, length); return value; } void FastCharStream::Done() { try { input->close(); } catch (IOException&) { // ignore IO exceptions } } int32_t FastCharStream::getColumn() { return bufferStart + bufferPosition; } int32_t FastCharStream::getLine() { return 1; } int32_t FastCharStream::getEndColumn() { return bufferStart + bufferPosition; } int32_t FastCharStream::getEndLine() { return 1; } int32_t FastCharStream::getBeginColumn() { return bufferStart + tokenStart; } int32_t FastCharStream::getBeginLine() { return 1; } } LucenePlusPlus-rel_3.0.9/src/core/queryparser/MultiFieldQueryParser.cpp000066400000000000000000000166121456444476200264040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiFieldQueryParser.h" #include "BooleanQuery.h" #include "BooleanClause.h" #include "PhraseQuery.h" #include "MultiPhraseQuery.h" #include "MiscUtils.h" namespace Lucene { MultiFieldQueryParser::MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, const AnalyzerPtr& analyzer, MapStringDouble boosts) : QueryParser(matchVersion, L"", analyzer) { this->boosts = boosts; this->fields = fields; } MultiFieldQueryParser::MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, const AnalyzerPtr& analyzer) : QueryParser(matchVersion, L"", analyzer) { this->fields = fields; } MultiFieldQueryParser::~MultiFieldQueryParser() { } QueryPtr MultiFieldQueryParser::getFieldQuery(const String& field, const String& queryText, int32_t slop) { if (field.empty()) { Collection clauses(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { QueryPtr query(QueryParser::getFieldQuery(*field, queryText)); if (query) { // If the user passes a map of boosts if (boosts) { // Get the boost from the map and apply them MapStringDouble::iterator boost = boosts.find(*field); if (boost != boosts.end()) { query->setBoost(boost->second); } } applySlop(query, slop); clauses.add(newLucene(query, BooleanClause::SHOULD)); } } if (clauses.empty()) { // happens for stopwords return QueryPtr(); } return getBooleanQuery(clauses, true); } QueryPtr query(QueryParser::getFieldQuery(field, queryText)); applySlop(query, slop); return query; } QueryPtr MultiFieldQueryParser::getFieldQuery(const String& field, const String& queryText) { return getFieldQuery(field, queryText, 0); } void MultiFieldQueryParser::applySlop(const QueryPtr& query, int32_t slop) { if (MiscUtils::typeOf(query)) { boost::dynamic_pointer_cast(query)->setSlop(slop); } if (MiscUtils::typeOf(query)) { 
boost::dynamic_pointer_cast(query)->setSlop(slop); } } QueryPtr MultiFieldQueryParser::getFuzzyQuery(const String& field, const String& termStr, double minSimilarity) { if (field.empty()) { Collection clauses(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { clauses.add(newLucene(getFuzzyQuery(*field, termStr, minSimilarity), BooleanClause::SHOULD)); } return getBooleanQuery(clauses, true); } return QueryParser::getFuzzyQuery(field, termStr, minSimilarity); } QueryPtr MultiFieldQueryParser::getPrefixQuery(const String& field, const String& termStr) { if (field.empty()) { Collection clauses(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { clauses.add(newLucene(getPrefixQuery(*field, termStr), BooleanClause::SHOULD)); } return getBooleanQuery(clauses, true); } return QueryParser::getPrefixQuery(field, termStr); } QueryPtr MultiFieldQueryParser::getWildcardQuery(const String& field, const String& termStr) { if (field.empty()) { Collection clauses(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { clauses.add(newLucene(getWildcardQuery(*field, termStr), BooleanClause::SHOULD)); } return getBooleanQuery(clauses, true); } return QueryParser::getWildcardQuery(field, termStr); } QueryPtr MultiFieldQueryParser::getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive) { if (field.empty()) { Collection clauses(Collection::newInstance()); for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { clauses.add(newLucene(getRangeQuery(*field, part1, part2, inclusive), BooleanClause::SHOULD)); } return getBooleanQuery(clauses, true); } return QueryParser::getRangeQuery(field, part1, part2, inclusive); } QueryPtr MultiFieldQueryParser::parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, const AnalyzerPtr& 
analyzer) { if (queries.size() != fields.size()) { boost::throw_exception(IllegalArgumentException(L"queries.size() != fields.size()")); } BooleanQueryPtr booleanQuery(newLucene()); for (int32_t i = 0; i < fields.size(); ++i) { QueryParserPtr queryParser(newLucene(matchVersion, fields[i], analyzer)); QueryPtr query(queryParser->parse(queries[i])); if (query && (!MiscUtils::typeOf(query) || !boost::dynamic_pointer_cast(query)->getClauses().empty())) { booleanQuery->add(query, BooleanClause::SHOULD); } } return booleanQuery; } QueryPtr MultiFieldQueryParser::parse(LuceneVersion::Version matchVersion, const String& query, Collection fields, Collection flags, const AnalyzerPtr& analyzer) { if (fields.size() != flags.size()) { boost::throw_exception(IllegalArgumentException(L"fields.size() != flags.size()")); } BooleanQueryPtr booleanQuery(newLucene()); for (int32_t i = 0; i < fields.size(); ++i) { QueryParserPtr queryParser(newLucene(matchVersion, fields[i], analyzer)); QueryPtr q(queryParser->parse(query)); if (q && (!MiscUtils::typeOf(q) || !boost::dynamic_pointer_cast(q)->getClauses().empty())) { booleanQuery->add(q, flags[i]); } } return booleanQuery; } QueryPtr MultiFieldQueryParser::parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, Collection flags, const AnalyzerPtr& analyzer) { if (queries.size() != fields.size() || fields.size() != flags.size()) { boost::throw_exception(IllegalArgumentException(L"queries, fields, and flags array have have different length")); } BooleanQueryPtr booleanQuery(newLucene()); for (int32_t i = 0; i < fields.size(); ++i) { QueryParserPtr queryParser(newLucene(matchVersion, fields[i], analyzer)); QueryPtr query(queryParser->parse(queries[i])); if (query && (!MiscUtils::typeOf(query) || !boost::dynamic_pointer_cast(query)->getClauses().empty())) { booleanQuery->add(query, flags[i]); } } return booleanQuery; } } 
LucenePlusPlus-rel_3.0.9/src/core/queryparser/QueryParseError.cpp000066400000000000000000000073171456444476200252570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "QueryParseError.h" #include "QueryParserToken.h" #include "StringUtils.h" namespace Lucene { QueryParseError::~QueryParseError() { } String QueryParseError::lexicalError(bool EOFSeen, int32_t lexState, int32_t errorLine, int32_t errorColumn, const String& errorAfter, wchar_t curChar) { StringStream buffer; buffer << L"Lexical error at line " << errorLine << L", column " << errorColumn << L". Encountered:"; if (EOFSeen) { buffer << L""; } else { buffer << L"\"" << addEscapes(String(1, curChar)) << L"\""; } buffer << L" (" << (int32_t)curChar << L"), after : \"" << addEscapes(errorAfter) + L"\""; return buffer.str(); } String QueryParseError::parseError(const QueryParserTokenPtr& currentToken, Collection< Collection > expectedTokenSequences, Collection tokenImage) { StringStream expected; int32_t maxSize = 0; for (int32_t i = 0; i < expectedTokenSequences.size(); ++i) { if (maxSize < expectedTokenSequences[i].size()) { maxSize = expectedTokenSequences[i].size(); } for (int32_t j = 0; j < expectedTokenSequences[i].size(); ++j) { expected << tokenImage[expectedTokenSequences[i][j]] << L" "; } if (expectedTokenSequences[i][expectedTokenSequences[i].size() - 1] != 0) { expected << L"..."; } expected << L"\n "; } StringStream retval; retval << L"Encountered \""; QueryParserTokenPtr token(currentToken->next); for (int32_t i = 0; i < maxSize; ++i) { if (i != 0) { retval << L" "; } if (token->kind == 0) { retval << tokenImage[0]; break; } retval << L" " << 
tokenImage[token->kind] << L" \"" << addEscapes(token->image) << L" \""; token = token->next; } retval << L"\" at line " << currentToken->next->beginLine << L", column " << currentToken->next->beginColumn; retval << L".\n"; if (expectedTokenSequences.size() == 1) { retval << L"Was expecting:\n "; } else { retval << L"Was expecting one of:\n "; } retval << expected.str(); return retval.str(); } String QueryParseError::addEscapes(const String& str) { StringStream buffer; for (String::const_iterator ch = str.begin(); ch != str.end(); ++ch) { switch (*ch) { case L'\0': continue; case L'\b': buffer << L"\\b"; continue; case L'\t': buffer << L"\\t"; continue; case L'\n': buffer << L"\\n"; continue; case L'\f': buffer << L"\\f"; continue; case L'\r': buffer << L"\\r"; continue; case L'\"': buffer << L"\\\""; continue; case L'\'': buffer << L"\\\'"; continue; case L'\\': buffer << L"\\\\"; continue; default: if (*ch < 0x20 || *ch > 0x7e) { String hexChar(L"0000" + StringUtils::toString(*ch, 16)); buffer << L"\\u" + hexChar.substr(hexChar.length() - 4); } else { buffer << *ch; } continue; } } return buffer.str(); } } LucenePlusPlus-rel_3.0.9/src/core/queryparser/QueryParser.cpp000066400000000000000000001267641456444476200244370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include #include "QueryParser.h" #include "QueryParserTokenManager.h" #include "QueryParserToken.h" #include "QueryParseError.h" #include "MultiTermQuery.h" #include "TermQuery.h" #include "TermRangeQuery.h" #include "FuzzyQuery.h" #include "FastCharStream.h" #include "StringReader.h" #include "BooleanQuery.h" #include "CachingTokenFilter.h" #include "TermAttribute.h" #include "Term.h" #include "PositionIncrementAttribute.h" #include "PhraseQuery.h" #include "MultiPhraseQuery.h" #include "PrefixQuery.h" #include "WildcardQuery.h" #include "MatchAllDocsQuery.h" #include "SimpleAnalyzer.h" #include "DateField.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t QueryParser::CONJ_NONE = 0; const int32_t QueryParser::CONJ_AND = 1; const int32_t QueryParser::CONJ_OR = 2; const int32_t QueryParser::MOD_NONE = 0; const int32_t QueryParser::MOD_NOT = 10; const int32_t QueryParser::MOD_REQ = 11; const int32_t QueryParser::jj_la1_0[] = { 0x300, 0x300, 0x1c00, 0x1c00, 0x3ed3f00, 0x90000, 0x20000, 0x3ed2000, 0x2690000, 0x100000, 0x100000, 0x20000, 0x30000000, 0x4000000, 0x30000000, 0x20000, 0x0, 0x40000000, 0x0, 0x20000, 0x100000, 0x20000, 0x3ed0000 }; const int32_t QueryParser::jj_la1_1[] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0 }; QueryParser::QueryParser(LuceneVersion::Version matchVersion, const String& field, const AnalyzerPtr& analyzer) { ConstructParser(newLucene(newLucene(L"")), QueryParserTokenManagerPtr()); this->analyzer = analyzer; this->field = field; this->enablePositionIncrements = LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_29); } QueryParser::QueryParser(const QueryParserCharStreamPtr& stream) { ConstructParser(stream, QueryParserTokenManagerPtr()); } QueryParser::QueryParser(const QueryParserTokenManagerPtr& tokenMgr) { 
ConstructParser(QueryParserCharStreamPtr(), tokenMgr); } QueryParser::~QueryParser() { } void QueryParser::ConstructParser(const QueryParserCharStreamPtr& stream, const QueryParserTokenManagerPtr& tokenMgr) { _operator = OR_OPERATOR; lowercaseExpandedTerms = true; multiTermRewriteMethod = MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT(); allowLeadingWildcard = false; enablePositionIncrements = true; phraseSlop = 0; fuzzyMinSim = FuzzyQuery::defaultMinSimilarity(); fuzzyPrefixLength = FuzzyQuery::defaultPrefixLength; locale = std::locale(); dateResolution = DateTools::RESOLUTION_NULL; token_source = tokenMgr ? tokenMgr : newLucene(stream); token = newLucene(); _jj_ntk = -1; jj_la = 0; jj_gen = 0; jj_rescan = false; jj_gc = 0; jj_la1 = Collection::newInstance(23); jj_2_rtns = Collection::newInstance(1); for (int32_t i = 0; i < 23; ++i) { jj_la1[i] = -1; } for (int32_t i = 0; i < jj_2_rtns.size(); ++i) { jj_2_rtns[i] = newInstance(); } jj_expentries = Collection< Collection >::newInstance(); jj_kind = -1; jj_lasttokens = Collection::newInstance(100); jj_endpos = 0; } QueryPtr QueryParser::parse(const String& query) { ReInit(newLucene(newLucene(query))); try { // TopLevelQuery is a Query followed by the end-of-input (EOF) QueryPtr res(TopLevelQuery(field)); return res ? 
res : newBooleanQuery(false); } catch (QueryParserError& e) { boost::throw_exception(QueryParserError(L"Cannot parse '" + query + L"': " + e.getError())); } catch (TooManyClausesException&) { boost::throw_exception(QueryParserError(L"Cannot parse '" + query + L"': too many boolean clauses")); } return QueryPtr(); } AnalyzerPtr QueryParser::getAnalyzer() { return analyzer; } String QueryParser::getField() { return field; } double QueryParser::getFuzzyMinSim() { return fuzzyMinSim; } void QueryParser::setFuzzyMinSim(double fuzzyMinSim) { this->fuzzyMinSim = fuzzyMinSim; } int32_t QueryParser::getFuzzyPrefixLength() { return fuzzyPrefixLength; } void QueryParser::setFuzzyPrefixLength(int32_t fuzzyPrefixLength) { this->fuzzyPrefixLength = fuzzyPrefixLength; } void QueryParser::setPhraseSlop(int32_t phraseSlop) { this->phraseSlop = phraseSlop; } int32_t QueryParser::getPhraseSlop() { return phraseSlop; } void QueryParser::setAllowLeadingWildcard(bool allowLeadingWildcard) { this->allowLeadingWildcard = allowLeadingWildcard; } bool QueryParser::getAllowLeadingWildcard() { return allowLeadingWildcard; } void QueryParser::setEnablePositionIncrements(bool enable) { this->enablePositionIncrements = enable; } bool QueryParser::getEnablePositionIncrements() { return enablePositionIncrements; } void QueryParser::setDefaultOperator(Operator op) { this->_operator = op; } QueryParser::Operator QueryParser::getDefaultOperator() { return _operator; } void QueryParser::setLowercaseExpandedTerms(bool lowercaseExpandedTerms) { this->lowercaseExpandedTerms = lowercaseExpandedTerms; } bool QueryParser::getLowercaseExpandedTerms() { return lowercaseExpandedTerms; } void QueryParser::setMultiTermRewriteMethod(const RewriteMethodPtr& method) { multiTermRewriteMethod = method; } RewriteMethodPtr QueryParser::getMultiTermRewriteMethod() { return multiTermRewriteMethod; } void QueryParser::setLocale(std::locale locale) { this->locale = locale; } std::locale QueryParser::getLocale() { return 
locale; } void QueryParser::setDateResolution(DateTools::Resolution dateResolution) { this->dateResolution = dateResolution; } void QueryParser::setDateResolution(const String& fieldName, DateTools::Resolution dateResolution) { if (fieldName.empty()) { boost::throw_exception(IllegalArgumentException(L"Field cannot be empty.")); } if (!fieldToDateResolution) { // lazily initialize Map fieldToDateResolution = MapStringResolution::newInstance(); } fieldToDateResolution.put(fieldName, dateResolution); } DateTools::Resolution QueryParser::getDateResolution(const String& fieldName) { if (fieldName.empty()) { boost::throw_exception(IllegalArgumentException(L"Field cannot be empty.")); } if (!fieldToDateResolution) { // no field specific date resolutions set; return default date resolution instead return this->dateResolution; } MapStringResolution::iterator resolution = fieldToDateResolution.find(fieldName); if (resolution == fieldToDateResolution.end()) { // no date resolutions set for the given field; return default date resolution instead return this->dateResolution; } return resolution->second; } void QueryParser::setRangeCollator(const CollatorPtr& rc) { rangeCollator = rc; } CollatorPtr QueryParser::getRangeCollator() { return rangeCollator; } void QueryParser::addClause(Collection clauses, int32_t conj, int32_t mods, const QueryPtr& q) { bool required = false; bool prohibited = false; // If this term is introduced by AND, make the preceding term required, unless it's already prohibited if (!clauses.empty() && conj == CONJ_AND) { BooleanClausePtr c(clauses[clauses.size() - 1]); if (!c->isProhibited()) { c->setOccur(BooleanClause::MUST); } } if (!clauses.empty() && _operator == AND_OPERATOR && conj == CONJ_OR) { // If this term is introduced by OR, make the preceding term optional, unless it's prohibited (that // means we leave -a OR b but +a OR b-->a OR b) notice if the input is a OR b, first term is parsed // as required; without this modification a OR b would 
parsed as +a OR b BooleanClausePtr c(clauses[clauses.size() - 1]); if (!c->isProhibited()) { c->setOccur(BooleanClause::SHOULD); } } // We might have been passed a null query; the term might have been filtered away by the analyzer. if (!q) { return; } if (_operator == OR_OPERATOR) { // We set REQUIRED if we're introduced by AND or +; PROHIBITED if introduced by NOT or -; make // sure not to set both. prohibited = (mods == MOD_NOT); required = (mods == MOD_REQ); if (conj == CONJ_AND && !prohibited) { required = true; } } else { // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED if not PROHIBITED and not // introduced by OR prohibited = (mods == MOD_NOT); required = (!prohibited && conj != CONJ_OR); } if (required && !prohibited) { clauses.add(newBooleanClause(q, BooleanClause::MUST)); } else if (!required && !prohibited) { clauses.add(newBooleanClause(q, BooleanClause::SHOULD)); } else if (!required && prohibited) { clauses.add(newBooleanClause(q, BooleanClause::MUST_NOT)); } else { boost::throw_exception(RuntimeException(L"Clause cannot be both required and prohibited")); } } QueryPtr QueryParser::getFieldQuery(const String& field, const String& queryText) { TokenStreamPtr source; try { source = analyzer->reusableTokenStream(field, newLucene(queryText)); source->reset(); } catch (IOException&) { source = analyzer->tokenStream(field, newLucene(queryText)); } CachingTokenFilterPtr buffer(newLucene(source)); TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; int32_t numTokens = 0; bool success = false; try { buffer->reset(); success = true; } catch (IOException&) { // success == false if we hit an exception } if (success) { if (buffer->hasAttribute()) { termAtt = buffer->getAttribute(); } if (buffer->hasAttribute()) { posIncrAtt = buffer->getAttribute(); } } int32_t positionCount = 0; bool severalTokensAtSamePosition = false; bool hasMoreTokens = false; if (termAtt) { try { hasMoreTokens = buffer->incrementToken(); while 
(hasMoreTokens) { ++numTokens; int32_t positionIncrement = posIncrAtt ? posIncrAtt->getPositionIncrement() : 1; if (positionIncrement != 0) { positionCount += positionIncrement; } else { severalTokensAtSamePosition = true; } hasMoreTokens = buffer->incrementToken(); } } catch (IOException&) { // ignore } } try { // rewind the buffer stream buffer->reset(); // close original stream - all tokens buffered source->close(); } catch (IOException&) { // ignore } if (numTokens == 0) { return QueryPtr(); } else if (numTokens == 1) { String term; try { bool hasNext = buffer->incrementToken(); BOOST_ASSERT(hasNext); term = termAtt->term(); } catch (IOException&) { // safe to ignore, because we know the number of tokens } return newTermQuery(newLucene(field, term)); } else { if (severalTokensAtSamePosition) { if (positionCount <= 1) { // no phrase query BooleanQueryPtr q(newBooleanQuery(true)); for (int32_t i = 0; i < numTokens; ++i) { String term; try { bool hasNext = buffer->incrementToken(); BOOST_ASSERT(hasNext); term = termAtt->term(); } catch (IOException&) { // safe to ignore, because we know the number of tokens } QueryPtr currentQuery(newTermQuery(newLucene(field, term))); q->add(currentQuery, BooleanClause::SHOULD); } return q; } else { // phrase query MultiPhraseQueryPtr mpq(newMultiPhraseQuery()); mpq->setSlop(phraseSlop); Collection multiTerms(Collection::newInstance()); int32_t position = -1; for (int32_t i = 0; i < numTokens; ++i) { String term; int32_t positionIncrement = 1; try { bool hasNext = buffer->incrementToken(); BOOST_ASSERT(hasNext); term = termAtt->term(); if (posIncrAtt) { positionIncrement = posIncrAtt->getPositionIncrement(); } } catch (IOException&) { // safe to ignore, because we know the number of tokens } if (positionIncrement > 0 && !multiTerms.empty()) { if (enablePositionIncrements) { mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end()), position); } else { mpq->add(Collection::newInstance(multiTerms.begin(), 
multiTerms.end())); } multiTerms.clear(); } position += positionIncrement; multiTerms.add(newLucene(field, term)); } if (enablePositionIncrements) { mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end()), position); } else { mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end())); } return mpq; } } else { PhraseQueryPtr pq(newPhraseQuery()); pq->setSlop(phraseSlop); int32_t position = -1; for (int32_t i = 0; i < numTokens; ++i) { String term; int32_t positionIncrement = 1; try { bool hasNext = buffer->incrementToken(); BOOST_ASSERT(hasNext); term = termAtt->term(); if (posIncrAtt) { positionIncrement = posIncrAtt->getPositionIncrement(); } } catch (IOException&) { // safe to ignore, because we know the number of tokens } if (enablePositionIncrements) { position += positionIncrement; pq->add(newLucene(field, term), position); } else { pq->add(newLucene(field, term)); } } return pq; } } } QueryPtr QueryParser::getFieldQuery(const String& field, const String& queryText, int32_t slop) { QueryPtr query(getFieldQuery(field, queryText)); if (MiscUtils::typeOf(query)) { boost::dynamic_pointer_cast(query)->setSlop(slop); } if (MiscUtils::typeOf(query)) { boost::dynamic_pointer_cast(query)->setSlop(slop); } return query; } QueryPtr QueryParser::getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive) { String date1(part1); String date2(part2); if (lowercaseExpandedTerms) { StringUtils::toLower(date1); StringUtils::toLower(date2); } try { boost::posix_time::ptime d1(DateTools::parseDate(date1, locale)); boost::posix_time::ptime d2; // The user can only specify the date, not the time, so make sure the time is set to // the latest possible time of that date to really include all documents if (inclusive) { d2 = boost::posix_time::ptime(DateTools::parseDate(date2, locale) + boost::posix_time::hours(23) + boost::posix_time::minutes(59) + boost::posix_time::seconds(59) + boost::posix_time::millisec(999)); } else { 
d2 = boost::posix_time::ptime(DateTools::parseDate(date2, locale)); } DateTools::Resolution resolution = getDateResolution(field); if (resolution == DateTools::RESOLUTION_NULL) { // no default or field specific date resolution has been set, use deprecated // DateField to maintain compatibility with pre-1.9 Lucene versions. date1 = DateField::dateToString(d1); date2 = DateField::dateToString(d2); } else { date1 = DateTools::dateToString(d1, resolution); date2 = DateTools::dateToString(d2, resolution); } } catch (...) { } return newRangeQuery(field, date1, date2, inclusive); } BooleanQueryPtr QueryParser::newBooleanQuery(bool disableCoord) { return newLucene(disableCoord); } BooleanClausePtr QueryParser::newBooleanClause(const QueryPtr& q, BooleanClause::Occur occur) { return newLucene(q, occur); } QueryPtr QueryParser::newTermQuery(const TermPtr& term) { return newLucene(term); } PhraseQueryPtr QueryParser::newPhraseQuery() { return newLucene(); } MultiPhraseQueryPtr QueryParser::newMultiPhraseQuery() { return newLucene(); } QueryPtr QueryParser::newPrefixQuery(const TermPtr& prefix) { PrefixQueryPtr query(newLucene(prefix)); query->setRewriteMethod(multiTermRewriteMethod); return query; } QueryPtr QueryParser::newFuzzyQuery(const TermPtr& term, double minimumSimilarity, int32_t prefixLength) { // FuzzyQuery doesn't yet allow constant score rewrite return newLucene(term, minimumSimilarity, prefixLength); } QueryPtr QueryParser::newRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive) { TermRangeQueryPtr query(newLucene(field, part1, part2, inclusive, inclusive, rangeCollator)); query->setRewriteMethod(multiTermRewriteMethod); return query; } QueryPtr QueryParser::newMatchAllDocsQuery() { return newLucene(); } QueryPtr QueryParser::newWildcardQuery(const TermPtr& term) { WildcardQueryPtr query(newLucene(term)); query->setRewriteMethod(multiTermRewriteMethod); return query; } QueryPtr QueryParser::getBooleanQuery(Collection clauses) 
{ return getBooleanQuery(clauses, false); } QueryPtr QueryParser::getBooleanQuery(Collection clauses, bool disableCoord) { if (clauses.empty()) { return QueryPtr(); // all clause words were filtered away by the analyzer. } BooleanQueryPtr query(newBooleanQuery(disableCoord)); for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { query->add(*clause); } return query; } QueryPtr QueryParser::getWildcardQuery(const String& field, const String& termStr) { if (field == L"*" && termStr == L"*") { return newMatchAllDocsQuery(); } if (!allowLeadingWildcard && (boost::starts_with(termStr, L"*") || boost::starts_with(termStr, L"?"))) { boost::throw_exception(QueryParserError(L"'*' or '?' not allowed as first character in WildcardQuery")); } String queryTerm(termStr); if (lowercaseExpandedTerms) { StringUtils::toLower(queryTerm); } TermPtr term(newLucene(field, queryTerm)); return newWildcardQuery(term); } QueryPtr QueryParser::getPrefixQuery(const String& field, const String& termStr) { if (!allowLeadingWildcard && boost::starts_with(termStr, L"*")) { boost::throw_exception(QueryParserError(L"'*' not allowed as first character in PrefixQuery")); } String queryTerm(termStr); if (lowercaseExpandedTerms) { StringUtils::toLower(queryTerm); } TermPtr term(newLucene(field, queryTerm)); return newPrefixQuery(term); } QueryPtr QueryParser::getFuzzyQuery(const String& field, const String& termStr, double minSimilarity) { String queryTerm(termStr); if (lowercaseExpandedTerms) { StringUtils::toLower(queryTerm); } TermPtr term(newLucene(field, queryTerm)); return newFuzzyQuery(term, minSimilarity, fuzzyPrefixLength); } String QueryParser::discardEscapeChar(const String& input) { // Create char array to hold unescaped char sequence CharArray output(CharArray::newInstance(input.length())); // The length of the output can be less than the input due to discarded escape chars. 
// This variable holds the actual length of the output int32_t length = 0; // We remember whether the last processed character was an escape character bool lastCharWasEscapeChar = false; // The multiplier the current unicode digit must be multiplied with. eg. the first digit must // be multiplied with 16^3, the second with 16^2 int32_t codePointMultiplier = 0; // Used to calculate the codepoint of the escaped unicode character int32_t codePoint = 0; for (int32_t i = 0; i < (int32_t)input.length(); ++i) { wchar_t curChar = input[i]; if (codePointMultiplier > 0) { codePoint += hexToInt(curChar) * codePointMultiplier; codePointMultiplier = MiscUtils::unsignedShift(codePointMultiplier, 4); if (codePointMultiplier == 0) { output[length++] = (wchar_t)codePoint; codePoint = 0; } } else if (lastCharWasEscapeChar) { if (curChar == L'u') { // found an escaped unicode character codePointMultiplier = 16 * 16 * 16; } else { // this character was escaped output[length++] = curChar; } lastCharWasEscapeChar = false; } else { if (curChar == L'\\') { lastCharWasEscapeChar = true; } else { output[length++] = curChar; } } } if (codePointMultiplier > 0) { boost::throw_exception(QueryParserError(L"Truncated unicode escape sequence.")); } if (lastCharWasEscapeChar) { boost::throw_exception(QueryParserError(L"Term can not end with escape character.")); } return String(output.get(), length); } int32_t QueryParser::hexToInt(wchar_t c) { if (L'0' <= c && c <= L'9') { return c - L'0'; } else if (L'a' <= c && c <= L'f') { return c - L'a' + 10; } else if (L'A' <= c && c <= L'F') { return c - L'A' + 10; } else { boost::throw_exception(QueryParserError(L"None-hex character in unicode escape sequence: " + StringUtils::toString(c))); return 0; } } String QueryParser::escape(const String& s) { StringStream buffer; for (int32_t i = 0; i < (int32_t)s.length(); ++i) { wchar_t c = s[i]; // These characters are part of the query syntax and must be escaped if (c == L'\\' || c == L'+' || c == L'-' || c == 
L'!' || c == L'(' || c == L')' || c == L':' || c == L'^' || c == L'[' || c == L']' || c == L'\"' || c == L'{' || c == L'}' || c == L'~' || c == L'*' || c == L'?' || c == L'|' || c == L'&') { buffer << L"\\"; } buffer << c; } return buffer.str(); } int QueryParser::main(Collection args) { if (args.empty()) { std::wcout << L"Usage: QueryParser "; return 1; } QueryParserPtr qp(newLucene(LuceneVersion::LUCENE_CURRENT, L"field", newLucene())); QueryPtr q(qp->parse(args[0])); std::wcout << q->toString(L"field"); return 0; } int32_t QueryParser::Conjunction() { int32_t ret = CONJ_NONE; switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case AND: case OR: switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case AND: jj_consume_token(AND); ret = CONJ_AND; break; case OR: jj_consume_token(OR); ret = CONJ_OR; break; default: jj_la1[0] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } break; default: jj_la1[1] = jj_gen; } return ret; } int32_t QueryParser::Modifiers() { int32_t ret = MOD_NONE; switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case NOT: case PLUS: case MINUS: switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case PLUS: jj_consume_token(PLUS); ret = MOD_REQ; break; case MINUS: jj_consume_token(MINUS); ret = MOD_NOT; break; case NOT: jj_consume_token(NOT); ret = MOD_NOT; break; default: jj_la1[2] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } break; default: jj_la1[3] = jj_gen; } return ret; } QueryPtr QueryParser::TopLevelQuery(const String& field) { QueryPtr q(ParseQuery(field)); jj_consume_token(0); return q; } QueryPtr QueryParser::ParseQuery(const String& field) { Collection clauses(Collection::newInstance()); QueryPtr firstQuery; int32_t mods = Modifiers(); QueryPtr q(ParseClause(field)); addClause(clauses, CONJ_NONE, mods, q); if (mods == MOD_NONE) { firstQuery = q; } for (bool more = true; more; ) { switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) { case AND: case OR: case NOT: case PLUS: case MINUS: case LPAREN: case STAR: case QUOTED: case TERM: case PREFIXTERM: case WILDTERM: case RANGEIN_START: case RANGEEX_START: case NUMBER: break; default: jj_la1[4] = jj_gen; more = false; continue; } int32_t conj = Conjunction(); mods = Modifiers(); q = ParseClause(field); addClause(clauses, conj, mods, q); } if (clauses.size() == 1 && firstQuery) { return firstQuery; } else { return getBooleanQuery(clauses); } } QueryPtr QueryParser::ParseClause(const String& field) { QueryPtr q; QueryParserTokenPtr fieldToken; QueryParserTokenPtr boost; String fieldClause(field); if (jj_2_1(2)) { switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case TERM: fieldToken = jj_consume_token(TERM); jj_consume_token(COLON); fieldClause = discardEscapeChar(fieldToken->image); break; case STAR: jj_consume_token(STAR); jj_consume_token(COLON); fieldClause = L"*"; break; default: jj_la1[5] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } } switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case STAR: case QUOTED: case TERM: case PREFIXTERM: case WILDTERM: case RANGEIN_START: case RANGEEX_START: case NUMBER: q = ParseTerm(fieldClause); break; case LPAREN: jj_consume_token(LPAREN); q = ParseQuery(fieldClause); jj_consume_token(RPAREN); switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case CARAT: jj_consume_token(CARAT); boost = jj_consume_token(NUMBER); break; default: jj_la1[6] = jj_gen; } break; default: jj_la1[7] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } if (boost) { double f = 1.0; try { if (q) { f = StringUtils::toDouble(boost->image); q->setBoost(f); } } catch (...) 
{ } } return q; } QueryPtr QueryParser::ParseTerm(const String& field) { QueryParserTokenPtr term; QueryParserTokenPtr boost; QueryParserTokenPtr fuzzySlop; QueryParserTokenPtr goop1; QueryParserTokenPtr goop2; bool prefix = false; bool wildcard = false; bool fuzzy = false; QueryPtr q; switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case STAR: case TERM: case PREFIXTERM: case WILDTERM: case NUMBER: switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case TERM: term = jj_consume_token(TERM); break; case STAR: term = jj_consume_token(STAR); wildcard = true; break; case PREFIXTERM: term = jj_consume_token(PREFIXTERM); prefix = true; break; case WILDTERM: term = jj_consume_token(WILDTERM); wildcard = true; break; case NUMBER: term = jj_consume_token(NUMBER); break; default: jj_la1[8] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case FUZZY_SLOP: fuzzySlop = jj_consume_token(FUZZY_SLOP); fuzzy = true; break; default: jj_la1[9] = jj_gen; } switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case CARAT: jj_consume_token(CARAT); boost = jj_consume_token(NUMBER); switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case FUZZY_SLOP: fuzzySlop = jj_consume_token(FUZZY_SLOP); fuzzy = true; break; default: jj_la1[10] = jj_gen; } break; default: jj_la1[11] = jj_gen; } { String termImage(discardEscapeChar(term->image)); if (wildcard) { q = getWildcardQuery(field, termImage); } else if (prefix) { q = getPrefixQuery(field, discardEscapeChar(term->image.substr(0, term->image.length() - 1))); } else if (fuzzy) { double fms = fuzzyMinSim; try { fms = StringUtils::toDouble(fuzzySlop->image.substr(1)); } catch (...) 
{ } if (fms < 0.0 || fms > 1.0) { boost::throw_exception(QueryParserError(L"Minimum similarity for a FuzzyQuery has to be between 0.0 and 1.0")); } q = getFuzzyQuery(field, termImage, fms); } else { q = getFieldQuery(field, termImage); } } break; case RANGEIN_START: jj_consume_token(RANGEIN_START); switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case RANGEIN_GOOP: goop1 = jj_consume_token(RANGEIN_GOOP); break; case RANGEIN_QUOTED: goop1 = jj_consume_token(RANGEIN_QUOTED); break; default: jj_la1[12] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case RANGEIN_TO: jj_consume_token(RANGEIN_TO); break; default: jj_la1[13] = jj_gen; } switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case RANGEIN_GOOP: goop2 = jj_consume_token(RANGEIN_GOOP); break; case RANGEIN_QUOTED: goop2 = jj_consume_token(RANGEIN_QUOTED); break; default: jj_la1[14] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } jj_consume_token(RANGEIN_END); switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case CARAT: jj_consume_token(CARAT); boost = jj_consume_token(NUMBER); break; default: jj_la1[15] = jj_gen; } if (goop1->kind == RANGEIN_QUOTED) { goop1->image = goop1->image.substr(1, std::max((int32_t)0, (int32_t)goop1->image.length() - 2)); } if (goop2->kind == RANGEIN_QUOTED) { goop2->image = goop2->image.substr(1, std::max((int32_t)0, (int32_t)goop2->image.length() - 2)); } q = getRangeQuery(field, discardEscapeChar(goop1->image), discardEscapeChar(goop2->image), true); break; case RANGEEX_START: jj_consume_token(RANGEEX_START); switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case RANGEEX_GOOP: goop1 = jj_consume_token(RANGEEX_GOOP); break; case RANGEEX_QUOTED: goop1 = jj_consume_token(RANGEEX_QUOTED); break; default: jj_la1[16] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) { case RANGEEX_TO: jj_consume_token(RANGEEX_TO); break; default: jj_la1[17] = jj_gen; } switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case RANGEEX_GOOP: goop2 = jj_consume_token(RANGEEX_GOOP); break; case RANGEEX_QUOTED: goop2 = jj_consume_token(RANGEEX_QUOTED); break; default: jj_la1[18] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } jj_consume_token(RANGEEX_END); switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case CARAT: jj_consume_token(CARAT); boost = jj_consume_token(NUMBER); break; default: jj_la1[19] = jj_gen; } if (goop1->kind == RANGEEX_QUOTED) { goop1->image = goop1->image.substr(1, std::max((int32_t)0, (int32_t)goop1->image.length() - 2)); } if (goop2->kind == RANGEEX_QUOTED) { goop2->image = goop2->image.substr(1, std::max((int32_t)0, (int32_t)goop2->image.length() - 2)); } q = getRangeQuery(field, discardEscapeChar(goop1->image), discardEscapeChar(goop2->image), false); break; case QUOTED: term = jj_consume_token(QUOTED); switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case FUZZY_SLOP: fuzzySlop = jj_consume_token(FUZZY_SLOP); break; default: jj_la1[20] = jj_gen; } switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { case CARAT: jj_consume_token(CARAT); boost = jj_consume_token(NUMBER); break; default: jj_la1[21] = jj_gen; } { int32_t s = phraseSlop; if (fuzzySlop) { try { s = StringUtils::toInt(fuzzySlop->image.substr(1)); } catch (...) { } } q = getFieldQuery(field, discardEscapeChar(term->image.substr(1, std::max((int32_t)0, (int32_t)term->image.length() - 2))), s); } break; default: jj_la1[22] = jj_gen; jj_consume_token(-1); boost::throw_exception(QueryParserError()); } if (boost) { double f = 1.0; try { f = StringUtils::toDouble(boost->image); } catch (...) 
{ } // avoid boosting null queries, such as those caused by stop words if (q) { q->setBoost(f); } } return q; } bool QueryParser::jj_2_1(int32_t xla) { jj_la = xla; jj_scanpos = token; jj_lastpos = jj_scanpos; bool _jj_2_1 = false; LuceneException finally; try { _jj_2_1 = !jj_3_1(); } catch (LookaheadSuccess&) { _jj_2_1 = true; } catch (LuceneException& e) { finally = e; } jj_save(0, xla); finally.throwException(); return _jj_2_1; } bool QueryParser::jj_3R_2() { if (jj_scan_token(TERM)) { return true; } if (jj_scan_token(COLON)) { return true; } return false; } bool QueryParser::jj_3_1() { QueryParserTokenPtr xsp(jj_scanpos); if (jj_3R_2()) { jj_scanpos = xsp; if (jj_3R_3()) { return true; } } return false; } bool QueryParser::jj_3R_3() { if (jj_scan_token(STAR)) { return true; } if (jj_scan_token(COLON)) { return true; } return false; } void QueryParser::ReInit(const QueryParserCharStreamPtr& stream) { token_source->ReInit(stream); token = newLucene(); _jj_ntk = -1; jj_gen = 0; for (int32_t i = 0; i < 23; ++i) { jj_la1[i] = -1; } for (int32_t i = 0; i < jj_2_rtns.size(); ++i) { jj_2_rtns[i] = newInstance(); } } void QueryParser::ReInit(const QueryParserTokenManagerPtr& tokenMgr) { token_source = tokenMgr; token = newLucene(); _jj_ntk = -1; jj_gen = 0; for (int32_t i = 0; i < 23; ++i) { jj_la1[i] = -1; } for (int32_t i = 0; i < jj_2_rtns.size(); ++i) { jj_2_rtns[i] = newInstance(); } } QueryParserTokenPtr QueryParser::jj_consume_token(int32_t kind) { QueryParserTokenPtr oldToken(token); if (oldToken->next) { token = token->next; } else { token->next = token_source->getNextToken(); token = token->next; } _jj_ntk = -1; if (token->kind == kind) { ++jj_gen; if (++jj_gc > 100) { jj_gc = 0; for (int32_t i = 0; i < jj_2_rtns.size(); ++i) { JJCallsPtr c(jj_2_rtns[i]); while (c) { if (c->gen < jj_gen) { c->first.reset(); } c = c->next; } } } return token; } token = oldToken; jj_kind = kind; generateParseException(); return QueryParserTokenPtr(); } bool 
QueryParser::jj_scan_token(int32_t kind) { if (jj_scanpos == jj_lastpos) { --jj_la; if (!jj_scanpos->next) { jj_scanpos->next = token_source->getNextToken(); jj_scanpos = jj_scanpos->next; jj_lastpos = jj_scanpos; } else { jj_scanpos = jj_scanpos->next; jj_lastpos = jj_scanpos; } } else { jj_scanpos = jj_scanpos->next; } if (jj_rescan) { int32_t i = 0; QueryParserTokenPtr tok(token); while (tok && tok != jj_scanpos) { ++i; tok = tok->next; } if (tok) { jj_add_error_token(kind, i); } } if (jj_scanpos->kind != kind) { return true; } if (jj_la == 0 && jj_scanpos == jj_lastpos) { boost::throw_exception(LookaheadSuccess()); } return false; } QueryParserTokenPtr QueryParser::getNextToken() { if (token->next) { token = token->next; } else { token->next = token_source->getNextToken(); token = token->next; } _jj_ntk = -1; ++jj_gen; return token; } QueryParserTokenPtr QueryParser::getToken(int32_t index) { QueryParserTokenPtr t(token); for (int32_t i = 0; i < index; ++i) { if (t->next) { t = t->next; } else { t->next = token_source->getNextToken(); t = t->next; } } return t; } int32_t QueryParser::jj_ntk() { jj_nt = token->next; if (!jj_nt) { token->next = token_source->getNextToken(); _jj_ntk = token->next->kind; return _jj_ntk; } else { _jj_ntk = jj_nt->kind; return _jj_ntk; } } void QueryParser::jj_add_error_token(int32_t kind, int32_t pos) { if (pos >= 100) { return; } if (pos == jj_endpos + 1) { jj_lasttokens[jj_endpos++] = kind; } else if (jj_endpos != 0) { jj_expentry = Collection::newInstance(jj_endpos); for (int32_t i = 0; i < jj_endpos; ++i) { jj_expentry[i] = jj_lasttokens[i]; } for (Collection< Collection >::iterator oldentry = jj_expentries.begin(); oldentry != jj_expentries.end(); ++oldentry) { if (oldentry->size() == jj_expentry.size()) { bool jj_entries_loop = true; for (int32_t i = 0; i < jj_expentry.size(); ++i) { if ((*oldentry)[i] != jj_expentry[i]) { jj_entries_loop = false; break; } } if (!jj_entries_loop) { continue; } jj_expentries.add(jj_expentry); 
break; } } if (pos != 0) { jj_endpos = pos; jj_lasttokens[jj_endpos - 1] = kind; } } } void QueryParser::generateParseException() { jj_expentries.clear(); Collection la1tokens(Collection::newInstance(34)); if (jj_kind >= 0) { la1tokens[jj_kind] = true; jj_kind = -1; } for (int32_t i = 0; i < 23; ++i) { if (jj_la1[i] == jj_gen) { for (int32_t j = 0; j < 32; ++j) { if ((jj_la1_0[i] & (1 << j)) != 0) { la1tokens[j] = true; } if ((jj_la1_1[i] & (1 << j)) != 0) { la1tokens[32 + j] = true; } } } } for (int32_t i = 0; i < 34; ++i) { if (la1tokens[i]) { jj_expentry = Collection::newInstance(1); jj_expentry[0] = i; jj_expentries.add(jj_expentry); } } jj_endpos = 0; jj_rescan_token(); jj_add_error_token(0, 0); Collection< Collection > exptokseq(Collection< Collection >::newInstance(jj_expentries.size())); for (int32_t i = 0; i < jj_expentries.size(); ++i) { exptokseq[i] = jj_expentries[i]; } boost::throw_exception(QueryParserError(QueryParseError::parseError(token, exptokseq, tokenImage))); } void QueryParser::enable_tracing() { } void QueryParser::disable_tracing() { } void QueryParser::jj_rescan_token() { jj_rescan = true; for (int32_t i = 0; i < 1; ++i) { try { JJCallsPtr p(jj_2_rtns[i]); do { if (p->gen > jj_gen) { jj_la = p->arg; jj_scanpos = p->first; jj_lastpos = jj_scanpos; jj_3_1(); } p = p->next; } while (p); } catch (LookaheadSuccess&) { } } jj_rescan = false; } void QueryParser::jj_save(int32_t index, int32_t xla) { JJCallsPtr p(jj_2_rtns[index]); while (p->gen > jj_gen) { if (!p->next) { p->next = newInstance(); p = p->next; break; } p = p->next; } p->gen = jj_gen + xla - jj_la; p->first = token; p->arg = xla; } } LucenePlusPlus-rel_3.0.9/src/core/queryparser/QueryParserCharStream.cpp000066400000000000000000000031301456444476200263660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "QueryParserCharStream.h" namespace Lucene { wchar_t QueryParserCharStream::readChar() { BOOST_ASSERT(false); return 0; // override } int32_t QueryParserCharStream::getColumn() { BOOST_ASSERT(false); return 0; // override } int32_t QueryParserCharStream::getLine() { BOOST_ASSERT(false); return 0; // override } int32_t QueryParserCharStream::getEndColumn() { BOOST_ASSERT(false); return 0; // override } int32_t QueryParserCharStream::getEndLine() { BOOST_ASSERT(false); return 0; // override } int32_t QueryParserCharStream::getBeginColumn() { BOOST_ASSERT(false); return 0; // override } int32_t QueryParserCharStream::getBeginLine() { BOOST_ASSERT(false); return 0; // override } void QueryParserCharStream::backup(int32_t amount) { BOOST_ASSERT(false); // override } wchar_t QueryParserCharStream::BeginToken() { BOOST_ASSERT(false); return 0; // override } String QueryParserCharStream::GetImage() { BOOST_ASSERT(false); return L""; // override } CharArray QueryParserCharStream::GetSuffix(int32_t length) { BOOST_ASSERT(false); return CharArray(); // override } void QueryParserCharStream::Done() { BOOST_ASSERT(false); // override } } LucenePlusPlus-rel_3.0.9/src/core/queryparser/QueryParserConstants.cpp000066400000000000000000000024571456444476200263240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "QueryParserConstants.h" namespace Lucene { const wchar_t* QueryParserConstants::_tokenImage[] = { L"", L"<_NUM_CHAR>", L"<_ESCAPED_CHAR>", L"<_TERM_START_CHAR>", L"<_TERM_CHAR>", L"<_WHITESPACE>", L"<_QUOTED_CHAR>", L"", L"", L"", L"", L"\"+\"", L"\"-\"", L"\"(\"", L"\")\"", L"\":\"", L"\"*\"", L"\"^\"", L"", L"", L"", L"", L"", L"\"[\"", L"\"{\"", L"", L"\"TO\"", L"\"]\"", L"", L"", L"\"TO\"", L"\"}\"", L"", L"" }; Collection QueryParserConstants::tokenImage = Collection::newInstance(_tokenImage, _tokenImage + SIZEOF_ARRAY(_tokenImage)); QueryParserConstants::QueryParserConstants() { } QueryParserConstants::~QueryParserConstants() { } } LucenePlusPlus-rel_3.0.9/src/core/queryparser/QueryParserToken.cpp000066400000000000000000000015641456444476200254260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "QueryParserToken.h" namespace Lucene { QueryParserToken::QueryParserToken(int32_t kind, const String& image) { this->kind = kind; this->image = image; this->beginLine = 0; this->beginColumn = 0; this->endLine = 0; this->endColumn = 0; } QueryParserToken::~QueryParserToken() { } String QueryParserToken::toString() { return image; } QueryParserTokenPtr QueryParserToken::newToken(int32_t ofKind, const String& image) { return newLucene(ofKind, image); } } LucenePlusPlus-rel_3.0.9/src/core/queryparser/QueryParserTokenManager.cpp000066400000000000000000001215151456444476200267200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "QueryParserTokenManager.h" #include "QueryParserCharStream.h" #include "QueryParserToken.h" #include "QueryParseError.h" #include "InfoStream.h" #include "StringUtils.h" namespace Lucene { const int64_t QueryParserTokenManager::jjbitVec0[] = {0x1LL, 0x0LL, 0x0LL, 0x0LL}; const int64_t QueryParserTokenManager::jjbitVec1[] = {static_cast(0xfffffffffffffffeLL), static_cast(0xffffffffffffffffLL), static_cast(0xffffffffffffffffLL), static_cast(0xffffffffffffffffLL)}; const int64_t QueryParserTokenManager::jjbitVec3[] = {0x0LL, 0x0LL, static_cast(0xffffffffffffffffLL), static_cast(0xffffffffffffffffLL)}; const int64_t QueryParserTokenManager::jjbitVec4[] = {static_cast(0xfffefffffffffffeLL), static_cast(0xffffffffffffffffLL), static_cast(0xffffffffffffffffLL), static_cast(0xffffffffffffffffLL)}; const int32_t QueryParserTokenManager::jjnextStates[] = {15, 16, 18, 29, 32, 23, 33, 30, 20, 21, 32, 23, 
33, 31, 34, 27, 2, 4, 5, 0, 1}; /// Token literal values. const wchar_t* QueryParserTokenManager::jjstrLiteralImages[] = { L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"\53", L"\55", L"\50", L"\51", L"\72", L"\52", L"\136", L"", L"", L"", L"", L"", L"\133", L"\173", L"", L"\124\117", L"\135", L"", L"", L"\124\117", L"\175", L"", L"" }; /// Lexer state names. const wchar_t* QueryParserTokenManager::lexStateNames[] = { L"Boost", L"RangeEx", L"RangeIn", L"DEFAULT" }; /// Lex State array. const int32_t QueryParserTokenManager::jjnewLexState[] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, 3, -1, 3, -1, -1, -1, 3, -1, -1 }; const int64_t QueryParserTokenManager::jjtoToken[] = {0x3ffffff01LL}; const int64_t QueryParserTokenManager::jjtoSkip[] = {0x80LL}; QueryParserTokenManager::QueryParserTokenManager(const QueryParserCharStreamPtr& stream) { debugStream = newLucene(); jjrounds = IntArray::newInstance(36); jjstateSet = IntArray::newInstance(72); curChar = 0; curLexState = 3; defaultLexState = 3; jjnewStateCnt = 0; jjround = 0; jjmatchedPos = 0; jjmatchedKind = 0; input_stream = stream; } QueryParserTokenManager::QueryParserTokenManager(const QueryParserCharStreamPtr& stream, int32_t lexState) { debugStream = newLucene(); jjrounds = IntArray::newInstance(36); jjstateSet = IntArray::newInstance(72); input_stream = stream; curChar = 0; curLexState = 3; defaultLexState = 3; jjnewStateCnt = 0; jjround = 0; jjmatchedPos = 0; jjmatchedKind = 0; SwitchTo(lexState); } QueryParserTokenManager::~QueryParserTokenManager() { } void QueryParserTokenManager::setDebugStream(const InfoStreamPtr& debugStream) { this->debugStream = debugStream; } int32_t QueryParserTokenManager::jjStopStringLiteralDfa_3(int32_t pos, int64_t active0) { return -1; } int32_t QueryParserTokenManager::jjStartNfa_3(int32_t pos, int64_t active0) { return jjMoveNfa_3(jjStopStringLiteralDfa_3(pos, active0), pos + 1); } int32_t 
QueryParserTokenManager::jjStopAtPos(int32_t pos, int32_t kind) { jjmatchedKind = kind; jjmatchedPos = pos; return pos + 1; } int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_3() { switch (curChar) { case 40: return jjStopAtPos(0, 13); case 41: return jjStopAtPos(0, 14); case 42: return jjStartNfaWithStates_3(0, 16, 36); case 43: return jjStopAtPos(0, 11); case 45: return jjStopAtPos(0, 12); case 58: return jjStopAtPos(0, 15); case 91: return jjStopAtPos(0, 23); case 94: return jjStopAtPos(0, 17); case 123: return jjStopAtPos(0, 24); default: return jjMoveNfa_3(0, 0); } } int32_t QueryParserTokenManager::jjStartNfaWithStates_3(int32_t pos, int32_t kind, int32_t state) { jjmatchedKind = kind; jjmatchedPos = pos; try { curChar = input_stream->readChar(); } catch (IOException&) { return pos + 1; } return jjMoveNfa_3(state, pos + 1); } int32_t QueryParserTokenManager::jjMoveNfa_3(int32_t startState, int32_t curPos) { int32_t startsAt = 0; jjnewStateCnt = 36; int32_t i = 1; jjstateSet[0] = startState; int32_t kind = 0x7fffffff; while (true) { if (++jjround == 0x7fffffff) { ReInitRounds(); } if (curChar < 64) { int64_t l = (int64_t)1 << curChar; do { switch (jjstateSet[--i]) { case 36: case 25: if ((0xfbfffcf8ffffd9ffLL & l) == 0) { break; } if (kind > 22) { kind = 22; } jjCheckNAddTwoStates(25, 26); break; case 0: if ((0xfbffd4f8ffffd9ffLL & l) != 0) { if (kind > 22) { kind = 22; } jjCheckNAddTwoStates(25, 26); } else if ((0x100002600LL & l) != 0) { if (kind > 7) { kind = 7; } } else if (curChar == 34) { jjCheckNAddStates(0, 2); } else if (curChar == 33) { if (kind > 10) { kind = 10; } } if ((0x7bffd0f8ffffd9ffLL & l) != 0) { if (kind > 19) { kind = 19; } jjCheckNAddStates(3, 7); } else if (curChar == 42) { if (kind > 21) { kind = 21; } } if (curChar == 38) { jjstateSet[jjnewStateCnt++] = 4; } break; case 4: if (curChar == 38 && kind > 8) { kind = 8; } break; case 5: if (curChar == 38) { jjstateSet[jjnewStateCnt++] = 4; } break; case 13: if (curChar == 33 && 
kind > 10) { kind = 10; } break; case 14: if (curChar == 34) { jjCheckNAddStates(0, 2); } break; case 15: if ((0xfffffffbffffffffLL & l) != 0) { jjCheckNAddStates(0, 2); } break; case 17: jjCheckNAddStates(0, 2); break; case 18: if (curChar == 34 && kind > 18) { kind = 18; } break; case 20: if ((0x3ff000000000000LL & l) == 0) { break; } if (kind > 20) { kind = 20; } jjAddStates(8, 9); break; case 21: if (curChar == 46) { jjCheckNAdd(22); } break; case 22: if ((0x3ff000000000000LL & l) == 0) { break; } if (kind > 20) { kind = 20; } jjCheckNAdd(22); break; case 23: if (curChar == 42 && kind > 21) { kind = 21; } break; case 24: if ((0xfbffd4f8ffffd9ffLL & l) == 0) { break; } if (kind > 22) { kind = 22; } jjCheckNAddTwoStates(25, 26); break; case 27: if (kind > 22) { kind = 22; } jjCheckNAddTwoStates(25, 26); break; case 28: if ((0x7bffd0f8ffffd9ffLL & l) == 0) { break; } if (kind > 19) { kind = 19; } jjCheckNAddStates(3, 7); break; case 29: if ((0x7bfff8f8ffffd9ffLL & l) == 0) { break; } if (kind > 19) { kind = 19; } jjCheckNAddTwoStates(29, 30); break; case 31: if (kind > 19) { kind = 19; } jjCheckNAddTwoStates(29, 30); break; case 32: if ((0x7bfff8f8ffffd9ffLL & l) != 0) { jjCheckNAddStates(10, 12); } break; case 34: jjCheckNAddStates(10, 12); break; default: break; } } while (i != startsAt); } else if (curChar < 128) { int64_t l = (int64_t)1 << (curChar & 077); do { switch (jjstateSet[--i]) { case 36: if ((0x97ffffff87ffffffLL & l) != 0) { if (kind > 22) { kind = 22; } jjCheckNAddTwoStates(25, 26); } else if (curChar == 92) { jjCheckNAddTwoStates(27, 27); } break; case 0: if ((0x97ffffff87ffffffLL & l) != 0) { if (kind > 19) { kind = 19; } jjCheckNAddStates(3, 7); } else if (curChar == 92) { jjCheckNAddStates(13, 15); } else if (curChar == 126) { if (kind > 20) { kind = 20; } jjstateSet[jjnewStateCnt++] = 20; } if ((0x97ffffff87ffffffLL & l) != 0) { if (kind > 22) { kind = 22; } jjCheckNAddTwoStates(25, 26); } if (curChar == 78) { jjstateSet[jjnewStateCnt++] = 11; 
} else if (curChar == 124) { jjstateSet[jjnewStateCnt++] = 8; } else if (curChar == 79) { jjstateSet[jjnewStateCnt++] = 6; } else if (curChar == 65) { jjstateSet[jjnewStateCnt++] = 2; } break; case 1: if (curChar == 68 && kind > 8) { kind = 8; } break; case 2: if (curChar == 78) { jjstateSet[jjnewStateCnt++] = 1; } break; case 3: if (curChar == 65) { jjstateSet[jjnewStateCnt++] = 2; } break; case 6: if (curChar == 82 && kind > 9) { kind = 9; } break; case 7: if (curChar == 79) { jjstateSet[jjnewStateCnt++] = 6; } break; case 8: if (curChar == 124 && kind > 9) { kind = 9; } break; case 9: if (curChar == 124) { jjstateSet[jjnewStateCnt++] = 8; } break; case 10: if (curChar == 84 && kind > 10) { kind = 10; } break; case 11: if (curChar == 79) { jjstateSet[jjnewStateCnt++] = 10; } break; case 12: if (curChar == 78) { jjstateSet[jjnewStateCnt++] = 11; } break; case 15: if ((0xffffffffefffffffLL & l) != 0) { jjCheckNAddStates(0, 2); } break; case 16: if (curChar == 92) { jjstateSet[jjnewStateCnt++] = 17; } break; case 17: jjCheckNAddStates(0, 2); break; case 19: if (curChar != 126) { break; } if (kind > 20) { kind = 20; } jjstateSet[jjnewStateCnt++] = 20; break; case 24: if ((0x97ffffff87ffffffLL & l) == 0) { break; } if (kind > 22) { kind = 22; } jjCheckNAddTwoStates(25, 26); break; case 25: if ((0x97ffffff87ffffffLL & l) == 0) { break; } if (kind > 22) { kind = 22; } jjCheckNAddTwoStates(25, 26); break; case 26: if (curChar == 92) { jjCheckNAddTwoStates(27, 27); } break; case 27: if (kind > 22) { kind = 22; } jjCheckNAddTwoStates(25, 26); break; case 28: if ((0x97ffffff87ffffffLL & l) == 0) { break; } if (kind > 19) { kind = 19; } jjCheckNAddStates(3, 7); break; case 29: if ((0x97ffffff87ffffffLL & l) == 0) { break; } if (kind > 19) { kind = 19; } jjCheckNAddTwoStates(29, 30); break; case 30: if (curChar == 92) { jjCheckNAddTwoStates(31, 31); } break; case 31: if (kind > 19) { kind = 19; } jjCheckNAddTwoStates(29, 30); break; case 32: if ((0x97ffffff87ffffffLL & l) != 
0) { jjCheckNAddStates(10, 12); } break; case 33: if (curChar == 92) { jjCheckNAddTwoStates(34, 34); } break; case 34: jjCheckNAddStates(10, 12); break; case 35: if (curChar == 92) { jjCheckNAddStates(13, 15); } break; default: break; } } while (i != startsAt); } else { int32_t hiByte = (int32_t)(curChar >> 8); int32_t i1 = hiByte >> 6; int64_t l1 = (int64_t)1 << (hiByte & 077); int32_t i2 = (curChar & 0xff) >> 6; int64_t l2 = (int64_t)1 << (curChar & 077); do { switch (jjstateSet[--i]) { case 36: case 25: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) { break; } if (kind > 22) { kind = 22; } jjCheckNAddTwoStates(25, 26); break; case 0: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { if (kind > 7) { kind = 7; } } if (jjCanMove_2(hiByte, i1, i2, l1, l2)) { if (kind > 22) { kind = 22; } jjCheckNAddTwoStates(25, 26); } if (jjCanMove_2(hiByte, i1, i2, l1, l2)) { if (kind > 19) { kind = 19; } jjCheckNAddStates(3, 7); } break; case 15: case 17: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { jjCheckNAddStates(0, 2); } break; case 24: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) { break; } if (kind > 22) { kind = 22; } jjCheckNAddTwoStates(25, 26); break; case 27: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) { break; } if (kind > 22) { kind = 22; } jjCheckNAddTwoStates(25, 26); break; case 28: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) { break; } if (kind > 19) { kind = 19; } jjCheckNAddStates(3, 7); break; case 29: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) { break; } if (kind > 19) { kind = 19; } jjCheckNAddTwoStates(29, 30); break; case 31: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) { break; } if (kind > 19) { kind = 19; } jjCheckNAddTwoStates(29, 30); break; case 32: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) { jjCheckNAddStates(10, 12); } break; case 34: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { jjCheckNAddStates(10, 12); } break; default: break; } } while (i != startsAt); } if (kind != 0x7fffffff) { jjmatchedKind = kind; jjmatchedPos = curPos; kind = 0x7fffffff; } ++curPos; i = 
jjnewStateCnt; jjnewStateCnt = startsAt; if (i == (startsAt = 36 - jjnewStateCnt)) { return curPos; } try { curChar = input_stream->readChar(); } catch (IOException&) { return curPos; } } } int32_t QueryParserTokenManager::jjStopStringLiteralDfa_1(int32_t pos, int64_t active0) { switch (pos) { case 0: if ((active0 & 0x40000000LL) != 0) { jjmatchedKind = 33; return 6; } return -1; default: return -1; } } int32_t QueryParserTokenManager::jjStartNfa_1(int32_t pos, int64_t active0) { return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1); } int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_1() { switch (curChar) { case 84: return jjMoveStringLiteralDfa1_1(0x40000000LL); case 125: return jjStopAtPos(0, 31); default: return jjMoveNfa_1(0, 0); } } int32_t QueryParserTokenManager::jjMoveStringLiteralDfa1_1(int64_t active0) { try { curChar = input_stream->readChar(); } catch (IOException&) { jjStopStringLiteralDfa_1(0, active0); return 1; } switch (curChar) { case 79: if ((active0 & 0x40000000LL) != 0) { return jjStartNfaWithStates_1(1, 30, 6); } break; default: break; } return jjStartNfa_1(0, active0); } int32_t QueryParserTokenManager::jjStartNfaWithStates_1(int32_t pos, int32_t kind, int32_t state) { jjmatchedKind = kind; jjmatchedPos = pos; try { curChar = input_stream->readChar(); } catch (IOException&) { return pos + 1; } return jjMoveNfa_1(state, pos + 1); } int32_t QueryParserTokenManager::jjMoveNfa_1(int32_t startState, int32_t curPos) { int32_t startsAt = 0; jjnewStateCnt = 7; int32_t i = 1; jjstateSet[0] = startState; int32_t kind = 0x7fffffff; while (true) { if (++jjround == 0x7fffffff) { ReInitRounds(); } if (curChar < 64) { int64_t l = (int64_t)1 << curChar; do { switch (jjstateSet[--i]) { case 0: if ((0xfffffffeffffffffLL & l) != 0) { if (kind > 33) { kind = 33; } jjCheckNAdd(6); } if ((0x100002600LL & l) != 0) { if (kind > 7) { kind = 7; } } else if (curChar == 34) { jjCheckNAddTwoStates(2, 4); } break; case 1: if (curChar == 34) { 
jjCheckNAddTwoStates(2, 4); } break; case 2: if ((0xfffffffbffffffffLL & l) != 0) { jjCheckNAddStates(16, 18); } break; case 3: if (curChar == 34) { jjCheckNAddStates(16, 18); } break; case 5: if (curChar == 34 && kind > 32) { kind = 32; } break; case 6: if ((0xfffffffeffffffffLL & l) == 0) { break; } if (kind > 33) { kind = 33; } jjCheckNAdd(6); break; default: break; } } while (i != startsAt); } else if (curChar < 128) { int64_t l = (int64_t)1 << (curChar & 077); do { switch (jjstateSet[--i]) { case 0: case 6: if ((0xdfffffffffffffffLL & l) == 0) { break; } if (kind > 33) { kind = 33; } jjCheckNAdd(6); break; case 2: jjAddStates(16, 18); break; case 4: if (curChar == 92) { jjstateSet[jjnewStateCnt++] = 3; } break; default: break; } } while (i != startsAt); } else { int32_t hiByte = (int32_t)(curChar >> 8); int32_t i1 = hiByte >> 6; int64_t l1 = (int64_t)1 << (hiByte & 077); int32_t i2 = (curChar & 0xff) >> 6; int64_t l2 = (int64_t)1 << (curChar & 077); do { switch (jjstateSet[--i]) { case 0: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { if (kind > 7) { kind = 7; } } if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { if (kind > 33) { kind = 33; } jjCheckNAdd(6); } break; case 2: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { jjAddStates(16, 18); } break; case 6: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) { break; } if (kind > 33) { kind = 33; } jjCheckNAdd(6); break; default: break; } } while (i != startsAt); } if (kind != 0x7fffffff) { jjmatchedKind = kind; jjmatchedPos = curPos; kind = 0x7fffffff; } ++curPos; i = jjnewStateCnt; jjnewStateCnt = startsAt; if (i == (startsAt = 7 - jjnewStateCnt)) { return curPos; } try { curChar = input_stream->readChar(); } catch (IOException&) { return curPos; } } } int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_0() { return jjMoveNfa_0(0, 0); } int32_t QueryParserTokenManager::jjMoveNfa_0(int32_t startState, int32_t curPos) { int32_t startsAt = 0; jjnewStateCnt = 3; int32_t i = 1; jjstateSet[0] = startState; int32_t kind = 0x7fffffff; 
while (true) { if (++jjround == 0x7fffffff) { ReInitRounds(); } if (curChar < 64) { int64_t l = (int64_t)1 << curChar; do { switch (jjstateSet[--i]) { case 0: if ((0x3ff000000000000LL & l) == 0) { break; } if (kind > 25) { kind = 25; } jjAddStates(19, 20); break; case 1: if (curChar == 46) { jjCheckNAdd(2); } break; case 2: if ((0x3ff000000000000LL & l) == 0) { break; } if (kind > 25) { kind = 25; } jjCheckNAdd(2); break; default: break; } } while (i != startsAt); } else if (curChar < 128) { int64_t l = (int64_t)1 << (curChar & 077); do { jjstateSet[--i]; } while (i != startsAt); } else { int32_t hiByte = (int32_t)(curChar >> 8); int32_t i1 = hiByte >> 6; int64_t l1 = (int64_t)1 << (hiByte & 077); int32_t i2 = (curChar & 0xff) >> 6; int64_t l2 = (int64_t)1 << (curChar & 077); do { jjstateSet[--i]; } while (i != startsAt); } if (kind != 0x7fffffff) { jjmatchedKind = kind; jjmatchedPos = curPos; kind = 0x7fffffff; } ++curPos; i = jjnewStateCnt; jjnewStateCnt = startsAt; if (i == (startsAt = 3 - jjnewStateCnt)) { return curPos; } try { curChar = input_stream->readChar(); } catch (IOException&) { return curPos; } } } int32_t QueryParserTokenManager::jjStopStringLiteralDfa_2(int32_t pos, int64_t active0) { switch (pos) { case 0: if ((active0 & 0x4000000LL) != 0) { jjmatchedKind = 29; return 6; } return -1; default: return -1; } } int32_t QueryParserTokenManager::jjStartNfa_2(int32_t pos, int64_t active0) { return jjMoveNfa_2(jjStopStringLiteralDfa_2(pos, active0), pos + 1); } int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_2() { switch (curChar) { case 84: return jjMoveStringLiteralDfa1_2(0x4000000LL); case 93: return jjStopAtPos(0, 27); default: return jjMoveNfa_2(0, 0); } } int32_t QueryParserTokenManager::jjMoveStringLiteralDfa1_2(int64_t active0) { try { curChar = input_stream->readChar(); } catch (IOException&) { jjStopStringLiteralDfa_2(0, active0); return 1; } switch (curChar) { case 79: if ((active0 & 0x4000000LL) != 0) { return 
jjStartNfaWithStates_2(1, 26, 6); } break; default: break; } return jjStartNfa_2(0, active0); } int32_t QueryParserTokenManager::jjStartNfaWithStates_2(int32_t pos, int32_t kind, int32_t state) { jjmatchedKind = kind; jjmatchedPos = pos; try { curChar = input_stream->readChar(); } catch (IOException&) { return pos + 1; } return jjMoveNfa_2(state, pos + 1); } int32_t QueryParserTokenManager::jjMoveNfa_2(int32_t startState, int32_t curPos) { int32_t startsAt = 0; jjnewStateCnt = 7; int32_t i = 1; jjstateSet[0] = startState; int32_t kind = 0x7fffffff; while (true) { if (++jjround == 0x7fffffff) { ReInitRounds(); } if (curChar < 64) { int64_t l = (int64_t)1 << curChar; do { switch (jjstateSet[--i]) { case 0: if ((0xfffffffeffffffffLL & l) != 0) { if (kind > 29) { kind = 29; } jjCheckNAdd(6); } if ((0x100002600LL & l) != 0) { if (kind > 7) { kind = 7; } } else if (curChar == 34) { jjCheckNAddTwoStates(2, 4); } break; case 1: if (curChar == 34) { jjCheckNAddTwoStates(2, 4); } break; case 2: if ((0xfffffffbffffffffLL & l) != 0) { jjCheckNAddStates(16, 18); } break; case 3: if (curChar == 34) { jjCheckNAddStates(16, 18); } break; case 5: if (curChar == 34 && kind > 28) { kind = 28; } break; case 6: if ((0xfffffffeffffffffLL & l) == 0) { break; } if (kind > 29) { kind = 29; } jjCheckNAdd(6); break; default: break; } } while (i != startsAt); } else if (curChar < 128) { int64_t l = (int64_t)1 << (curChar & 077); do { switch (jjstateSet[--i]) { case 0: case 6: if ((0xffffffffdfffffffLL & l) == 0) { break; } if (kind > 29) { kind = 29; } jjCheckNAdd(6); break; case 2: jjAddStates(16, 18); break; case 4: if (curChar == 92) { jjstateSet[jjnewStateCnt++] = 3; } break; default: break; } } while (i != startsAt); } else { int32_t hiByte = (int32_t)(curChar >> 8); int32_t i1 = hiByte >> 6; int64_t l1 = (int64_t)1 << (hiByte & 077); int32_t i2 = (curChar & 0xff) >> 6; int64_t l2 = (int64_t)1 << (curChar & 077); do { switch (jjstateSet[--i]) { case 0: if (jjCanMove_0(hiByte, i1, i2, l1, 
l2)) { if (kind > 7) { kind = 7; } } if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { if (kind > 29) { kind = 29; } jjCheckNAdd(6); } break; case 2: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { jjAddStates(16, 18); } break; case 6: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) { break; } if (kind > 29) { kind = 29; } jjCheckNAdd(6); break; default: break; } } while (i != startsAt); } if (kind != 0x7fffffff) { jjmatchedKind = kind; jjmatchedPos = curPos; kind = 0x7fffffff; } ++curPos; i = jjnewStateCnt; jjnewStateCnt = startsAt; if (i == (startsAt = 7 - jjnewStateCnt)) { return curPos; } try { curChar = input_stream->readChar(); } catch (IOException&) { return curPos; } } } bool QueryParserTokenManager::jjCanMove_0(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2) { switch (hiByte) { case 48: return ((jjbitVec0[i2] & l2) != 0); default: return false; } } bool QueryParserTokenManager::jjCanMove_1(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2) { switch (hiByte) { case 0: return ((jjbitVec3[i2] & l2) != 0); default: if ((jjbitVec1[i1] & l1) != 0) { return true; } return false; } } bool QueryParserTokenManager::jjCanMove_2(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2) { switch (hiByte) { case 0: return ((jjbitVec3[i2] & l2) != 0); case 48: return ((jjbitVec1[i2] & l2) != 0); default: if ((jjbitVec4[i1] & l1) != 0) { return true; } return false; } } void QueryParserTokenManager::ReInit(const QueryParserCharStreamPtr& stream) { jjmatchedPos = 0; jjnewStateCnt = 0; curLexState = defaultLexState; input_stream = stream; ReInitRounds(); } void QueryParserTokenManager::ReInitRounds() { jjround = 0x80000001; for (int32_t i = 36; i-- > 0;) { jjrounds[i] = 0x80000000; } } void QueryParserTokenManager::ReInit(const QueryParserCharStreamPtr& stream, int32_t lexState) { ReInit(stream); SwitchTo(lexState); } void QueryParserTokenManager::SwitchTo(int32_t lexState) { if (lexState >= 4 || lexState < 0) { 
boost::throw_exception(QueryParserError(L"Error: Ignoring invalid lexical state : " + StringUtils::toString(lexState) + L". State unchanged.")); } else { curLexState = lexState; } } QueryParserTokenPtr QueryParserTokenManager::jjFillToken() { String im(jjstrLiteralImages[jjmatchedKind]); String curTokenImage(im.empty() ? input_stream->GetImage() : im); int32_t beginLine = input_stream->getBeginLine(); int32_t beginColumn = input_stream->getBeginColumn(); int32_t endLine = input_stream->getEndLine(); int32_t endColumn = input_stream->getEndColumn(); QueryParserTokenPtr t(QueryParserToken::newToken(jjmatchedKind, curTokenImage)); t->beginLine = beginLine; t->endLine = endLine; t->beginColumn = beginColumn; t->endColumn = endColumn; return t; } QueryParserTokenPtr QueryParserTokenManager::getNextToken() { QueryParserTokenPtr matchedToken; int32_t curPos = 0; while (true) { try { curChar = input_stream->BeginToken(); } catch (IOException&) { jjmatchedKind = 0; matchedToken = jjFillToken(); return matchedToken; } switch (curLexState) { case 0: jjmatchedKind = 0x7fffffff; jjmatchedPos = 0; curPos = jjMoveStringLiteralDfa0_0(); break; case 1: jjmatchedKind = 0x7fffffff; jjmatchedPos = 0; curPos = jjMoveStringLiteralDfa0_1(); break; case 2: jjmatchedKind = 0x7fffffff; jjmatchedPos = 0; curPos = jjMoveStringLiteralDfa0_2(); break; case 3: jjmatchedKind = 0x7fffffff; jjmatchedPos = 0; curPos = jjMoveStringLiteralDfa0_3(); break; } if (jjmatchedKind != 0x7fffffff) { if (jjmatchedPos + 1 < curPos) { input_stream->backup(curPos - jjmatchedPos - 1); } if ((jjtoToken[jjmatchedKind >> 6] & ((int64_t)1 << (jjmatchedKind & 077))) != 0) { matchedToken = jjFillToken(); if (jjnewLexState[jjmatchedKind] != -1) { curLexState = jjnewLexState[jjmatchedKind]; } return matchedToken; } else { if (jjnewLexState[jjmatchedKind] != -1) { curLexState = jjnewLexState[jjmatchedKind]; } continue; } } int32_t error_line = input_stream->getEndLine(); int32_t error_column = input_stream->getEndColumn(); 
String error_after; bool EOFSeen = false; try { input_stream->readChar(); input_stream->backup(1); } catch (IOException&) { EOFSeen = true; error_after = curPos <= 1 ? L"" : input_stream->GetImage(); if (curChar == L'\n' || curChar == L'\r') { ++error_line; error_column = 0; } else { ++error_column; } } if (!EOFSeen) { input_stream->backup(1); error_after = curPos <= 1 ? L"" : input_stream->GetImage(); } boost::throw_exception(QueryParserError(QueryParseError::lexicalError(EOFSeen, curLexState, error_line, error_column, error_after, curChar))); } } void QueryParserTokenManager::jjCheckNAdd(int32_t state) { if (jjrounds[state] != jjround) { jjstateSet[jjnewStateCnt++] = state; jjrounds[state] = jjround; } } void QueryParserTokenManager::jjAddStates(int32_t start, int32_t end) { do { jjstateSet[jjnewStateCnt++] = jjnextStates[start]; } while (start++ != end); } void QueryParserTokenManager::jjCheckNAddTwoStates(int32_t state1, int32_t state2) { jjCheckNAdd(state1); jjCheckNAdd(state2); } void QueryParserTokenManager::jjCheckNAddStates(int32_t start, int32_t end) { do { jjCheckNAdd(jjnextStates[start]); } while (start++ != end); } } LucenePlusPlus-rel_3.0.9/src/core/search/000077500000000000000000000000001456444476200203145ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/search/BooleanClause.cpp000066400000000000000000000032361456444476200235400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BooleanClause.h" #include "Query.h" namespace Lucene { BooleanClause::BooleanClause(const QueryPtr& query, Occur occur) { this->query = query; this->occur = occur; } BooleanClause::~BooleanClause() { } BooleanClause::Occur BooleanClause::getOccur() { return occur; } void BooleanClause::setOccur(BooleanClause::Occur occur) { this->occur = occur; } QueryPtr BooleanClause::getQuery() { return query; } void BooleanClause::setQuery(const QueryPtr& query) { this->query = query; } bool BooleanClause::isProhibited() { return (occur == MUST_NOT); } bool BooleanClause::isRequired() { return (occur == MUST); } bool BooleanClause::equals(const LuceneObjectPtr& other) { BooleanClausePtr otherBooleanClause(boost::dynamic_pointer_cast(other)); if (!otherBooleanClause) { return false; } return (this->query->equals(otherBooleanClause->query) && this->occur == otherBooleanClause->occur); } int32_t BooleanClause::hashCode() { return query->hashCode() ^ (occur == MUST ? 1 : 0) ^ (occur == MUST_NOT ? 2 : 0); } String BooleanClause::toString() { switch (occur) { case MUST: return L"+" + query->toString(); case MUST_NOT: return L"-" + query->toString(); default: return query->toString(); } } } LucenePlusPlus-rel_3.0.9/src/core/search/BooleanQuery.cpp000066400000000000000000000314211456444476200234260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BooleanQuery.h" #include "_BooleanQuery.h" #include "BooleanScorer.h" #include "BooleanScorer2.h" #include "ComplexExplanation.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { int32_t BooleanQuery::maxClauseCount = 1024; BooleanQuery::BooleanQuery(bool disableCoord) { this->disableCoord = disableCoord; this->clauses = Collection::newInstance(); this->minNrShouldMatch = 0; } BooleanQuery::~BooleanQuery() { } int32_t BooleanQuery::getMaxClauseCount() { return maxClauseCount; } void BooleanQuery::setMaxClauseCount(int32_t maxClauseCount) { if (maxClauseCount < 1) { boost::throw_exception(IllegalArgumentException(L"maxClauseCount must be >= 1")); } BooleanQuery::maxClauseCount = maxClauseCount; } bool BooleanQuery::isCoordDisabled() { return disableCoord; } SimilarityPtr BooleanQuery::getSimilarity(const SearcherPtr& searcher) { SimilarityPtr result(Query::getSimilarity(searcher)); if (disableCoord) { // disable coord as requested result = newLucene(result); } return result; } void BooleanQuery::setMinimumNumberShouldMatch(int32_t min) { this->minNrShouldMatch = min; } int32_t BooleanQuery::getMinimumNumberShouldMatch() { return minNrShouldMatch; } void BooleanQuery::add(const QueryPtr& query, BooleanClause::Occur occur) { add(newLucene(query, occur)); } void BooleanQuery::add(const BooleanClausePtr& clause) { if (clauses.size() >= maxClauseCount) { boost::throw_exception(TooManyClausesException(L"maxClauseCount is set to " + StringUtils::toString(maxClauseCount))); } clauses.add(clause); } Collection BooleanQuery::getClauses() { return clauses; } Collection::iterator BooleanQuery::begin() { return clauses.begin(); } Collection::iterator BooleanQuery::end() { return clauses.end(); } WeightPtr BooleanQuery::createWeight(const SearcherPtr& searcher) { return newLucene(shared_from_this(), searcher); } QueryPtr BooleanQuery::rewrite(const 
IndexReaderPtr& reader) { if (minNrShouldMatch == 0 && clauses.size() == 1) { // optimize 1-clause queries BooleanClausePtr c(clauses[0]); if (!c->isProhibited()) { // just return clause QueryPtr query(c->getQuery()->rewrite(reader)); // rewrite first if (getBoost() != 1.0) { // incorporate boost if (query == c->getQuery()) { // if rewrite was no-op query = boost::dynamic_pointer_cast(query->clone()); // then clone before boost } query->setBoost(getBoost() * query->getBoost()); } return query; } } BooleanQueryPtr clone; // recursively rewrite for (int32_t i = 0; i < clauses.size(); ++i) { BooleanClausePtr c(clauses[i]); QueryPtr query(c->getQuery()->rewrite(reader)); if (query != c->getQuery()) { // clause rewrote: must clone if (!clone) { clone = boost::dynamic_pointer_cast(this->clone()); } clone->clauses[i] = newLucene(query, c->getOccur()); } } if (clone) { return clone; // some clauses rewrote } else { return shared_from_this(); // no clauses rewrote } } void BooleanQuery::extractTerms(SetTerm terms) { for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { (*clause)->getQuery()->extractTerms(terms); } } LuceneObjectPtr BooleanQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = Query::clone(other ? 
other : newLucene()); BooleanQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->disableCoord = disableCoord; cloneQuery->minNrShouldMatch = minNrShouldMatch; cloneQuery->clauses = Collection::newInstance(clauses.begin(), clauses.end()); return cloneQuery; } String BooleanQuery::toString(const String& field) { String buffer; bool needParens = (getBoost() != 1.0 || getMinimumNumberShouldMatch() > 0); if (needParens) { buffer += L"("; } for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { if (clause != clauses.begin()) { buffer += L" "; } if ((*clause)->isProhibited()) { buffer += L"-"; } else if ((*clause)->isRequired()) { buffer += L"+"; } QueryPtr subQuery((*clause)->getQuery()); if (subQuery) { if (boost::dynamic_pointer_cast(subQuery)) { // wrap sub-bools in parens buffer += L"("; buffer += subQuery->toString(field); buffer += L")"; } else { buffer += subQuery->toString(field); } } else { buffer += L"null"; } } if (needParens) { buffer += L")"; } if (getMinimumNumberShouldMatch() > 0) { buffer += L"~"; buffer += StringUtils::toString(getMinimumNumberShouldMatch()); } if (getBoost() != 1.0) { buffer += boostString(); } return buffer; } bool BooleanQuery::equals(const LuceneObjectPtr& other) { BooleanQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) { return false; } return (getBoost() == otherQuery->getBoost() && clauses.equals(otherQuery->clauses, luceneEquals()) && getMinimumNumberShouldMatch() == otherQuery->getMinimumNumberShouldMatch() && disableCoord == otherQuery->disableCoord); } int32_t BooleanQuery::hashCode() { return MiscUtils::doubleToIntBits(getBoost()) ^ MiscUtils::hashCode(clauses.begin(), clauses.end(), MiscUtils::hashLucene) + getMinimumNumberShouldMatch() + (disableCoord ? 
17 : 0); } BooleanWeight::BooleanWeight(const BooleanQueryPtr& query, const SearcherPtr& searcher) { this->query = query; this->similarity = query->getSimilarity(searcher); weights = Collection::newInstance(); for (Collection::iterator clause = query->clauses.begin(); clause != query->clauses.end(); ++clause) { weights.add((*clause)->getQuery()->createWeight(searcher)); } } BooleanWeight::~BooleanWeight() { } QueryPtr BooleanWeight::getQuery() { return query; } double BooleanWeight::getValue() { return query->getBoost(); } double BooleanWeight::sumOfSquaredWeights() { double sum = 0.0; for (int32_t i = 0; i < weights.size(); ++i) { // call sumOfSquaredWeights for all clauses in case of side effects double s = weights[i]->sumOfSquaredWeights(); // sum sub weights if (!query->clauses[i]->isProhibited()) { // only add to sum for non-prohibited clauses sum += s; } } sum *= query->getBoost() * query->getBoost(); // boost each sub-weight return sum; } void BooleanWeight::normalize(double norm) { norm *= query->getBoost(); // incorporate boost for (Collection::iterator w = weights.begin(); w != weights.end(); ++w) { // normalize all clauses, (even if prohibited in case of side affects) (*w)->normalize(norm); } } ExplanationPtr BooleanWeight::explain(const IndexReaderPtr& reader, int32_t doc) { int32_t minShouldMatch = query->getMinimumNumberShouldMatch(); ComplexExplanationPtr sumExpl(newLucene()); sumExpl->setDescription(L"sum of:"); int32_t coord = 0; int32_t maxCoord = 0; double sum = 0.0; bool fail = false; int32_t shouldMatchCount = 0; Collection::iterator c = query->clauses.begin(); for (Collection::iterator w = weights.begin(); w != weights.end(); ++w, ++c) { if (!(*w)->scorer(reader, true, true)) { continue; } ExplanationPtr e((*w)->explain(reader, doc)); if (!(*c)->isProhibited()) { ++maxCoord; } if (e->isMatch()) { if (!(*c)->isProhibited()) { sumExpl->addDetail(e); sum += e->getValue(); ++coord; } else { ExplanationPtr r(newLucene(0.0, L"match on prohibited 
clause (" + (*c)->getQuery()->toString() + L")")); r->addDetail(e); sumExpl->addDetail(r); fail = true; } if ((*c)->getOccur() == BooleanClause::SHOULD) { ++shouldMatchCount; } } else if ((*c)->isRequired()) { ExplanationPtr r(newLucene(0.0, L"no match on required clause (" + (*c)->getQuery()->toString() + L")")); r->addDetail(e); sumExpl->addDetail(r); fail = true; } } if (fail) { sumExpl->setMatch(false); sumExpl->setValue(0.0); sumExpl->setDescription(L"Failure to meet condition(s) of required/prohibited clause(s)"); return sumExpl; } else if (shouldMatchCount < minShouldMatch) { sumExpl->setMatch(false); sumExpl->setValue(0.0); sumExpl->setDescription(L"Failure to match minimum number of optional clauses: " + StringUtils::toString(minShouldMatch)); return sumExpl; } sumExpl->setMatch(0 < coord); sumExpl->setValue(sum); double coordFactor = similarity->coord(coord, maxCoord); if (coordFactor == 1.0) { // coord is no-op return sumExpl; // eliminate wrapper } else { ComplexExplanationPtr result(newLucene(sumExpl->isMatch(), sum * coordFactor, L"product of:")); result->addDetail(sumExpl); result->addDetail(newLucene(coordFactor, L"coord(" + StringUtils::toString(coord) + L"/" + StringUtils::toString(maxCoord) + L")")); return result; } } ScorerPtr BooleanWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { Collection required(Collection::newInstance()); Collection prohibited(Collection::newInstance()); Collection optional(Collection::newInstance()); Collection::iterator c = query->clauses.begin(); for (Collection::iterator w = weights.begin(); w != weights.end(); ++w, ++c) { ScorerPtr subScorer((*w)->scorer(reader, true, false)); if (!subScorer) { if ((*c)->isRequired()) { return ScorerPtr(); } } else if ((*c)->isRequired()) { required.add(subScorer); } else if ((*c)->isProhibited()) { prohibited.add(subScorer); } else { optional.add(subScorer); } } // Check if we can return a BooleanScorer if (!scoreDocsInOrder && topScorer && 
required.empty() && prohibited.size() < 32) { return newLucene(similarity, query->minNrShouldMatch, optional, prohibited); } if (required.empty() && optional.empty()) { // no required and optional clauses. return ScorerPtr(); } else if (optional.size() < query->minNrShouldMatch) { // either >1 req scorer, or there are 0 req scorers and at least 1 optional scorer. Therefore if there // are not enough optional scorers no documents will be matched by the query return ScorerPtr(); } // Return a BooleanScorer2 return newLucene(similarity, query->minNrShouldMatch, required, prohibited, optional); } bool BooleanWeight::scoresDocsOutOfOrder() { int32_t numProhibited = 0; for (Collection::iterator c = query->clauses.begin(); c != query->clauses.end(); ++c) { if ((*c)->isRequired()) { return false; // BS2 (in-order) will be used by scorer() } else if ((*c)->isProhibited()) { ++numProhibited; } } if (numProhibited > 32) { // cannot use BS return false; } // scorer() will return an out-of-order scorer if requested. return true; } SimilarityDisableCoord::SimilarityDisableCoord(const SimilarityPtr& delegee) : SimilarityDelegator(delegee) { } SimilarityDisableCoord::~SimilarityDisableCoord() { } double SimilarityDisableCoord::coord(int32_t overlap, int32_t maxOverlap) { return 1.0; // disable coord } } LucenePlusPlus-rel_3.0.9/src/core/search/BooleanScorer.cpp000066400000000000000000000203051456444476200235550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BooleanScorer.h" #include "Similarity.h" namespace Lucene { BooleanScorer::BooleanScorer(const SimilarityPtr& similarity, int32_t minNrShouldMatch, Collection optionalScorers, Collection prohibitedScorers) : Scorer(similarity) { this->bucketTable = newLucene(); this->maxCoord = 1; this->requiredMask = 0; this->prohibitedMask = 0; this->nextMask = 1; this->minNrShouldMatch = minNrShouldMatch; this->end = 0; this->doc = -1; if (optionalScorers && !optionalScorers.empty()) { for (Collection::iterator scorer = optionalScorers.begin(); scorer != optionalScorers.end(); ++scorer) { ++maxCoord; if ((*scorer)->nextDoc() != NO_MORE_DOCS) { scorers = newLucene(*scorer, false, false, bucketTable->newCollector(0), scorers); } } } if (prohibitedScorers && !prohibitedScorers.empty()) { for (Collection::iterator scorer = prohibitedScorers.begin(); scorer != prohibitedScorers.end(); ++scorer) { int32_t mask = nextMask; nextMask = nextMask << 1; prohibitedMask |= mask; // update prohibited mask if ((*scorer)->nextDoc() != NO_MORE_DOCS) { scorers = newLucene(*scorer, false, true, bucketTable->newCollector(mask), scorers); } } } coordFactors = Collection::newInstance(maxCoord); SimilarityPtr sim(getSimilarity()); for (int32_t i = 0; i < maxCoord; ++i) { coordFactors[i] = sim->coord(i, maxCoord - 1); } } BooleanScorer::~BooleanScorer() { } bool BooleanScorer::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) { bool more = false; Bucket* __tmp; BucketScorerPtr bs(newLucene()); // The internal loop will set the score and doc before calling collect. 
collector->setScorer(bs); do { bucketTable->__first = nullptr; while (__current) { // more queued // check prohibited & required if ((__current->bits & prohibitedMask) == 0 && (__current->bits & requiredMask) == requiredMask) { if (__current->doc >= max) { __tmp = __current; __current = __current->__next; __tmp->__next = bucketTable->__first; bucketTable->__first = __tmp; continue; } if (__current->coord >= minNrShouldMatch) { auto s = coordFactors.size(); bs->_score = __current->score * coordFactors[__current->coord]; bs->doc = __current->doc; bs->freq = __current->coord; collector->collect(__current->doc); } } __current = __current->__next; // pop the queue } if (bucketTable->__first) { __current = bucketTable->__first; bucketTable->__first = __current->__next; return true; } // refill the queue more = false; end += BucketTable::SIZE; for (SubScorerPtr sub(scorers); sub; sub = sub->next) { int32_t subScorerDocID = sub->scorer->docID(); if (subScorerDocID != NO_MORE_DOCS) { if (sub->scorer->score(sub->collector, end, subScorerDocID)) { more = true; } } } __current = bucketTable->__first; } while (__current || more); return false; } int32_t BooleanScorer::advance(int32_t target) { boost::throw_exception(UnsupportedOperationException()); return 0; } int32_t BooleanScorer::docID() { return doc; } int32_t BooleanScorer::nextDoc() { bool more = false; do { while (bucketTable->__first) { // more queued __current = bucketTable->__first; bucketTable->__first = __current->__next; // pop the queue // check prohibited & required and minNrShouldMatch if ((__current->bits & prohibitedMask) == 0 && (__current->bits & requiredMask) == requiredMask && __current->coord >= minNrShouldMatch) { doc = __current->doc; return doc; } } // refill the queue more = false; end += BucketTable::SIZE; for (SubScorerPtr sub(scorers); sub; sub = sub->next) { ScorerPtr scorer(sub->scorer); sub->collector->setScorer(scorer); int32_t doc = scorer->docID(); while (doc < end) { 
sub->collector->collect(doc); doc = scorer->nextDoc(); } if (doc != NO_MORE_DOCS) { more = true; } } } while (bucketTable->__first || more); doc = NO_MORE_DOCS; return doc; } inline double BooleanScorer::score() { return __current->score * coordFactors[__current->coord]; } void BooleanScorer::score(const CollectorPtr& collector) { score(collector, INT_MAX, nextDoc()); } String BooleanScorer::toString() { StringStream buffer; buffer << L"boolean("; for (SubScorerPtr sub(scorers); sub; sub = sub->next) { buffer << sub->scorer->toString() << L" "; } buffer << L")"; return buffer.str(); } BooleanScorerCollector::BooleanScorerCollector(int32_t mask, const BucketTablePtr& bucketTable) { this->mask = mask; this->_bucketTable = bucketTable; this->__bucketTable = bucketTable.get(); } BooleanScorerCollector::~BooleanScorerCollector() { } void BooleanScorerCollector::collect(int32_t doc) { auto* table = __bucketTable; int32_t i = doc & BucketTable::MASK; auto& bucket = table->buckets[i]; if (!bucket) { bucket = newLucene(); } auto* __bucket = bucket.get(); if (__bucket->doc != doc) { // invalid bucket __bucket->doc = doc; // set doc __bucket->score = __scorer->score(); // initialize score __bucket->bits = mask; // initialize mask __bucket->coord = 1; // initialize coord __bucket->__next = table->__first; // push onto valid list table->__first = __bucket; } else { __bucket->score += __scorer->score(); // increment score __bucket->bits |= mask; // add bits in mask ++__bucket->coord; // increment coord } } void BooleanScorerCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { // not needed by this implementation } void BooleanScorerCollector::setScorer(const ScorerPtr& scorer) { this->_scorer = scorer; this->__scorer = scorer.get(); } bool BooleanScorerCollector::acceptsDocsOutOfOrder() { return true; } BucketScorer::BucketScorer() : Scorer(SimilarityPtr()) { _score = 0; doc = NO_MORE_DOCS; } BucketScorer::~BucketScorer() { } int32_t 
BucketScorer::advance(int32_t target) { return NO_MORE_DOCS; } int32_t BucketScorer::docID() { return doc; } int32_t BucketScorer::nextDoc() { return NO_MORE_DOCS; } double BucketScorer::score() { return _score; } Bucket::Bucket() { doc = -1; score = 0; bits = 0; coord = 0; } Bucket::~Bucket() { } const int32_t BucketTable::SIZE = 1 << 11; const int32_t BucketTable::MASK = BucketTable::SIZE - 1; BucketTable::BucketTable() { buckets = Collection::newInstance(SIZE); } BucketTable::~BucketTable() { } CollectorPtr BucketTable::newCollector(int32_t mask) { return newLucene(mask, shared_from_this()); } int32_t BucketTable::size() { return SIZE; } SubScorer::SubScorer(const ScorerPtr& scorer, bool required, bool prohibited, const CollectorPtr& collector, const SubScorerPtr& next) { this->scorer = scorer; this->required = required; this->prohibited = prohibited; this->collector = collector; this->next = next; } SubScorer::~SubScorer() { } } LucenePlusPlus-rel_3.0.9/src/core/search/BooleanScorer2.cpp000066400000000000000000000211111456444476200236330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BooleanScorer2.h" #include "ReqOptSumScorer.h" #include "ReqExclScorer.h" #include "Similarity.h" #include "Collector.h" namespace Lucene { BooleanScorer2::BooleanScorer2(const SimilarityPtr& similarity, int32_t minNrShouldMatch, Collection required, Collection prohibited, Collection optional) : Scorer(similarity) { this->minNrShouldMatch = minNrShouldMatch; this->requiredScorers = required; this->prohibitedScorers = prohibited; this->optionalScorers = optional; this->doc = -1; } BooleanScorer2::~BooleanScorer2() { } void BooleanScorer2::initialize() { if (minNrShouldMatch < 0) { boost::throw_exception(IllegalArgumentException(L"Minimum number of optional scorers should not be negative")); } coordinator = newLucene(shared_from_this()); coordinator->maxCoord += optionalScorers.size(); coordinator->maxCoord += requiredScorers.size(); coordinator->init(); countingSumScorer = makeCountingSumScorer(); } ScorerPtr BooleanScorer2::countingDisjunctionSumScorer(Collection scorers, int32_t minNrShouldMatch) { // each scorer from the list counted as a single matcher return newLucene(shared_from_this(), scorers, minNrShouldMatch); } ScorerPtr BooleanScorer2::countingConjunctionSumScorer(Collection requiredScorers) { // each scorer from the list counted as a single matcher return newLucene(shared_from_this(), Similarity::getDefault(), requiredScorers); } ScorerPtr BooleanScorer2::dualConjunctionSumScorer(const ScorerPtr& req1, const ScorerPtr& req2) { Collection scorers(newCollection(req1, req2)); // All scorers match, so Similarity::getDefault() always has 1 as the coordination factor. // Therefore the sum of the scores of two scorers is used as score. return newLucene(Similarity::getDefault(), scorers); } ScorerPtr BooleanScorer2::makeCountingSumScorer() { return requiredScorers.empty() ? 
makeCountingSumScorerNoReq() : makeCountingSumScorerSomeReq(); } ScorerPtr BooleanScorer2::makeCountingSumScorerNoReq() { // minNrShouldMatch optional scorers are required, but at least 1 int32_t nrOptRequired = minNrShouldMatch < 1 ? 1 : minNrShouldMatch; ScorerPtr requiredCountingSumScorer; if (optionalScorers.size() > nrOptRequired) { requiredCountingSumScorer = countingDisjunctionSumScorer(optionalScorers, nrOptRequired); } else if (optionalScorers.size() == 1) { requiredCountingSumScorer = newLucene(optionalScorers[0], coordinator); } else { requiredCountingSumScorer = countingConjunctionSumScorer(optionalScorers); } return addProhibitedScorers(requiredCountingSumScorer); } ScorerPtr BooleanScorer2::makeCountingSumScorerSomeReq() { if (optionalScorers.size() == minNrShouldMatch) { // all optional scorers also required. Collection allReq(Collection::newInstance(requiredScorers.begin(), requiredScorers.end())); allReq.addAll(optionalScorers.begin(), optionalScorers.end()); return addProhibitedScorers(countingConjunctionSumScorer(allReq)); } else { // optionalScorers.size() > minNrShouldMatch, and at least one required scorer ScorerPtr requiredCountingSumScorer = requiredScorers.size() == 1 ? newLucene(requiredScorers[0], coordinator) : countingConjunctionSumScorer(requiredScorers); if (minNrShouldMatch > 0) { // use a required disjunction scorer over the optional scorers return addProhibitedScorers(dualConjunctionSumScorer(requiredCountingSumScorer, countingDisjunctionSumScorer(optionalScorers, minNrShouldMatch))); } else { // minNrShouldMatch == 0 return newLucene(addProhibitedScorers(requiredCountingSumScorer), optionalScorers.size() == 1 ? newLucene(optionalScorers[0], coordinator) : countingDisjunctionSumScorer(optionalScorers, 1)); } } } ScorerPtr BooleanScorer2::addProhibitedScorers(const ScorerPtr& requiredCountingSumScorer) { return prohibitedScorers.empty() ? 
requiredCountingSumScorer : newLucene(requiredCountingSumScorer, (prohibitedScorers.size() == 1 ? prohibitedScorers[0] : newLucene(prohibitedScorers))); } void BooleanScorer2::score(const CollectorPtr& collector) { collector->setScorer(shared_from_this()); while ((doc = countingSumScorer->nextDoc()) != NO_MORE_DOCS) { collector->collect(doc); } } bool BooleanScorer2::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) { doc = firstDocID; collector->setScorer(shared_from_this()); while (doc < max) { collector->collect(doc); doc = countingSumScorer->nextDoc(); } return (doc != NO_MORE_DOCS); } int32_t BooleanScorer2::docID() { return doc; } int32_t BooleanScorer2::nextDoc() { doc = countingSumScorer->nextDoc(); return doc; } double BooleanScorer2::score() { coordinator->nrMatchers = 0; double sum = countingSumScorer->score(); return sum * coordinator->coordFactors[coordinator->nrMatchers]; } int32_t BooleanScorer2::advance(int32_t target) { doc = countingSumScorer->advance(target); return doc; } Coordinator::Coordinator(const BooleanScorer2Ptr& scorer) { _scorer = scorer; maxCoord = 0; nrMatchers = 0; } Coordinator::~Coordinator() { } void Coordinator::init() { coordFactors = Collection::newInstance(maxCoord + 1); SimilarityPtr sim(BooleanScorer2Ptr(_scorer)->getSimilarity()); for (int32_t i = 0; i <= maxCoord; ++i) { coordFactors[i] = sim->coord(i, maxCoord); } } SingleMatchScorer::SingleMatchScorer(const ScorerPtr& scorer, const CoordinatorPtr& coordinator) : Scorer(scorer->getSimilarity()) { lastScoredDoc = -1; lastDocScore = std::numeric_limits::quiet_NaN(); this->scorer = scorer; this->coordinator = coordinator; } SingleMatchScorer::~SingleMatchScorer() { } double SingleMatchScorer::score() { int32_t doc = docID(); if (doc >= lastScoredDoc) { if (doc > lastScoredDoc) { lastDocScore = scorer->score(); lastScoredDoc = doc; } ++coordinator->nrMatchers; } return lastDocScore; } int32_t SingleMatchScorer::docID() { return scorer->docID(); } int32_t 
SingleMatchScorer::nextDoc() { return scorer->nextDoc(); } int32_t SingleMatchScorer::advance(int32_t target) { return scorer->advance(target); } CountingDisjunctionSumScorer::CountingDisjunctionSumScorer(const BooleanScorer2Ptr& scorer, Collection subScorers, int32_t minimumNrMatchers) : DisjunctionSumScorer(subScorers, minimumNrMatchers) { _scorer = scorer; lastScoredDoc = -1; lastDocScore = std::numeric_limits::quiet_NaN(); } CountingDisjunctionSumScorer::~CountingDisjunctionSumScorer() { } double CountingDisjunctionSumScorer::score() { int32_t doc = docID(); if (doc >= lastScoredDoc) { if (doc > lastScoredDoc) { lastDocScore = DisjunctionSumScorer::score(); lastScoredDoc = doc; } BooleanScorer2Ptr(_scorer)->coordinator->nrMatchers += DisjunctionSumScorer::_nrMatchers; } return lastDocScore; } CountingConjunctionSumScorer::CountingConjunctionSumScorer(const BooleanScorer2Ptr& scorer, const SimilarityPtr& similarity, Collection scorers) : ConjunctionScorer(similarity, scorers) { _scorer = scorer; lastScoredDoc = -1; requiredNrMatchers = scorers.size(); lastDocScore = std::numeric_limits::quiet_NaN(); } CountingConjunctionSumScorer::~CountingConjunctionSumScorer() { } double CountingConjunctionSumScorer::score() { int32_t doc = docID(); if (doc >= lastScoredDoc) { if (doc > lastScoredDoc) { lastDocScore = ConjunctionScorer::score(); lastScoredDoc = doc; } BooleanScorer2Ptr(_scorer)->coordinator->nrMatchers += requiredNrMatchers; } // All scorers match, so Similarity::getDefault() ConjunctionScorer::score() always has 1 as the /// coordination factor. Therefore the sum of the scores of the requiredScorers is used as score. return lastDocScore; } } LucenePlusPlus-rel_3.0.9/src/core/search/CachingSpanFilter.cpp000066400000000000000000000054471456444476200243560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CachingSpanFilter.h" #include "_CachingSpanFilter.h" #include "SpanFilterResult.h" #include "IndexReader.h" namespace Lucene { CachingSpanFilter::CachingSpanFilter(const SpanFilterPtr& filter, CachingWrapperFilter::DeletesMode deletesMode) { this->filter = filter; if (deletesMode == CachingWrapperFilter::DELETES_DYNAMIC) { boost::throw_exception(IllegalArgumentException(L"DeletesMode::DYNAMIC is not supported")); } this->cache = newLucene(deletesMode); this->hitCount = 0; this->missCount = 0; } CachingSpanFilter::~CachingSpanFilter() { } DocIdSetPtr CachingSpanFilter::getDocIdSet(const IndexReaderPtr& reader) { SpanFilterResultPtr result(getCachedResult(reader)); return result ? result->getDocIdSet() : DocIdSetPtr(); } SpanFilterResultPtr CachingSpanFilter::getCachedResult(const IndexReaderPtr& reader) { LuceneObjectPtr coreKey = reader->getFieldCacheKey(); LuceneObjectPtr delCoreKey = reader->hasDeletions() ? 
reader->getDeletesCacheKey() : coreKey; SpanFilterResultPtr result(boost::dynamic_pointer_cast(cache->get(reader, coreKey, delCoreKey))); if (result) { ++hitCount; return result; } ++missCount; result = filter->bitSpans(reader); cache->put(coreKey, delCoreKey, result); return result; } SpanFilterResultPtr CachingSpanFilter::bitSpans(const IndexReaderPtr& reader) { return getCachedResult(reader); } String CachingSpanFilter::toString() { return L"CachingSpanFilter(" + filter->toString() + L")"; } bool CachingSpanFilter::equals(const LuceneObjectPtr& other) { if (SpanFilter::equals(other)) { return true; } CachingSpanFilterPtr otherCachingSpanFilter(boost::dynamic_pointer_cast(other)); if (!otherCachingSpanFilter) { return false; } return this->filter->equals(otherCachingSpanFilter->filter); } int32_t CachingSpanFilter::hashCode() { return filter->hashCode() ^ 0x1117bf25; } FilterCacheSpanFilterResult::FilterCacheSpanFilterResult(CachingWrapperFilter::DeletesMode deletesMode) : FilterCache(deletesMode) { } FilterCacheSpanFilterResult::~FilterCacheSpanFilterResult() { } LuceneObjectPtr FilterCacheSpanFilterResult::mergeDeletes(const IndexReaderPtr& reader, const LuceneObjectPtr& value) { boost::throw_exception(IllegalStateException(L"DeletesMode::DYNAMIC is not supported")); return LuceneObjectPtr(); } } LucenePlusPlus-rel_3.0.9/src/core/search/CachingWrapperFilter.cpp000066400000000000000000000114611456444476200250660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CachingWrapperFilter.h" #include "_CachingWrapperFilter.h" #include "OpenBitSetDISI.h" #include "IndexReader.h" namespace Lucene { CachingWrapperFilter::CachingWrapperFilter(const FilterPtr& filter, DeletesMode deletesMode) { this->filter = filter; this->cache = newLucene(deletesMode); this->hitCount = 0; this->missCount = 0; } CachingWrapperFilter::~CachingWrapperFilter() { } DocIdSetPtr CachingWrapperFilter::docIdSetToCache(const DocIdSetPtr& docIdSet, const IndexReaderPtr& reader) { if (!docIdSet) { // this is better than returning null, as the nonnull result can be cached return DocIdSet::EMPTY_DOCIDSET(); } else if (docIdSet->isCacheable()) { return docIdSet; } else { DocIdSetIteratorPtr it(docIdSet->iterator()); // null is allowed to be returned by iterator(), in this case we wrap with the empty set, // which is cacheable. return !it ? DocIdSet::EMPTY_DOCIDSET() : newLucene(it, reader->maxDoc()); } } DocIdSetPtr CachingWrapperFilter::getDocIdSet(const IndexReaderPtr& reader) { LuceneObjectPtr coreKey = reader->getFieldCacheKey(); LuceneObjectPtr delCoreKey = reader->hasDeletions() ? 
reader->getDeletesCacheKey() : coreKey; DocIdSetPtr docIdSet(boost::dynamic_pointer_cast(cache->get(reader, coreKey, delCoreKey))); if (docIdSet) { ++hitCount; return docIdSet; } ++missCount; // cache miss docIdSet = docIdSetToCache(filter->getDocIdSet(reader), reader); if (docIdSet) { cache->put(coreKey, delCoreKey, docIdSet); } return docIdSet; } String CachingWrapperFilter::toString() { return L"CachingWrapperFilter(" + filter->toString() + L")"; } bool CachingWrapperFilter::equals(const LuceneObjectPtr& other) { if (Filter::equals(other)) { return true; } CachingWrapperFilterPtr otherCachingWrapperFilter(boost::dynamic_pointer_cast(other)); if (!otherCachingWrapperFilter) { return false; } return this->filter->equals(otherCachingWrapperFilter->filter); } int32_t CachingWrapperFilter::hashCode() { return filter->hashCode() ^ 0x1117bf25; } FilterCache::FilterCache(CachingWrapperFilter::DeletesMode deletesMode) { this->deletesMode = deletesMode; } FilterCache::~FilterCache() { } LuceneObjectPtr FilterCache::get(const IndexReaderPtr& reader, const LuceneObjectPtr& coreKey, const LuceneObjectPtr& delCoreKey) { SyncLock syncLock(this); if (!cache) { cache = WeakMapObjectObject::newInstance(); } LuceneObjectPtr value; if (deletesMode == CachingWrapperFilter::DELETES_IGNORE) { // key on core value = cache.get(coreKey); } else if (deletesMode == CachingWrapperFilter::DELETES_RECACHE) { // key on deletes, if any, else core value = cache.get(delCoreKey); } else { BOOST_ASSERT(deletesMode == CachingWrapperFilter::DELETES_DYNAMIC); // first try for exact match value = cache.get(delCoreKey); if (!value) { // now for core match, but dynamically AND NOT deletions value = cache.get(coreKey); if (value && reader->hasDeletions()) { value = mergeDeletes(reader, value); } } } return value; } void FilterCache::put(const LuceneObjectPtr& coreKey, const LuceneObjectPtr& delCoreKey, const LuceneObjectPtr& value) { SyncLock syncLock(this); if (deletesMode == 
CachingWrapperFilter::DELETES_IGNORE) { cache.put(coreKey, value); } else if (deletesMode == CachingWrapperFilter::DELETES_RECACHE) { cache.put(delCoreKey, value); } else { cache.put(coreKey, value); cache.put(delCoreKey, value); } } FilterCacheDocIdSet::FilterCacheDocIdSet(CachingWrapperFilter::DeletesMode deletesMode) : FilterCache(deletesMode) { } FilterCacheDocIdSet::~FilterCacheDocIdSet() { } LuceneObjectPtr FilterCacheDocIdSet::mergeDeletes(const IndexReaderPtr& reader, const LuceneObjectPtr& value) { return newLucene(reader, boost::dynamic_pointer_cast(value)); } FilteredCacheDocIdSet::FilteredCacheDocIdSet(const IndexReaderPtr& reader, const DocIdSetPtr& innerSet) : FilteredDocIdSet(innerSet) { this->reader = reader; } FilteredCacheDocIdSet::~FilteredCacheDocIdSet() { } bool FilteredCacheDocIdSet::match(int32_t docid) { return !reader->isDeleted(docid); } } LucenePlusPlus-rel_3.0.9/src/core/search/Collector.cpp000066400000000000000000000006651456444476200227550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Collector.h" namespace Lucene { Collector::~Collector() { } } LucenePlusPlus-rel_3.0.9/src/core/search/ComplexExplanation.cpp000066400000000000000000000017601456444476200246360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ComplexExplanation.h" #include "StringUtils.h" namespace Lucene { ComplexExplanation::ComplexExplanation(bool match, double value, const String& description) : Explanation(value, description) { this->match = match; } ComplexExplanation::~ComplexExplanation() { } bool ComplexExplanation::getMatch() { return match; } void ComplexExplanation::setMatch(bool match) { this->match = match; } bool ComplexExplanation::isMatch() { return getMatch(); } String ComplexExplanation::getSummary() { return StringUtils::toString(getValue()) + L" = " + (isMatch() ? L"(MATCH) " : L"(NON-MATCH) ") + getDescription(); } } LucenePlusPlus-rel_3.0.9/src/core/search/ConjunctionScorer.cpp000066400000000000000000000077351456444476200245030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ConjunctionScorer.h" #include "Similarity.h" namespace Lucene { struct lessScorerDocId { inline bool operator()(const ScorerPtr& first, const ScorerPtr& second) const { return (first->docID() < second->docID()); } }; ConjunctionScorer::ConjunctionScorer(const SimilarityPtr& similarity, Collection scorers) : Scorer(similarity) { this->lastDoc = -1; this->scorers = scorers; this->coord = similarity->coord(scorers.size(), scorers.size()); for (Collection::iterator scorer = scorers.begin(); scorer != scorers.end(); ++scorer) { if ((*scorer)->nextDoc() == NO_MORE_DOCS) { // If even one of the sub-scorers does not have any documents, this scorer should not attempt // to do any more work. lastDoc = NO_MORE_DOCS; return; } } // Sort the array the first time... 
// We don't need to sort the array in any future calls because we know it will already start off // sorted (all scorers on same doc). std::sort(scorers.begin(), scorers.end(), lessScorerDocId()); // NOTE: doNext() must be called before the re-sorting of the array later on. The reason is this: // assume there are 5 scorers, whose first docs are 1, 2, 3, 5, 5 respectively. Sorting (above) leaves // the array as is. Calling doNext() here advances all the first scorers to 5 (or a larger doc ID // they all agree on). // However, if we re-sort before doNext() is called, the order will be 5, 3, 2, 1, 5 and then doNext() // will stop immediately, since the first scorer's docs equals the last one. So the invariant that after // calling doNext() all scorers are on the same doc ID is broken. if (doNext() == NO_MORE_DOCS) { // The scorers did not agree on any document. lastDoc = NO_MORE_DOCS; return; } // If first-time skip distance is any predictor of scorer sparseness, then we should always try to skip // first on those scorers. Keep last scorer in it's last place (it will be the first to be skipped on), // but reverse all of the others so that they will be skipped on in order of original high skip. int32_t end = scorers.size() - 1; int32_t max = end >> 1; for (int32_t i = 0; i < max; ++i) { ScorerPtr tmp(scorers[i]); int32_t idx = end - i - 1; scorers[i] = scorers[idx]; scorers[idx] = tmp; } } ConjunctionScorer::~ConjunctionScorer() { } int32_t ConjunctionScorer::doNext() { int32_t first = 0; int32_t doc = scorers[scorers.size() - 1]->docID(); Scorer* __firstScorer; // TODO: __firstScore nullptr ?? while ((__firstScorer = scorers[first].get())->docID() < doc) { doc = __firstScorer->advance(doc); first = first == scorers.size() - 1 ? 
0 : first + 1; } return doc; } int32_t ConjunctionScorer::advance(int32_t target) { if (lastDoc == NO_MORE_DOCS) { return lastDoc; } auto& scorer = scorers[(scorers.size() - 1)]; if (scorer->docID() < target) { scorer->advance(target); } lastDoc = doNext(); return lastDoc; } inline int32_t ConjunctionScorer::docID() { return lastDoc; } int32_t ConjunctionScorer::nextDoc() { if (lastDoc == NO_MORE_DOCS) { return lastDoc; } else if (lastDoc == -1) { lastDoc = scorers[scorers.size() - 1]->docID(); return lastDoc; } scorers[(scorers.size() - 1)]->nextDoc(); lastDoc = doNext(); return lastDoc; } double ConjunctionScorer::score() { double sum = 0.0; for (auto& scorer : scorers){ sum += scorer->score(); } return sum * coord; } } LucenePlusPlus-rel_3.0.9/src/core/search/ConstantScoreQuery.cpp000066400000000000000000000117761456444476200246470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ConstantScoreQuery.h" #include "_ConstantScoreQuery.h" #include "Filter.h" #include "ComplexExplanation.h" #include "DocIdSet.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { ConstantScoreQuery::ConstantScoreQuery(const FilterPtr& filter) { this->filter = filter; } ConstantScoreQuery::~ConstantScoreQuery() { } FilterPtr ConstantScoreQuery::getFilter() { return filter; } QueryPtr ConstantScoreQuery::rewrite(const IndexReaderPtr& reader) { return shared_from_this(); } void ConstantScoreQuery::extractTerms(SetTerm terms) { // OK to not add any terms when used for MultiSearcher, but may not be OK for highlighting } WeightPtr ConstantScoreQuery::createWeight(const SearcherPtr& searcher) { return newLucene(shared_from_this(), searcher); } String ConstantScoreQuery::toString(const String& field) { return L"ConstantScore(" + filter->toString() + (getBoost() == 1.0 ? L")" : L"^" + StringUtils::toString(getBoost())); } bool ConstantScoreQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } ConstantScoreQueryPtr otherConstantScoreQuery(boost::dynamic_pointer_cast(other)); if (!otherConstantScoreQuery) { return false; } return (this->getBoost() == otherConstantScoreQuery->getBoost() && this->filter->equals(otherConstantScoreQuery->filter)); } int32_t ConstantScoreQuery::hashCode() { // Simple add is OK since no existing filter hashcode has a float component. return filter->hashCode() + MiscUtils::doubleToIntBits(getBoost()); } LuceneObjectPtr ConstantScoreQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? 
other : newLucene(filter); ConstantScoreQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); cloneQuery->filter = filter; return cloneQuery; } ConstantWeight::ConstantWeight(const ConstantScoreQueryPtr& constantScorer, const SearcherPtr& searcher) { this->constantScorer = constantScorer; this->similarity = constantScorer->getSimilarity(searcher); this->queryNorm = 0; this->queryWeight = 0; } ConstantWeight::~ConstantWeight() { } QueryPtr ConstantWeight::getQuery() { return constantScorer; } double ConstantWeight::getValue() { return queryWeight; } double ConstantWeight::sumOfSquaredWeights() { queryWeight = constantScorer->getBoost(); return queryWeight * queryWeight; } void ConstantWeight::normalize(double norm) { this->queryNorm = norm; queryWeight *= this->queryNorm; } ScorerPtr ConstantWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { return newLucene(constantScorer, similarity, reader, shared_from_this()); } ExplanationPtr ConstantWeight::explain(const IndexReaderPtr& reader, int32_t doc) { ConstantScorerPtr cs(newLucene(constantScorer, similarity, reader, shared_from_this())); bool exists = (cs->docIdSetIterator->advance(doc) == doc); ComplexExplanationPtr result(newLucene()); if (exists) { result->setDescription(L"ConstantScoreQuery(" + constantScorer->filter->toString() + L"), product of:"); result->setValue(queryWeight); result->setMatch(true); result->addDetail(newLucene(constantScorer->getBoost(), L"boost")); result->addDetail(newLucene(queryNorm, L"queryNorm")); } else { result->setDescription(L"ConstantScoreQuery(" + constantScorer->filter->toString() + L") doesn't match id " + StringUtils::toString(doc)); result->setValue(0); result->setMatch(false); } return result; } ConstantScorer::ConstantScorer(const ConstantScoreQueryPtr& constantScorer, const SimilarityPtr& similarity, const IndexReaderPtr& reader, const WeightPtr& w) : Scorer(similarity) { doc = -1; theScore = w->getValue(); DocIdSetPtr 
docIdSet(constantScorer->filter->getDocIdSet(reader)); if (!docIdSet) { docIdSetIterator = DocIdSet::EMPTY_DOCIDSET()->iterator(); } else { DocIdSetIteratorPtr iter(docIdSet->iterator()); if (!iter) { docIdSetIterator = DocIdSet::EMPTY_DOCIDSET()->iterator(); } else { docIdSetIterator = iter; } } } ConstantScorer::~ConstantScorer() { } int32_t ConstantScorer::nextDoc() { return docIdSetIterator->nextDoc(); } int32_t ConstantScorer::docID() { return docIdSetIterator->docID(); } double ConstantScorer::score() { return theScore; } int32_t ConstantScorer::advance(int32_t target) { return docIdSetIterator->advance(target); } } LucenePlusPlus-rel_3.0.9/src/core/search/DefaultSimilarity.cpp000066400000000000000000000034731456444476200244620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DefaultSimilarity.h" #include "FieldInvertState.h" namespace Lucene { DefaultSimilarity::DefaultSimilarity() { discountOverlaps = false; } DefaultSimilarity::~DefaultSimilarity() { } double DefaultSimilarity::computeNorm(const String& fieldName, const FieldInvertStatePtr& state) { int32_t numTerms; if (discountOverlaps) { numTerms = state->getLength() - state->getNumOverlap(); } else { numTerms = state->getLength(); } return (state->getBoost() * lengthNorm(fieldName, numTerms)); } inline double DefaultSimilarity::lengthNorm(const String& fieldName, int32_t numTokens) { return (double)(1.0 / std::sqrt((double)numTokens)); } inline double DefaultSimilarity::queryNorm(double sumOfSquaredWeights) { return (double)(1.0 / std::sqrt(sumOfSquaredWeights)); } inline double DefaultSimilarity::tf(double freq) { return (double)std::sqrt(freq); } inline double DefaultSimilarity::sloppyFreq(int32_t distance) { return (1.0 / (double)(distance + 1)); } inline double DefaultSimilarity::idf(int32_t docFreq, int32_t numDocs) { return (double)(std::log((double)numDocs / (double)(docFreq + 1)) + 1.0); } inline double DefaultSimilarity::coord(int32_t overlap, int32_t maxOverlap) { return (double)overlap / (double)maxOverlap; } inline void DefaultSimilarity::setDiscountOverlaps(bool v) { discountOverlaps = v; } inline bool DefaultSimilarity::getDiscountOverlaps() { return discountOverlaps; } } LucenePlusPlus-rel_3.0.9/src/core/search/DisjunctionMaxQuery.cpp000066400000000000000000000172051456444476200250120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DisjunctionMaxQuery.h" #include "_DisjunctionMaxQuery.h" #include "BooleanQuery.h" #include "DocIdSetIterator.h" #include "ComplexExplanation.h" #include "Searcher.h" #include "DisjunctionMaxScorer.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { DisjunctionMaxQuery::DisjunctionMaxQuery(double tieBreakerMultiplier) { this->tieBreakerMultiplier = tieBreakerMultiplier; this->disjuncts = Collection::newInstance(); } DisjunctionMaxQuery::DisjunctionMaxQuery(Collection disjuncts, double tieBreakerMultiplier) { this->tieBreakerMultiplier = tieBreakerMultiplier; this->disjuncts = Collection::newInstance(); add(disjuncts); } DisjunctionMaxQuery::~DisjunctionMaxQuery() { } void DisjunctionMaxQuery::add(const QueryPtr& query) { disjuncts.add(query); } void DisjunctionMaxQuery::add(Collection disjuncts) { this->disjuncts.addAll(disjuncts.begin(), disjuncts.end()); } Collection::iterator DisjunctionMaxQuery::begin() { return disjuncts.begin(); } Collection::iterator DisjunctionMaxQuery::end() { return disjuncts.end(); } WeightPtr DisjunctionMaxQuery::createWeight(const SearcherPtr& searcher) { return newLucene(shared_from_this(), searcher); } QueryPtr DisjunctionMaxQuery::rewrite(const IndexReaderPtr& reader) { int32_t numDisjunctions = disjuncts.size(); if (numDisjunctions == 1) { QueryPtr singleton(disjuncts[0]); QueryPtr result(singleton->rewrite(reader)); if (getBoost() != 1.0) { if (result == singleton) { result = boost::dynamic_pointer_cast(result->clone()); } result->setBoost(getBoost() * result->getBoost()); } return result; } DisjunctionMaxQueryPtr clone; for (int32_t i = 0; i < numDisjunctions; ++i) { QueryPtr clause(disjuncts[i]); QueryPtr rewrite(clause->rewrite(reader)); if (rewrite != clause) { if (!clone) { clone = boost::dynamic_pointer_cast(this->clone()); } clone->disjuncts[i] = rewrite; } } return clone ? 
clone : shared_from_this(); } LuceneObjectPtr DisjunctionMaxQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = Query::clone(other ? other : newLucene()); DisjunctionMaxQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->tieBreakerMultiplier = tieBreakerMultiplier; cloneQuery->disjuncts = Collection::newInstance(disjuncts.begin(), disjuncts.end()); return cloneQuery; } void DisjunctionMaxQuery::extractTerms(SetTerm terms) { for (Collection::iterator query = disjuncts.begin(); query != disjuncts.end(); ++query) { (*query)->extractTerms(terms); } } String DisjunctionMaxQuery::toString(const String& field) { String buffer(L"("); for (Collection::iterator query = disjuncts.begin(); query != disjuncts.end(); ++query) { if (query != disjuncts.begin()) { buffer += L" | "; } if (boost::dynamic_pointer_cast(*query)) { // wrap sub-bools in parens buffer += L"(" + (*query)->toString(field) + L")"; } else { buffer += (*query)->toString(field); } } buffer += L")"; if (tieBreakerMultiplier != 0.0) { buffer += L"~" + StringUtils::toString(tieBreakerMultiplier); } if (getBoost() != 1.0) { buffer += L"^" + StringUtils::toString(getBoost()); } return buffer; } bool DisjunctionMaxQuery::equals(const LuceneObjectPtr& other) { if (!Query::equals(other)) { return false; } DisjunctionMaxQueryPtr otherDisjunctionMaxQuery(boost::dynamic_pointer_cast(other)); if (!otherDisjunctionMaxQuery) { return false; } return (tieBreakerMultiplier == otherDisjunctionMaxQuery->tieBreakerMultiplier && disjuncts.equals(otherDisjunctionMaxQuery->disjuncts, luceneEquals())); } int32_t DisjunctionMaxQuery::hashCode() { return MiscUtils::doubleToIntBits(getBoost()) + MiscUtils::doubleToIntBits(tieBreakerMultiplier) + MiscUtils::hashCode(disjuncts.begin(), disjuncts.end(), MiscUtils::hashLucene); } DisjunctionMaxWeight::DisjunctionMaxWeight(const DisjunctionMaxQueryPtr& query, const SearcherPtr& searcher) { this->query = query; this->similarity = searcher->getSimilarity(); 
this->weights = Collection::newInstance(); for (Collection::iterator disjunctQuery = query->disjuncts.begin(); disjunctQuery != query->disjuncts.end(); ++disjunctQuery) { this->weights.add((*disjunctQuery)->createWeight(searcher)); } } DisjunctionMaxWeight::~DisjunctionMaxWeight() { } QueryPtr DisjunctionMaxWeight::getQuery() { return query; } double DisjunctionMaxWeight::getValue() { return query->getBoost(); } double DisjunctionMaxWeight::sumOfSquaredWeights() { double max = 0.0; double sum = 0.0; for (Collection::iterator currentWeight = weights.begin(); currentWeight != weights.end(); ++currentWeight) { double sub = (*currentWeight)->sumOfSquaredWeights(); sum += sub; max = std::max(max, sub); } double boost = query->getBoost(); return (((sum - max) * query->tieBreakerMultiplier * query->tieBreakerMultiplier) + max) * boost * boost; } void DisjunctionMaxWeight::normalize(double norm) { norm *= query->getBoost(); // Incorporate our boost for (Collection::iterator wt = weights.begin(); wt != weights.end(); ++wt) { (*wt)->normalize(norm); } } ScorerPtr DisjunctionMaxWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { Collection scorers(Collection::newInstance(weights.size())); int32_t idx = 0; for (Collection::iterator wt = weights.begin(); wt != weights.end(); ++wt) { ScorerPtr subScorer((*wt)->scorer(reader, true, false)); if (subScorer && subScorer->nextDoc() != DocIdSetIterator::NO_MORE_DOCS) { scorers[idx++] = subScorer; } } if (idx == 0) { return ScorerPtr(); // all scorers did not have documents } DisjunctionMaxScorerPtr result(newLucene(query->tieBreakerMultiplier, similarity, scorers, idx)); return result; } ExplanationPtr DisjunctionMaxWeight::explain(const IndexReaderPtr& reader, int32_t doc) { if (query->disjuncts.size() == 1) { return weights[0]->explain(reader, doc); } ComplexExplanationPtr result(newLucene()); double max = 0.0; double sum = 0.0; result->setDescription(query->tieBreakerMultiplier == 0.0 ? 
L"max of:" : (L"max plus " + StringUtils::toString(query->tieBreakerMultiplier) + L" times others of:")); for (Collection::iterator wt = weights.begin(); wt != weights.end(); ++wt) { ExplanationPtr e = (*wt)->explain(reader, doc); if (e->isMatch()) { result->setMatch(true); result->addDetail(e); sum += e->getValue(); max = std::max(max, e->getValue()); } } result->setValue(max + (sum - max) * query->tieBreakerMultiplier); return result; } } LucenePlusPlus-rel_3.0.9/src/core/search/DisjunctionMaxScorer.cpp000066400000000000000000000103501456444476200251340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DisjunctionMaxScorer.h" namespace Lucene { DisjunctionMaxScorer::DisjunctionMaxScorer(double tieBreakerMultiplier, const SimilarityPtr& similarity, Collection subScorers, int32_t numScorers) : Scorer(similarity) { this->doc = -1; this->tieBreakerMultiplier = tieBreakerMultiplier; // The passed subScorers array includes only scorers which have documents (DisjunctionMaxQuery takes care // of that), and their nextDoc() was already called. 
this->subScorers = subScorers; this->numScorers = numScorers; heapify(); } DisjunctionMaxScorer::~DisjunctionMaxScorer() { } int32_t DisjunctionMaxScorer::nextDoc() { if (numScorers == 0) { doc = NO_MORE_DOCS; return doc; } while (subScorers[0]->docID() == doc) { if (subScorers[0]->nextDoc() != NO_MORE_DOCS) { heapAdjust(0); } else { heapRemoveRoot(); if (numScorers == 0) { doc = NO_MORE_DOCS; return doc; } } } doc = subScorers[0]->docID(); return doc; } int32_t DisjunctionMaxScorer::docID() { return doc; } double DisjunctionMaxScorer::score() { int32_t doc = subScorers[0]->docID(); Collection sum(newCollection(subScorers[0]->score())); Collection max(Collection::newInstance(sum.begin(), sum.end())); int32_t size = numScorers; scoreAll(1, size, doc, sum, max); scoreAll(2, size, doc, sum, max); return max[0] + (sum[0] - max[0]) * tieBreakerMultiplier; } void DisjunctionMaxScorer::scoreAll(int32_t root, int32_t size, int32_t doc, Collection sum, Collection max) { if (root < size && subScorers[root]->docID() == doc) { double sub = subScorers[root]->score(); sum[0] += sub; max[0] = std::max(max[0], sub); scoreAll((root << 1) + 1, size, doc, sum, max); scoreAll((root << 1) + 2, size, doc, sum, max); } } int32_t DisjunctionMaxScorer::advance(int32_t target) { if (numScorers == 0) { doc = NO_MORE_DOCS; return doc; } while (subScorers[0]->docID() < target) { if (subScorers[0]->advance(target) != NO_MORE_DOCS) { heapAdjust(0); } else { heapRemoveRoot(); if (numScorers == 0) { doc = NO_MORE_DOCS; return doc; } } } doc = subScorers[0]->docID(); return doc; } void DisjunctionMaxScorer::heapify() { for (int32_t i = (numScorers >> 1) - 1; i >= 0; --i) { heapAdjust(i); } } void DisjunctionMaxScorer::heapAdjust(int32_t root) { ScorerPtr scorer(subScorers[root]); int32_t doc = scorer->docID(); int32_t i = root; while (i <= (numScorers >> 1) - 1) { int32_t lchild = (i << 1) + 1; ScorerPtr lscorer(subScorers[lchild]); int32_t ldoc = lscorer->docID(); int32_t rdoc = INT_MAX; int32_t 
rchild = (i << 1) + 2; ScorerPtr rscorer; if (rchild < numScorers) { rscorer = subScorers[rchild]; rdoc = rscorer->docID(); } if (ldoc < doc) { if (rdoc < ldoc) { subScorers[i] = rscorer; subScorers[rchild] = scorer; i = rchild; } else { subScorers[i] = lscorer; subScorers[lchild] = scorer; i = lchild; } } else if (rdoc < doc) { subScorers[i] = rscorer; subScorers[rchild] = scorer; i = rchild; } else { return; } } } void DisjunctionMaxScorer::heapRemoveRoot() { if (numScorers == 1) { subScorers[0].reset(); numScorers = 0; } else { subScorers[0] = subScorers[numScorers - 1]; subScorers[numScorers - 1].reset(); --numScorers; heapAdjust(0); } } } LucenePlusPlus-rel_3.0.9/src/core/search/DisjunctionSumScorer.cpp000066400000000000000000000101371456444476200251560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DisjunctionSumScorer.h" #include "ScorerDocQueue.h" #include "Collector.h" namespace Lucene { DisjunctionSumScorer::DisjunctionSumScorer(Collection subScorers, int32_t minimumNrMatchers) : Scorer(SimilarityPtr()) { this->currentDoc = -1; this->_nrMatchers = -1; this->currentScore = std::numeric_limits::quiet_NaN(); this->nrScorers = subScorers.size(); if (minimumNrMatchers <= 0) { boost::throw_exception(IllegalArgumentException(L"Minimum nr of matchers must be positive")); } if (nrScorers <= 1) { boost::throw_exception(IllegalArgumentException(L"There must be at least 2 subScorers")); } this->minimumNrMatchers = minimumNrMatchers; this->subScorers = subScorers; } DisjunctionSumScorer::~DisjunctionSumScorer() { } void DisjunctionSumScorer::initialize() { initScorerDocQueue(); } void DisjunctionSumScorer::initScorerDocQueue() { scorerDocQueue = newLucene(nrScorers); for (Collection::iterator se = subScorers.begin(); se != subScorers.end(); ++se) { if ((*se)->nextDoc() != NO_MORE_DOCS) { scorerDocQueue->insert(*se); } } } void DisjunctionSumScorer::score(const CollectorPtr& collector) { collector->setScorer(shared_from_this()); while (nextDoc() != NO_MORE_DOCS) { collector->collect(currentDoc); } } bool DisjunctionSumScorer::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) { // firstDocID is ignored since nextDoc() sets 'currentDoc' collector->setScorer(shared_from_this()); while (currentDoc < max) { collector->collect(currentDoc); if (nextDoc() == NO_MORE_DOCS) { return false; } } return true; } int32_t DisjunctionSumScorer::nextDoc() { if (scorerDocQueue->size() < minimumNrMatchers || !advanceAfterCurrent()) { currentDoc = NO_MORE_DOCS; } return currentDoc; } bool DisjunctionSumScorer::advanceAfterCurrent() { do { // repeat until minimum nr of matchers currentDoc = scorerDocQueue->topDoc(); currentScore = scorerDocQueue->topScore(); 
_nrMatchers = 1; do { // Until all subscorers are after currentDoc if (!scorerDocQueue->topNextAndAdjustElsePop()) { if (scorerDocQueue->size() == 0) { break; // nothing more to advance, check for last match. } } if (scorerDocQueue->topDoc() != currentDoc) { break; // All remaining subscorers are after currentDoc. } currentScore += scorerDocQueue->topScore(); ++_nrMatchers; } while (true); if (_nrMatchers >= minimumNrMatchers) { return true; } else if (scorerDocQueue->size() < minimumNrMatchers) { return false; } } while (true); } double DisjunctionSumScorer::score() { return currentScore; } int32_t DisjunctionSumScorer::docID() { return currentDoc; } int32_t DisjunctionSumScorer::nrMatchers() { return _nrMatchers; } int32_t DisjunctionSumScorer::advance(int32_t target) { if (scorerDocQueue->size() < minimumNrMatchers) { currentDoc = NO_MORE_DOCS; return currentDoc; } if (target <= currentDoc) { return currentDoc; } do { if (scorerDocQueue->topDoc() >= target) { if (!advanceAfterCurrent()) { currentDoc = NO_MORE_DOCS; } return currentDoc; } else if (!scorerDocQueue->topSkipToAndAdjustElsePop(target)) { if (scorerDocQueue->size() < minimumNrMatchers) { currentDoc = NO_MORE_DOCS; return currentDoc; } } } while (true); } } LucenePlusPlus-rel_3.0.9/src/core/search/DocIdSet.cpp000066400000000000000000000023041456444476200224550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocIdSet.h" #include "_DocIdSet.h" namespace Lucene { DocIdSet::~DocIdSet() { } bool DocIdSet::isCacheable() { return false; } DocIdSetPtr DocIdSet::EMPTY_DOCIDSET() { static DocIdSetPtr _EMPTY_DOCIDSET; LUCENE_RUN_ONCE( _EMPTY_DOCIDSET = newLucene(); CycleCheck::addStatic(_EMPTY_DOCIDSET); ); return _EMPTY_DOCIDSET; } EmptyDocIdSetIterator::~EmptyDocIdSetIterator() { } int32_t EmptyDocIdSetIterator::advance(int32_t target) { return NO_MORE_DOCS; } int32_t EmptyDocIdSetIterator::docID() { return NO_MORE_DOCS; } int32_t EmptyDocIdSetIterator::nextDoc() { return NO_MORE_DOCS; } EmptyDocIdSet::~EmptyDocIdSet() { } DocIdSetIteratorPtr EmptyDocIdSet::iterator() { return newLucene(); } bool EmptyDocIdSet::isCacheable() { return true; } } LucenePlusPlus-rel_3.0.9/src/core/search/DocIdSetIterator.cpp000066400000000000000000000012001456444476200241610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocIdSetIterator.h" namespace Lucene { /// When returned by {@link #nextDoc()}, {@link #advance(int)} and {@link #docID()} it means there /// docs in the iterator. const int32_t DocIdSetIterator::NO_MORE_DOCS = INT_MAX; DocIdSetIterator::~DocIdSetIterator() { } } LucenePlusPlus-rel_3.0.9/src/core/search/ExactPhraseScorer.cpp000066400000000000000000000030701456444476200244050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ExactPhraseScorer.h" #include "PhrasePositions.h" #include "PhraseQueue.h" namespace Lucene { ExactPhraseScorer::ExactPhraseScorer(const WeightPtr& weight, Collection tps, Collection offsets, const SimilarityPtr& similarity, ByteArray norms) : PhraseScorer(weight, tps, offsets, similarity, norms) { } ExactPhraseScorer::~ExactPhraseScorer() { } double ExactPhraseScorer::phraseFreq() { // sort list with pq pq->clear(); for (auto* __pp = __first; more && __pp; __pp = __pp->__next) { __pp->firstPosition(); pq->add(__pp); } pqToList(); // rebuild list from pq // For counting how many times the exact phrase is found in current document, just count how many // times all PhrasePosition's have exactly the same position. int32_t freq = 0; do { while (__first->position < __last->position) { // scan forward in first do { if (!__first->nextPosition()) { return freq; } } while (__first->position < __last->position); firstToLast(); } ++freq; // all equal: a match } while (__last->nextPosition()); return freq; } } LucenePlusPlus-rel_3.0.9/src/core/search/Explanation.cpp000066400000000000000000000042211456444476200233010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Explanation.h" #include "StringUtils.h" namespace Lucene { Explanation::Explanation(double value, const String& description) { this->value = value; this->description = description; } Explanation::~Explanation() { } bool Explanation::isMatch() { return (0.0 < getValue()); } double Explanation::getValue() { return value; } void Explanation::setValue(double value) { this->value = value; } String Explanation::getDescription() { return description; } void Explanation::setDescription(const String& description) { this->description = description; } String Explanation::getSummary() { return StringUtils::toString(getValue()) + L" = " + getDescription(); } Collection Explanation::getDetails() { if (!details) { return Collection(); } return Collection::newInstance(this->details.begin(), this->details.end()); } void Explanation::addDetail(const ExplanationPtr& detail) { if (!details) { details = Collection::newInstance(); } details.add(detail); } String Explanation::toString() { return toString(0); } String Explanation::toString(int32_t depth) { String buffer; for (int32_t i = 0; i < depth; ++i) { buffer += L" "; } buffer += getSummary() + L"\n"; if (details) { for (int32_t i = 0; i < details.size(); ++i) { buffer += details[i]->toString(depth + 1); } } return buffer; } String Explanation::toHtml() { String buffer(L"
    \n
  • " + getSummary() + L"
    \n"); if (details) { for (int32_t i = 0; i < details.size(); ++i) { buffer += details[i]->toHtml(); } } buffer += L"
  • \n
\n"; return buffer; } IDFExplanation::~IDFExplanation() { } } LucenePlusPlus-rel_3.0.9/src/core/search/FieldCache.cpp000066400000000000000000000207561456444476200230010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldCache.h" #include "_FieldCache.h" #include "FieldCacheImpl.h" #include "NumericUtils.h" #include "StringUtils.h" namespace Lucene { /// Indicator for StringIndex values in the cache. const int32_t FieldCache::STRING_INDEX = -1; FieldCache::~FieldCache() { } FieldCachePtr FieldCache::DEFAULT() { static FieldCacheImplPtr _DEFAULT; LUCENE_RUN_ONCE( _DEFAULT = newLucene(); CycleCheck::addStatic(_DEFAULT); ); return _DEFAULT; } ByteParserPtr FieldCache::DEFAULT_BYTE_PARSER() { static DefaultByteParserPtr _DEFAULT_BYTE_PARSER; LUCENE_RUN_ONCE( _DEFAULT_BYTE_PARSER = newLucene(); CycleCheck::addStatic(_DEFAULT_BYTE_PARSER); ); return _DEFAULT_BYTE_PARSER; } IntParserPtr FieldCache::DEFAULT_INT_PARSER() { static DefaultIntParserPtr _DEFAULT_INT_PARSER; LUCENE_RUN_ONCE( _DEFAULT_INT_PARSER = newLucene(); CycleCheck::addStatic(_DEFAULT_INT_PARSER); ); return _DEFAULT_INT_PARSER; } LongParserPtr FieldCache::DEFAULT_LONG_PARSER() { static DefaultLongParserPtr _DEFAULT_LONG_PARSER; LUCENE_RUN_ONCE( _DEFAULT_LONG_PARSER = newLucene(); CycleCheck::addStatic(_DEFAULT_LONG_PARSER); ); return _DEFAULT_LONG_PARSER; } DoubleParserPtr FieldCache::DEFAULT_DOUBLE_PARSER() { static DefaultDoubleParserPtr _DEFAULT_DOUBLE_PARSER; LUCENE_RUN_ONCE( _DEFAULT_DOUBLE_PARSER = newLucene(); CycleCheck::addStatic(_DEFAULT_DOUBLE_PARSER); ); return _DEFAULT_DOUBLE_PARSER; } IntParserPtr FieldCache::NUMERIC_UTILS_INT_PARSER() { static 
NumericUtilsIntParserPtr _NUMERIC_UTILS_INT_PARSER; LUCENE_RUN_ONCE( _NUMERIC_UTILS_INT_PARSER = newLucene(); CycleCheck::addStatic(_NUMERIC_UTILS_INT_PARSER); ); return _NUMERIC_UTILS_INT_PARSER; } LongParserPtr FieldCache::NUMERIC_UTILS_LONG_PARSER() { static NumericUtilsLongParserPtr _NUMERIC_UTILS_LONG_PARSER; LUCENE_RUN_ONCE( _NUMERIC_UTILS_LONG_PARSER = newLucene(); CycleCheck::addStatic(_NUMERIC_UTILS_LONG_PARSER); ); return _NUMERIC_UTILS_LONG_PARSER; } DoubleParserPtr FieldCache::NUMERIC_UTILS_DOUBLE_PARSER() { static NumericUtilsDoubleParserPtr _NUMERIC_UTILS_DOUBLE_PARSER; LUCENE_RUN_ONCE( _NUMERIC_UTILS_DOUBLE_PARSER = newLucene(); CycleCheck::addStatic(_NUMERIC_UTILS_DOUBLE_PARSER); ); return _NUMERIC_UTILS_DOUBLE_PARSER; } Collection FieldCache::getBytes(const IndexReaderPtr& reader, const String& field) { BOOST_ASSERT(false); return Collection(); // override } Collection FieldCache::getBytes(const IndexReaderPtr& reader, const String& field, const ByteParserPtr& parser) { BOOST_ASSERT(false); return Collection(); // override } Collection FieldCache::getInts(const IndexReaderPtr& reader, const String& field) { BOOST_ASSERT(false); return Collection(); // override } Collection FieldCache::getInts(const IndexReaderPtr& reader, const String& field, const IntParserPtr& parser) { BOOST_ASSERT(false); return Collection(); // override } Collection FieldCache::getLongs(const IndexReaderPtr& reader, const String& field) { BOOST_ASSERT(false); return Collection(); // override } Collection FieldCache::getLongs(const IndexReaderPtr& reader, const String& field, const LongParserPtr& parser) { BOOST_ASSERT(false); return Collection(); // override } Collection FieldCache::getDoubles(const IndexReaderPtr& reader, const String& field) { BOOST_ASSERT(false); return Collection(); // override } Collection FieldCache::getDoubles(const IndexReaderPtr& reader, const String& field, const DoubleParserPtr& parser) { BOOST_ASSERT(false); return Collection(); // override } 
Collection FieldCache::getStrings(const IndexReaderPtr& reader, const String& field) { BOOST_ASSERT(false); return Collection(); // override } StringIndexPtr FieldCache::getStringIndex(const IndexReaderPtr& reader, const String& field) { BOOST_ASSERT(false); return StringIndexPtr(); // override } void FieldCache::setInfoStream(const InfoStreamPtr& stream) { BOOST_ASSERT(false); // override } InfoStreamPtr FieldCache::getInfoStream() { BOOST_ASSERT(false); return InfoStreamPtr(); // override } CreationPlaceholder::~CreationPlaceholder() { } StringIndex::StringIndex(Collection values, Collection lookup) { this->order = values; this->lookup = lookup; } StringIndex::~StringIndex() { } int32_t StringIndex::binarySearchLookup(const String& key) { Collection::iterator search = std::lower_bound(lookup.begin(), lookup.end(), key); int32_t keyPos = std::distance(lookup.begin(), search); return (search == lookup.end() || key < *search) ? -(keyPos + 1) : keyPos; } Parser::~Parser() { } ByteParser::~ByteParser() { } uint8_t ByteParser::parseByte(const String& string) { return 0; // override } DefaultByteParser::~DefaultByteParser() { } uint8_t DefaultByteParser::parseByte(const String& string) { return (uint8_t)StringUtils::toInt(string); } String DefaultByteParser::toString() { return FieldCache::_getClassName() + L".DEFAULT_BYTE_PARSER"; } IntParser::~IntParser() { } int32_t IntParser::parseInt(const String& string) { return 0; // override } DefaultIntParser::~DefaultIntParser() { } int32_t DefaultIntParser::parseInt(const String& string) { return StringUtils::toInt(string); } String DefaultIntParser::toString() { return FieldCache::_getClassName() + L".DEFAULT_INT_PARSER"; } NumericUtilsIntParser::~NumericUtilsIntParser() { } int32_t NumericUtilsIntParser::parseInt(const String& string) { int32_t shift = string[0] - NumericUtils::SHIFT_START_INT; if (shift > 0 && shift <= 31) { boost::throw_exception(StopFillCacheException()); } return NumericUtils::prefixCodedToInt(string); 
} String NumericUtilsIntParser::toString() { return FieldCache::_getClassName() + L".NUMERIC_UTILS_INT_PARSER"; } LongParser::~LongParser() { } int64_t LongParser::parseLong(const String& string) { return 0; // override } DefaultLongParser::~DefaultLongParser() { } int64_t DefaultLongParser::parseLong(const String& string) { return StringUtils::toLong(string); } String DefaultLongParser::toString() { return FieldCache::_getClassName() + L".DEFAULT_LONG_PARSER"; } NumericUtilsLongParser::~NumericUtilsLongParser() { } int64_t NumericUtilsLongParser::parseLong(const String& string) { int32_t shift = string[0] - NumericUtils::SHIFT_START_LONG; if (shift > 0 && shift <= 63) { boost::throw_exception(StopFillCacheException()); } return NumericUtils::prefixCodedToLong(string); } String NumericUtilsLongParser::toString() { return FieldCache::_getClassName() + L".NUMERIC_UTILS_LONG_PARSER"; } DoubleParser::~DoubleParser() { } double DoubleParser::parseDouble(const String& string) { return 0; // override } DefaultDoubleParser::~DefaultDoubleParser() { } double DefaultDoubleParser::parseDouble(const String& string) { return StringUtils::toDouble(string); } String DefaultDoubleParser::toString() { return FieldCache::_getClassName() + L".DEFAULT_DOUBLE_PARSER"; } NumericUtilsDoubleParser::~NumericUtilsDoubleParser() { } double NumericUtilsDoubleParser::parseDouble(const String& string) { int32_t shift = string[0] - NumericUtils::SHIFT_START_LONG; if (shift > 0 && shift <= 63) { boost::throw_exception(StopFillCacheException()); } return NumericUtils::sortableLongToDouble(NumericUtils::prefixCodedToLong(string)); } String NumericUtilsDoubleParser::toString() { return FieldCache::_getClassName() + L".NUMERIC_UTILS_DOUBLE_PARSER"; } FieldCacheEntry::~FieldCacheEntry() { } String FieldCacheEntry::toString() { StringStream buffer; buffer << L"'" << getReaderKey()->toString() << L"'=>" << getFieldName() << L"'," << getCacheType(); return buffer.str(); } } 
LucenePlusPlus-rel_3.0.9/src/core/search/FieldCacheImpl.cpp000066400000000000000000000430341456444476200236150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldCacheImpl.h" #include "FieldCacheSanityChecker.h" #include "IndexReader.h" #include "InfoStream.h" #include "TermEnum.h" #include "TermDocs.h" #include "Term.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { FieldCacheImpl::FieldCacheImpl() { } FieldCacheImpl::~FieldCacheImpl() { } void FieldCacheImpl::initialize() { caches = MapStringCache::newInstance(); caches.put(CACHE_BYTE, newLucene(shared_from_this())); caches.put(CACHE_INT, newLucene(shared_from_this())); caches.put(CACHE_LONG, newLucene(shared_from_this())); caches.put(CACHE_DOUBLE, newLucene(shared_from_this())); caches.put(CACHE_STRING, newLucene(shared_from_this())); caches.put(CACHE_STRING_INDEX, newLucene(shared_from_this())); } void FieldCacheImpl::purgeAllCaches() { initialize(); } void FieldCacheImpl::purge(const IndexReaderPtr& r) { for (MapStringCache::iterator cache = caches.begin(); cache != caches.end(); ++cache) { cache->second->purge(r); } } Collection FieldCacheImpl::getCacheEntries() { Collection result(Collection::newInstance()); for (MapStringCache::iterator cache = caches.begin(); cache != caches.end(); ++cache) { for (WeakMapLuceneObjectMapEntryAny::iterator key = cache->second->readerCache.begin(); key != cache->second->readerCache.end(); ++key) { LuceneObjectPtr readerKey(key->first.lock()); // we've now materialized a hard ref if (readerKey) { for (MapEntryAny::iterator mapEntry = key->second.begin(); mapEntry != key->second.end(); ++mapEntry) { 
result.add(newLucene(readerKey, mapEntry->first->field, cache->first, mapEntry->first->custom, mapEntry->second)); } } } } return result; } Collection FieldCacheImpl::getBytes(const IndexReaderPtr& reader, const String& field) { return getBytes(reader, field, ByteParserPtr()); } Collection FieldCacheImpl::getBytes(const IndexReaderPtr& reader, const String& field, const ByteParserPtr& parser) { return VariantUtils::get< Collection >(caches.get(CACHE_BYTE)->get(reader, newLucene(field, parser))); } Collection FieldCacheImpl::getInts(const IndexReaderPtr& reader, const String& field) { return getInts(reader, field, IntParserPtr()); } Collection FieldCacheImpl::getInts(const IndexReaderPtr& reader, const String& field, const IntParserPtr& parser) { return VariantUtils::get< Collection >(caches.get(CACHE_INT)->get(reader, newLucene(field, parser))); } Collection FieldCacheImpl::getLongs(const IndexReaderPtr& reader, const String& field) { return getLongs(reader, field, LongParserPtr()); } Collection FieldCacheImpl::getLongs(const IndexReaderPtr& reader, const String& field, const LongParserPtr& parser) { return VariantUtils::get< Collection >(caches.get(CACHE_LONG)->get(reader, newLucene(field, parser))); } Collection FieldCacheImpl::getDoubles(const IndexReaderPtr& reader, const String& field) { return getDoubles(reader, field, DoubleParserPtr()); } Collection FieldCacheImpl::getDoubles(const IndexReaderPtr& reader, const String& field, const DoubleParserPtr& parser) { return VariantUtils::get< Collection >(caches.get(CACHE_DOUBLE)->get(reader, newLucene(field, parser))); } Collection FieldCacheImpl::getStrings(const IndexReaderPtr& reader, const String& field) { return VariantUtils::get< Collection >(caches.get(CACHE_STRING)->get(reader, newLucene(field, ParserPtr()))); } StringIndexPtr FieldCacheImpl::getStringIndex(const IndexReaderPtr& reader, const String& field) { return VariantUtils::get< StringIndexPtr >(caches.get(CACHE_STRING_INDEX)->get(reader, 
newLucene(field, ParserPtr()))); } void FieldCacheImpl::setInfoStream(const InfoStreamPtr& stream) { infoStream = stream; } InfoStreamPtr FieldCacheImpl::getInfoStream() { return infoStream; } Entry::Entry(const String& field, const boost::any& custom) { this->field = field; this->custom = custom; } Entry::~Entry() { } bool Entry::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } EntryPtr otherEntry(boost::dynamic_pointer_cast(other)); if (otherEntry) { if (otherEntry->field == field) { return VariantUtils::equalsType(custom, otherEntry->custom); } } return false; } int32_t Entry::hashCode() { return StringUtils::hashCode(field) ^ VariantUtils::hashCode(custom); } Cache::Cache(const FieldCachePtr& wrapper) { this->_wrapper = wrapper; this->readerCache = WeakMapLuceneObjectMapEntryAny::newInstance(); } Cache::~Cache() { } void Cache::purge(const IndexReaderPtr& r) { LuceneObjectPtr readerKey(r->getFieldCacheKey()); SyncLock cacheLock(&readerCache); readerCache.remove(readerKey); } boost::any Cache::get(const IndexReaderPtr& reader, const EntryPtr& key) { MapEntryAny innerCache; boost::any value; LuceneObjectPtr readerKey(reader->getFieldCacheKey()); { SyncLock cacheLock(&readerCache); innerCache = readerCache.get(readerKey); if (!innerCache) { innerCache = MapEntryAny::newInstance(); readerCache.put(readerKey, innerCache); } else if (innerCache.contains(key)) { value = innerCache[key]; } if (VariantUtils::isNull(value)) { value = newLucene(); innerCache.put(key, value); } } if (VariantUtils::typeOf(value)) { CreationPlaceholderPtr progress(VariantUtils::get(value)); SyncLock valueLock(progress); if (VariantUtils::isNull(progress->value)) { progress->value = createValue(reader, key); { SyncLock cacheLock(&readerCache); innerCache.put(key, progress->value); } FieldCachePtr wrapper(_wrapper); // Only check if key.custom (the parser) is non-null; else, we check twice for a single // call to FieldCache.getXXX if 
(!VariantUtils::isNull(key->custom) && wrapper) { InfoStreamPtr infoStream(wrapper->getInfoStream()); if (infoStream) { printNewInsanity(infoStream, progress->value); } } } return progress->value; } return value; } void Cache::printNewInsanity(const InfoStreamPtr& infoStream, const boost::any& value) { Collection insanities(FieldCacheSanityChecker::checkSanity(FieldCachePtr(_wrapper))); for (Collection::iterator insanity = insanities.begin(); insanity != insanities.end(); ++insanity) { Collection entries((*insanity)->getCacheEntries()); for (Collection::iterator entry = entries.begin(); entry != entries.end(); ++entry) { if (VariantUtils::equalsType((*entry)->getValue(), value)) { // OK this insanity involves our entry *infoStream << L"WARNING: new FieldCache insanity created\nDetails: " + (*insanity)->toString() << L"\n"; break; } } } } ByteCache::ByteCache(const FieldCachePtr& wrapper) : Cache(wrapper) { } ByteCache::~ByteCache() { } boost::any ByteCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) { EntryPtr entry(key); String field(entry->field); ByteParserPtr parser(VariantUtils::get(entry->custom)); if (!parser) { return FieldCachePtr(_wrapper)->getBytes(reader, field, FieldCache::DEFAULT_BYTE_PARSER()); } Collection retArray(Collection::newInstance(reader->maxDoc())); TermDocsPtr termDocs(reader->termDocs()); TermEnumPtr termEnum(reader->terms(newLucene(field))); LuceneException finally; try { do { TermPtr term(termEnum->term()); if (!term || term->field() != field) { break; } uint8_t termval = parser->parseByte(term->text()); termDocs->seek(termEnum); while (termDocs->next()) { retArray[termDocs->doc()] = termval; } } while (termEnum->next()); } catch (StopFillCacheException&) { } catch (LuceneException& e) { finally = e; } termDocs->close(); termEnum->close(); finally.throwException(); return retArray; } IntCache::IntCache(const FieldCachePtr& wrapper) : Cache(wrapper) { } IntCache::~IntCache() { } boost::any IntCache::createValue(const 
IndexReaderPtr& reader, const EntryPtr& key) { EntryPtr entry(key); String field(entry->field); IntParserPtr parser(VariantUtils::get(entry->custom)); if (!parser) { FieldCachePtr wrapper(_wrapper); boost::any ints; try { ints = wrapper->getInts(reader, field, FieldCache::DEFAULT_INT_PARSER()); } catch (NumberFormatException&) { ints = wrapper->getInts(reader, field, FieldCache::NUMERIC_UTILS_INT_PARSER()); } return ints; } Collection retArray; TermDocsPtr termDocs(reader->termDocs()); TermEnumPtr termEnum(reader->terms(newLucene(field))); LuceneException finally; try { do { TermPtr term(termEnum->term()); if (!term || term->field() != field) { break; } int32_t termval = parser->parseInt(term->text()); if (!retArray) { // late init retArray = Collection::newInstance(reader->maxDoc()); } termDocs->seek(termEnum); while (termDocs->next()) { retArray[termDocs->doc()] = termval; } } while (termEnum->next()); } catch (StopFillCacheException&) { } catch (LuceneException& e) { finally = e; } termDocs->close(); termEnum->close(); finally.throwException(); if (!retArray) { // no values retArray = Collection::newInstance(reader->maxDoc()); } return retArray; } LongCache::LongCache(const FieldCachePtr& wrapper) : Cache(wrapper) { } LongCache::~LongCache() { } boost::any LongCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) { EntryPtr entry(key); String field(entry->field); LongParserPtr parser(VariantUtils::get(entry->custom)); if (!parser) { FieldCachePtr wrapper(_wrapper); boost::any longs; try { longs = wrapper->getLongs(reader, field, FieldCache::DEFAULT_LONG_PARSER()); } catch (NumberFormatException&) { longs = wrapper->getLongs(reader, field, FieldCache::NUMERIC_UTILS_LONG_PARSER()); } return longs; } Collection retArray; TermDocsPtr termDocs(reader->termDocs()); TermEnumPtr termEnum(reader->terms(newLucene(field))); LuceneException finally; try { do { TermPtr term(termEnum->term()); if (!term || term->field() != field) { break; } int64_t termval = 
parser->parseLong(term->text()); if (!retArray) { // late init retArray = Collection::newInstance(reader->maxDoc()); } termDocs->seek(termEnum); while (termDocs->next()) { retArray[termDocs->doc()] = termval; } } while (termEnum->next()); } catch (StopFillCacheException&) { } catch (LuceneException& e) { finally = e; } termDocs->close(); termEnum->close(); finally.throwException(); if (!retArray) { // no values retArray = Collection::newInstance(reader->maxDoc()); } return retArray; } DoubleCache::DoubleCache(const FieldCachePtr& wrapper) : Cache(wrapper) { } DoubleCache::~DoubleCache() { } boost::any DoubleCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) { EntryPtr entry(key); String field(entry->field); DoubleParserPtr parser(VariantUtils::get(entry->custom)); if (!parser) { FieldCachePtr wrapper(_wrapper); boost::any doubles; try { doubles = wrapper->getDoubles(reader, field, FieldCache::DEFAULT_DOUBLE_PARSER()); } catch (NumberFormatException&) { doubles = wrapper->getDoubles(reader, field, FieldCache::NUMERIC_UTILS_DOUBLE_PARSER()); } return doubles; } Collection retArray; TermDocsPtr termDocs(reader->termDocs()); TermEnumPtr termEnum(reader->terms(newLucene(field))); LuceneException finally; try { do { TermPtr term(termEnum->term()); if (!term || term->field() != field) { break; } double termval = parser->parseDouble(term->text()); if (!retArray) { // late init retArray = Collection::newInstance(reader->maxDoc()); } termDocs->seek(termEnum); while (termDocs->next()) { retArray[termDocs->doc()] = termval; } } while (termEnum->next()); } catch (StopFillCacheException&) { } catch (LuceneException& e) { finally = e; } termDocs->close(); termEnum->close(); finally.throwException(); if (!retArray) { // no values retArray = Collection::newInstance(reader->maxDoc()); } return retArray; } StringCache::StringCache(const FieldCachePtr& wrapper) : Cache(wrapper) { } StringCache::~StringCache() { } boost::any StringCache::createValue(const 
IndexReaderPtr& reader, const EntryPtr& key) { EntryPtr entry(key); String field(entry->field); Collection retArray(Collection::newInstance(reader->maxDoc())); TermDocsPtr termDocs(reader->termDocs()); TermEnumPtr termEnum(reader->terms(newLucene(field))); LuceneException finally; try { do { TermPtr term(termEnum->term()); if (!term || term->field() != field) { break; } String termval(term->text()); termDocs->seek(termEnum); while (termDocs->next()) { retArray[termDocs->doc()] = termval; } } while (termEnum->next()); } catch (LuceneException& e) { finally = e; } termDocs->close(); termEnum->close(); finally.throwException(); return retArray; } StringIndexCache::StringIndexCache(const FieldCachePtr& wrapper) : Cache(wrapper) { } StringIndexCache::~StringIndexCache() { } boost::any StringIndexCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) { EntryPtr entry(key); String field(entry->field); Collection retArray(Collection::newInstance(reader->maxDoc())); Collection mterms(Collection::newInstance(reader->maxDoc() + 1)); TermDocsPtr termDocs(reader->termDocs()); TermEnumPtr termEnum(reader->terms(newLucene(field))); int32_t t = 0; // current term number // an entry for documents that have no terms in this field should a document with no terms be at // top or bottom? This puts them at the top - if it is changed, FieldDocSortedHitQueue needs to // change as well. 
mterms[t++] = L""; LuceneException finally; try { do { TermPtr term(termEnum->term()); if (!term || term->field() != field || t >= mterms.size() ) { break; } // store term text mterms[t] = term->text(); termDocs->seek(termEnum); while (termDocs->next()) { retArray[termDocs->doc()] = t; } ++t; } while (termEnum->next()); } catch (LuceneException& e) { finally = e; } termDocs->close(); termEnum->close(); finally.throwException(); if (t == 0) { // if there are no terms, make the term array have a single null entry mterms = Collection::newInstance(1); } else if (t < mterms.size()) { // if there are less terms than documents, trim off the dead array space mterms.resize(t); } return newLucene(retArray, mterms); } FieldCacheEntryImpl::FieldCacheEntryImpl(const LuceneObjectPtr& readerKey, const String& fieldName, int32_t cacheType, const boost::any& custom, const boost::any& value) { this->readerKey = readerKey; this->fieldName = fieldName; this->cacheType = cacheType; this->custom = custom; this->value = value; } FieldCacheEntryImpl::~FieldCacheEntryImpl() { } LuceneObjectPtr FieldCacheEntryImpl::getReaderKey() { return readerKey; } String FieldCacheEntryImpl::getFieldName() { return fieldName; } int32_t FieldCacheEntryImpl::getCacheType() { return cacheType; } boost::any FieldCacheEntryImpl::getCustom() { return custom; } boost::any FieldCacheEntryImpl::getValue() { return value; } } LucenePlusPlus-rel_3.0.9/src/core/search/FieldCacheRangeFilter.cpp000066400000000000000000000343741456444476200251250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldCacheRangeFilter.h" #include "_FieldCacheRangeFilter.h" #include "FieldCache.h" #include "IndexReader.h" #include "TermDocs.h" #include "NumericUtils.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { FieldCacheRangeFilter::FieldCacheRangeFilter(const String& field, const ParserPtr& parser, bool includeLower, bool includeUpper) { this->field = field; this->parser = parser; this->includeLower = includeLower; this->includeUpper = includeUpper; } FieldCacheRangeFilter::~FieldCacheRangeFilter() { } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newStringRange(const String& field, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper) { return newLucene(field, ParserPtr(), lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newByteRange(const String& field, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper) { return newByteRange(field, ByteParserPtr(), lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newByteRange(const String& field, const ByteParserPtr& parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper) { return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newIntRange(const String& field, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper) { return newIntRange(field, IntParserPtr(), lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newIntRange(const String& field, const IntParserPtr& parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper) { return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr 
FieldCacheRangeFilter::newLongRange(const String& field, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper) { return newLongRange(field, LongParserPtr(), lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newLongRange(const String& field, const LongParserPtr& parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper) { return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newDoubleRange(const String& field, double lowerVal, double upperVal, bool includeLower, bool includeUpper) { return newDoubleRange(field, DoubleParserPtr(), lowerVal, upperVal, includeLower, includeUpper); } FieldCacheRangeFilterPtr FieldCacheRangeFilter::newDoubleRange(const String& field, const DoubleParserPtr& parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper) { return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); } String FieldCacheRangeFilter::getField() { return field; } bool FieldCacheRangeFilter::includesLower() { return includeLower; } bool FieldCacheRangeFilter::includesUpper() { return includeUpper; } ParserPtr FieldCacheRangeFilter::getParser() { return parser; } FieldCacheRangeFilterString::FieldCacheRangeFilterString(const String& field, const ParserPtr& parser, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper) : FieldCacheRangeFilter(field, parser, includeLower, includeUpper) { this->lowerVal = lowerVal; this->upperVal = upperVal; } FieldCacheRangeFilterString::~FieldCacheRangeFilterString() { } DocIdSetPtr FieldCacheRangeFilterString::getDocIdSet(const IndexReaderPtr& reader) { StringIndexPtr fcsi(FieldCache::DEFAULT()->getStringIndex(reader, field)); int32_t lowerPoint = fcsi->binarySearchLookup(lowerVal); int32_t upperPoint = fcsi->binarySearchLookup(upperVal); int32_t inclusiveLowerPoint = 0; int32_t 
inclusiveUpperPoint = 0; // Hints: // * binarySearchLookup returns 0, if value was null. // * the value is <0 if no exact hit was found, the returned value is (-(insertion point) - 1) if (lowerPoint == 0) { BOOST_ASSERT(lowerVal.empty()); inclusiveLowerPoint = 1; } else if (includeLower && lowerPoint > 0) { inclusiveLowerPoint = lowerPoint; } else if (lowerPoint > 0) { inclusiveLowerPoint = lowerPoint + 1; } else { inclusiveLowerPoint = std::max((int32_t)1, -lowerPoint - 1); } if (upperPoint == 0) { BOOST_ASSERT(upperVal.empty()); inclusiveUpperPoint = INT_MAX; } else if (includeUpper && upperPoint > 0) { inclusiveUpperPoint = upperPoint; } else if (upperPoint > 0) { inclusiveUpperPoint = upperPoint - 1; } else { inclusiveUpperPoint = -upperPoint - 2; } if (inclusiveUpperPoint <= 0 || inclusiveLowerPoint > inclusiveUpperPoint) { return DocIdSet::EMPTY_DOCIDSET(); } BOOST_ASSERT(inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0); // for this DocIdSet, we never need to use TermDocs, because deleted docs have an order of 0 // (null entry in StringIndex) return newLucene(reader, false, fcsi, inclusiveLowerPoint, inclusiveUpperPoint); } String FieldCacheRangeFilterString::toString() { StringStream buffer; buffer << field << L":" << (includeLower ? L"[" : L"{"); buffer << lowerVal << L" TO " << lowerVal; buffer << (includeLower ? L"]" : L"}"); return buffer.str(); } bool FieldCacheRangeFilterString::equals(const LuceneObjectPtr& other) { if (Filter::equals(other)) { return true; } FieldCacheRangeFilterStringPtr otherFilter(boost::dynamic_pointer_cast(other)); if (!otherFilter) { return false; } if (field != otherFilter->field || includeLower != otherFilter->includeLower || includeUpper != otherFilter->includeUpper) { return false; } if (lowerVal != otherFilter->lowerVal || upperVal != otherFilter->upperVal) { return false; } if (parser.get() != NULL ? 
!parser->equals(otherFilter->parser) : otherFilter->parser.get() != NULL) { return false; } return true; } int32_t FieldCacheRangeFilterString::hashCode() { int32_t code = StringUtils::hashCode(field); code ^= lowerVal.empty() ? 550356204 : StringUtils::hashCode(lowerVal); code = (code << 1) | MiscUtils::unsignedShift(code, 31); // rotate to distinguish lower from upper code ^= upperVal.empty() ? -1674416163 : StringUtils::hashCode(upperVal); code ^= parser ? parser->hashCode() : -1572457324; code ^= (includeLower ? 1549299360 : -365038026) ^ (includeUpper ? 1721088258 : 1948649653); return code; } FieldCacheRangeFilterByte::FieldCacheRangeFilterByte(const String& field, const ParserPtr& parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper) : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, UCHAR_MAX, includeLower, includeUpper) { } FieldCacheRangeFilterByte::~FieldCacheRangeFilterByte() { } Collection FieldCacheRangeFilterByte::getValues(const IndexReaderPtr& reader) { return FieldCache::DEFAULT()->getBytes(reader, field, boost::static_pointer_cast(parser)); } FieldCacheRangeFilterInt::FieldCacheRangeFilterInt(const String& field, const ParserPtr& parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper) : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, INT_MAX, includeLower, includeUpper) { } FieldCacheRangeFilterInt::~FieldCacheRangeFilterInt() { } Collection FieldCacheRangeFilterInt::getValues(const IndexReaderPtr& reader) { return FieldCache::DEFAULT()->getInts(reader, field, boost::static_pointer_cast(parser)); } FieldCacheRangeFilterLong::FieldCacheRangeFilterLong(const String& field, const ParserPtr& parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper) : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, std::numeric_limits::max(), includeLower, includeUpper) { } FieldCacheRangeFilterLong::~FieldCacheRangeFilterLong() { } Collection 
FieldCacheRangeFilterLong::getValues(const IndexReaderPtr& reader) { return FieldCache::DEFAULT()->getLongs(reader, field, boost::static_pointer_cast(parser)); } FieldCacheRangeFilterDouble::FieldCacheRangeFilterDouble(const String& field, const ParserPtr& parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper) : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, std::numeric_limits::infinity(), includeLower, includeUpper) { } FieldCacheRangeFilterDouble::~FieldCacheRangeFilterDouble() { } DocIdSetPtr FieldCacheRangeFilterDouble::getDocIdSet(const IndexReaderPtr& reader) { if (!includeLower && lowerVal > 0.0 && MiscUtils::isInfinite(lowerVal)) { return DocIdSet::EMPTY_DOCIDSET(); } int64_t lower = NumericUtils::doubleToSortableLong(lowerVal); double inclusiveLowerPoint = NumericUtils::sortableLongToDouble(includeLower ? lower : (lower + 1)); if (!includeUpper && upperVal < 0.0 && MiscUtils::isInfinite(upperVal)) { return DocIdSet::EMPTY_DOCIDSET(); } int64_t upper = NumericUtils::doubleToSortableLong(upperVal); double inclusiveUpperPoint = NumericUtils::sortableLongToDouble(includeUpper ? 
upper : (upper - 1)); if (inclusiveLowerPoint > inclusiveUpperPoint) { return DocIdSet::EMPTY_DOCIDSET(); } // we only request the usage of termDocs, if the range contains 0 return newLucene< FieldCacheDocIdSetNumeric >(reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0), getValues(reader), inclusiveLowerPoint, inclusiveUpperPoint); } Collection FieldCacheRangeFilterDouble::getValues(const IndexReaderPtr& reader) { return FieldCache::DEFAULT()->getDoubles(reader, field, boost::static_pointer_cast(parser)); } FieldCacheDocIdSet::FieldCacheDocIdSet(const IndexReaderPtr& reader, bool mayUseTermDocs) { this->reader = reader; this->mayUseTermDocs = mayUseTermDocs; } FieldCacheDocIdSet::~FieldCacheDocIdSet() { } bool FieldCacheDocIdSet::isCacheable() { return !(mayUseTermDocs && reader->hasDeletions()); } DocIdSetIteratorPtr FieldCacheDocIdSet::iterator() { // Synchronization needed because deleted docs BitVector can change after call to hasDeletions until // TermDocs creation. We only use an iterator with termDocs, when this was requested (eg. range // contains 0) and the index has deletions TermDocsPtr termDocs; { SyncLock instancesLock(reader); termDocs = isCacheable() ? 
TermDocsPtr() : reader->termDocs(TermPtr()); } if (termDocs) { // a DocIdSetIterator using TermDocs to iterate valid docIds return newLucene(shared_from_this(), termDocs); } else { // a DocIdSetIterator generating docIds by incrementing a variable - this one can be used if there // are no deletions are on the index return newLucene(shared_from_this()); } } FieldCacheDocIdSetString::FieldCacheDocIdSetString(const IndexReaderPtr& reader, bool mayUseTermDocs, const StringIndexPtr& fcsi, int32_t inclusiveLowerPoint, int32_t inclusiveUpperPoint) : FieldCacheDocIdSet(reader, mayUseTermDocs) { this->fcsi = fcsi; this->inclusiveLowerPoint = inclusiveLowerPoint; this->inclusiveUpperPoint = inclusiveUpperPoint; } FieldCacheDocIdSetString::~FieldCacheDocIdSetString() { } bool FieldCacheDocIdSetString::matchDoc(int32_t doc) { if (doc < 0 || doc >= fcsi->order.size()) { boost::throw_exception(IndexOutOfBoundsException()); } return (fcsi->order[doc] >= inclusiveLowerPoint && fcsi->order[doc] <= inclusiveUpperPoint); } FieldDocIdSetIteratorTermDocs::FieldDocIdSetIteratorTermDocs(const FieldCacheDocIdSetPtr& cacheDocIdSet, const TermDocsPtr& termDocs) { this->_cacheDocIdSet = cacheDocIdSet; this->termDocs = termDocs; this->doc = -1; } FieldDocIdSetIteratorTermDocs::~FieldDocIdSetIteratorTermDocs() { } int32_t FieldDocIdSetIteratorTermDocs::docID() { return doc; } int32_t FieldDocIdSetIteratorTermDocs::nextDoc() { FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); do { if (!termDocs->next()) { doc = NO_MORE_DOCS; return doc; } } while (!cacheDocIdSet->matchDoc(doc = termDocs->doc())); return doc; } int32_t FieldDocIdSetIteratorTermDocs::advance(int32_t target) { FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); if (!termDocs->skipTo(target)) { doc = NO_MORE_DOCS; return doc; } while (!cacheDocIdSet->matchDoc(doc = termDocs->doc())) { if (!termDocs->next()) { doc = NO_MORE_DOCS; return doc; } } return doc; } FieldDocIdSetIteratorIncrement::FieldDocIdSetIteratorIncrement(const 
FieldCacheDocIdSetPtr& cacheDocIdSet) { this->_cacheDocIdSet = cacheDocIdSet; this->doc = -1; } FieldDocIdSetIteratorIncrement::~FieldDocIdSetIteratorIncrement() { } int32_t FieldDocIdSetIteratorIncrement::docID() { return doc; } int32_t FieldDocIdSetIteratorIncrement::nextDoc() { FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); try { do { ++doc; } while (!cacheDocIdSet->matchDoc(doc)); return doc; } catch (IndexOutOfBoundsException&) { doc = NO_MORE_DOCS; return doc; } } int32_t FieldDocIdSetIteratorIncrement::advance(int32_t target) { FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); try { doc = target; while (!cacheDocIdSet->matchDoc(doc)) { ++doc; } return doc; } catch (IndexOutOfBoundsException&) { doc = NO_MORE_DOCS; return doc; } } } LucenePlusPlus-rel_3.0.9/src/core/search/FieldCacheTermsFilter.cpp000066400000000000000000000060541456444476200251550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldCacheTermsFilter.h" #include "_FieldCacheTermsFilter.h" #include "FieldCache.h" #include "OpenBitSet.h" namespace Lucene { FieldCacheTermsFilter::FieldCacheTermsFilter(const String& field, Collection terms) { this->field = field; this->terms = terms; } FieldCacheTermsFilter::~FieldCacheTermsFilter() { } FieldCachePtr FieldCacheTermsFilter::getFieldCache() { return FieldCache::DEFAULT(); } DocIdSetPtr FieldCacheTermsFilter::getDocIdSet(const IndexReaderPtr& reader) { return newLucene(terms, getFieldCache()->getStringIndex(reader, field)); } FieldCacheTermsFilterDocIdSet::FieldCacheTermsFilterDocIdSet(Collection terms, const StringIndexPtr& fcsi) { this->fcsi = fcsi; openBitSet = newLucene(this->fcsi->lookup.size()); for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { int32_t termNumber = this->fcsi->binarySearchLookup(*term); if (termNumber > 0) { openBitSet->set(termNumber); } } } FieldCacheTermsFilterDocIdSet::~FieldCacheTermsFilterDocIdSet() { } DocIdSetIteratorPtr FieldCacheTermsFilterDocIdSet::iterator() { return newLucene(fcsi, openBitSet); } bool FieldCacheTermsFilterDocIdSet::isCacheable() { return true; } FieldCacheTermsFilterDocIdSetIterator::FieldCacheTermsFilterDocIdSetIterator(const StringIndexPtr& fcsi, const OpenBitSetPtr& openBitSet) { this->fcsi = fcsi; this->openBitSet = openBitSet; this->doc = -1; } FieldCacheTermsFilterDocIdSetIterator::~FieldCacheTermsFilterDocIdSetIterator() { } int32_t FieldCacheTermsFilterDocIdSetIterator::docID() { return doc; } int32_t FieldCacheTermsFilterDocIdSetIterator::nextDoc() { try { if (++doc >= fcsi->order.size()) { boost::throw_exception(IndexOutOfBoundsException()); } while (!openBitSet->fastGet(fcsi->order[doc])) { if (++doc >= fcsi->order.size()) { boost::throw_exception(IndexOutOfBoundsException()); } } } catch (IndexOutOfBoundsException&) { doc = NO_MORE_DOCS; } 
return doc; } int32_t FieldCacheTermsFilterDocIdSetIterator::advance(int32_t target) { try { doc = target; if (doc < 0 || doc >= fcsi->order.size()) { boost::throw_exception(IndexOutOfBoundsException()); } while (!openBitSet->fastGet(fcsi->order[doc])) { if (++doc >= fcsi->order.size()) { boost::throw_exception(IndexOutOfBoundsException()); } } } catch (IndexOutOfBoundsException&) { doc = NO_MORE_DOCS; } return doc; } } LucenePlusPlus-rel_3.0.9/src/core/search/FieldComparator.cpp000066400000000000000000000237121456444476200241000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldComparator.h" #include "FieldCache.h" #include "ScoreCachingWrappingScorer.h" #include "Collator.h" namespace Lucene { FieldComparator::~FieldComparator() { } void FieldComparator::setScorer(const ScorerPtr& scorer) { // Empty implementation since most comparators don't need the score. // This can be overridden by those that need it. 
} ByteComparator::ByteComparator(int32_t numHits, const String& field, const ParserPtr& parser) : NumericComparator(numHits, field) { this->parser = boost::static_pointer_cast(parser); } ByteComparator::~ByteComparator() { } void ByteComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { currentReaderValues = FieldCache::DEFAULT()->getBytes(reader, field, parser); } DocComparator::DocComparator(int32_t numHits) : NumericComparator(numHits) { this->docBase = 0; } DocComparator::~DocComparator() { } int32_t DocComparator::compareBottom(int32_t doc) { // No overflow risk because docIDs are non-negative return (bottom - (docBase + doc)); } void DocComparator::copy(int32_t slot, int32_t doc) { values[slot] = docBase + doc; } void DocComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { this->docBase = docBase; } DoubleComparator::DoubleComparator(int32_t numHits, const String& field, const ParserPtr& parser) : NumericComparator(numHits, field) { this->parser = boost::static_pointer_cast(parser); } DoubleComparator::~DoubleComparator() { } int32_t DoubleComparator::compare(int32_t slot1, int32_t slot2) { double v1 = values[slot1]; double v2 = values[slot2]; return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); } int32_t DoubleComparator::compareBottom(int32_t doc) { double v2 = currentReaderValues[doc]; return bottom > v2 ? 1 : (bottom < v2 ? -1 : 0); } void DoubleComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { currentReaderValues = FieldCache::DEFAULT()->getDoubles(reader, field, parser); } IntComparator::IntComparator(int32_t numHits, const String& field, const ParserPtr& parser) : NumericComparator(numHits, field) { this->parser = boost::static_pointer_cast(parser); } IntComparator::~IntComparator() { } int32_t IntComparator::compare(int32_t slot1, int32_t slot2) { int32_t v1 = values[slot1]; int32_t v2 = values[slot2]; return v1 > v2 ? 1 : (v1 < v2 ? 
-1 : 0); } int32_t IntComparator::compareBottom(int32_t doc) { int32_t v2 = currentReaderValues[doc]; return bottom > v2 ? 1 : (bottom < v2 ? -1 : 0); } void IntComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { currentReaderValues = FieldCache::DEFAULT()->getInts(reader, field, parser); } LongComparator::LongComparator(int32_t numHits, const String& field, const ParserPtr& parser) : NumericComparator(numHits, field) { this->parser = boost::static_pointer_cast(parser); } LongComparator::~LongComparator() { } int32_t LongComparator::compare(int32_t slot1, int32_t slot2) { int64_t v1 = values[slot1]; int64_t v2 = values[slot2]; return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); } int32_t LongComparator::compareBottom(int32_t doc) { int64_t v2 = currentReaderValues[doc]; return bottom > v2 ? 1 : (bottom < v2 ? -1 : 0); } void LongComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { currentReaderValues = FieldCache::DEFAULT()->getLongs(reader, field, parser); } RelevanceComparator::RelevanceComparator(int32_t numHits) : NumericComparator(numHits) { } RelevanceComparator::~RelevanceComparator() { } int32_t RelevanceComparator::compare(int32_t slot1, int32_t slot2) { double score1 = values[slot1]; double score2 = values[slot2]; return score1 > score2 ? -1 : (score1 < score2 ? 1 : 0); } int32_t RelevanceComparator::compareBottom(int32_t doc) { double score = scorer->score(); return bottom > score ? -1 : (bottom < score ? 
1 : 0); } void RelevanceComparator::copy(int32_t slot, int32_t doc) { values[slot] = scorer->score(); } void RelevanceComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { } void RelevanceComparator::setScorer(const ScorerPtr& scorer) { this->scorer = newLucene(scorer); } StringComparatorLocale::StringComparatorLocale(int32_t numHits, const String& field, const std::locale& locale) : collator(newLucene(locale)) { this->values = Collection::newInstance(numHits); this->field = field; } StringComparatorLocale::~StringComparatorLocale() { } int32_t StringComparatorLocale::compare(int32_t slot1, int32_t slot2) { return collator->compare(values[slot1], values[slot2]); } int32_t StringComparatorLocale::compareBottom(int32_t doc) { return collator->compare(bottom, currentReaderValues[doc]); } void StringComparatorLocale::copy(int32_t slot, int32_t doc) { values[slot] = currentReaderValues[doc]; } void StringComparatorLocale::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { currentReaderValues = FieldCache::DEFAULT()->getStrings(reader, field); } void StringComparatorLocale::setBottom(int32_t slot) { bottom = values[slot]; } ComparableValue StringComparatorLocale::value(int32_t slot) { return values[slot]; } StringOrdValComparator::StringOrdValComparator(int32_t numHits, const String& field, int32_t sortPos, bool reversed) { this->ords = Collection::newInstance(numHits); this->values = Collection::newInstance(numHits); this->readerGen = Collection::newInstance(numHits); this->sortPos = sortPos; this->reversed = reversed; this->field = field; this->currentReaderGen = -1; this->bottomSlot = -1; this->bottomOrd = 0; } StringOrdValComparator::~StringOrdValComparator() { } int32_t StringOrdValComparator::compare(int32_t slot1, int32_t slot2) { if (readerGen[slot1] == readerGen[slot2]) { int32_t cmp = ords[slot1] - ords[slot2]; if (cmp != 0) { return cmp; } } return values[slot1].compare(values[slot2]); } int32_t 
StringOrdValComparator::compareBottom(int32_t doc) { BOOST_ASSERT(bottomSlot != -1); int32_t order = this->order[doc]; int32_t cmp = bottomOrd - order; if (cmp != 0) { return cmp; } return bottomValue.compare(lookup[order]); } void StringOrdValComparator::convert(int32_t slot) { readerGen[slot] = currentReaderGen; int32_t index = 0; String value(values[slot]); if (value.empty()) { ords[slot] = 0; return; } if (sortPos == 0 && bottomSlot != -1 && bottomSlot != slot) { // Since we are the primary sort, the entries in the queue are bounded by bottomOrd BOOST_ASSERT(bottomOrd < lookup.size()); if (reversed) { index = binarySearch(lookup, value, bottomOrd, lookup.size() - 1); } else { index = binarySearch(lookup, value, 0, bottomOrd); } } else { // Full binary search index = binarySearch(lookup, value, 0, lookup.size() - 1); } if (index < 0) { index = -index - 2; } ords[slot] = index; } int32_t StringOrdValComparator::binarySearch(Collection lookup, const String& key, int32_t low, int32_t high) { Collection::iterator search = std::lower_bound(lookup.begin() + low, lookup.begin() + high, key); int32_t keyPos = std::distance(lookup.begin(), search); return (search == lookup.end() || key < *search) ? 
-(keyPos + 1) : keyPos; } void StringOrdValComparator::copy(int32_t slot, int32_t doc) { int32_t ord = order[doc]; ords[slot] = ord; BOOST_ASSERT(ord >= 0); values[slot] = lookup[ord]; readerGen[slot] = currentReaderGen; } void StringOrdValComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { StringIndexPtr currentReaderValues(FieldCache::DEFAULT()->getStringIndex(reader, field)); ++currentReaderGen; order = currentReaderValues->order; lookup = currentReaderValues->lookup; BOOST_ASSERT(!lookup.empty()); if (bottomSlot != -1) { convert(bottomSlot); bottomOrd = ords[bottomSlot]; } } void StringOrdValComparator::setBottom(int32_t slot) { bottomSlot = slot; if (readerGen[slot] != currentReaderGen) { convert(bottomSlot); } bottomOrd = ords[slot]; BOOST_ASSERT(bottomOrd >= 0); BOOST_ASSERT(bottomOrd < lookup.size()); bottomValue = values[slot]; } ComparableValue StringOrdValComparator::value(int32_t slot) { return values[slot]; } Collection StringOrdValComparator::getValues() { return values; } int32_t StringOrdValComparator::getBottomSlot() { return bottomSlot; } String StringOrdValComparator::getField() { return field; } StringValComparator::StringValComparator(int32_t numHits, const String& field) { this->values = Collection::newInstance(numHits); this->field = field; } StringValComparator::~StringValComparator() { } int32_t StringValComparator::compare(int32_t slot1, int32_t slot2) { return values[slot1].compare(values[slot2]); } int32_t StringValComparator::compareBottom(int32_t doc) { return bottom.compare(currentReaderValues[doc]); } void StringValComparator::copy(int32_t slot, int32_t doc) { values[slot] = currentReaderValues[doc]; } void StringValComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { currentReaderValues = FieldCache::DEFAULT()->getStrings(reader, field); } void StringValComparator::setBottom(int32_t slot) { bottom = values[slot]; } ComparableValue StringValComparator::value(int32_t slot) { return 
values[slot]; } } LucenePlusPlus-rel_3.0.9/src/core/search/FieldComparatorSource.cpp000066400000000000000000000007311456444476200252550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldComparatorSource.h" namespace Lucene { FieldComparatorSource::~FieldComparatorSource() { } } LucenePlusPlus-rel_3.0.9/src/core/search/FieldDoc.cpp000066400000000000000000000016451456444476200224770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldDoc.h" namespace Lucene { FieldDoc::FieldDoc(int32_t doc, double score, Collection fields) : ScoreDoc(doc, score) { this->fields = fields; } FieldDoc::~FieldDoc() { } String FieldDoc::toString() { StringStream buffer; buffer << ScoreDoc::toString() << L"["; for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if (field != fields.begin()) { buffer << L", "; } buffer << *field; } buffer << L"]"; return buffer.str(); } } LucenePlusPlus-rel_3.0.9/src/core/search/FieldDocSortedHitQueue.cpp000066400000000000000000000045461456444476200253350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldDocSortedHitQueue.h" #include "FieldDoc.h" #include "SortField.h" #include "Collator.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { FieldDocSortedHitQueue::FieldDocSortedHitQueue(int32_t size) : PriorityQueue(size) { } FieldDocSortedHitQueue::~FieldDocSortedHitQueue() { } void FieldDocSortedHitQueue::setFields(Collection fields) { this->fields = fields; this->collators = hasCollators(fields); } Collection FieldDocSortedHitQueue::getFields() { return fields; } Collection FieldDocSortedHitQueue::hasCollators(Collection fields) { if (!fields) { return Collection(); } Collection ret(Collection::newInstance(fields.size())); for (int32_t i = 0; i < fields.size(); ++i) { localePtr locale(fields[i]->getLocale()); if (locale) { ret[i] = newInstance(*locale); } } return ret; } bool FieldDocSortedHitQueue::lessThan(const FieldDocPtr& first, const FieldDocPtr& second) { int32_t n = fields.size(); int32_t c = 0; for (int32_t i = 0; i < n && c == 0; ++i) { int32_t type = fields[i]->getType(); if (type == SortField::STRING) { String s1(VariantUtils::get(first->fields[i])); String s2(VariantUtils::get(second->fields[i])); if (!fields[i]->getLocale()) { c = s1.compare(s2); } else { c = collators[i]->compare(s1, s2); } } else { c = VariantUtils::compareTo(first->fields[i], second->fields[i]); if (type == SortField::SCORE) { c = -c; } } // reverse sort if (fields[i]->getReverse()) { c = -c; } } // avoid random sort order that could lead to duplicates if (c == 0) { return (first->doc > second->doc); } return (c > 0); } } 
LucenePlusPlus-rel_3.0.9/src/core/search/FieldValueHitQueue.cpp000066400000000000000000000117321456444476200245160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldValueHitQueue.h" #include "_FieldValueHitQueue.h" #include "FieldComparator.h" #include "FieldDoc.h" #include "SortField.h" namespace Lucene { FieldValueHitQueue::FieldValueHitQueue(Collection fields, int32_t size) : HitQueueBase(size) { // When we get here, fields.size() is guaranteed to be > 0, therefore no need to check it again. // All these are required by this class's API - need to return arrays. Therefore even in the case // of a single comparator, create an array anyway. 
this->fields = fields; int32_t numComparators = fields.size(); comparators = Collection::newInstance(numComparators); reverseMul = Collection::newInstance(numComparators); } FieldValueHitQueue::~FieldValueHitQueue() { } FieldValueHitQueuePtr FieldValueHitQueue::create(Collection fields, int32_t size) { if (fields.empty()) { boost::throw_exception(IllegalArgumentException(L"Sort must contain at least one field")); } if (fields.size() == 1) { return newLucene(fields, size); } else { return newLucene(fields, size); } } Collection FieldValueHitQueue::getComparators() { return comparators; } Collection FieldValueHitQueue::getReverseMul() { return reverseMul; } FieldDocPtr FieldValueHitQueue::fillFields(const FieldValueHitQueueEntryPtr& entry) { int32_t n = comparators.size(); Collection fields(Collection::newInstance(n)); for (int32_t i = 0; i < n; ++i) { fields[i] = comparators[i]->value(entry->slot); } return newLucene(entry->doc, entry->score, fields); } Collection FieldValueHitQueue::getFields() { return fields; } FieldValueHitQueueEntry::FieldValueHitQueueEntry(int32_t slot, int32_t doc, double score) : ScoreDoc(doc, score) { this->slot = slot; } FieldValueHitQueueEntry::~FieldValueHitQueueEntry() { } String FieldValueHitQueueEntry::toString() { StringStream buffer; buffer << L"slot:" << slot << L" " << ScoreDoc::toString(); return buffer.str(); } OneComparatorFieldValueHitQueue::OneComparatorFieldValueHitQueue(Collection fields, int32_t size) : FieldValueHitQueue(fields, size) { if (fields.empty()) { boost::throw_exception(IllegalArgumentException(L"Sort must contain at least one field")); } SortFieldPtr field(fields[0]); comparator = field->getComparator(size, 0); oneReverseMul = field->reverse ? 
-1 : 1; comparators[0] = comparator; reverseMul[0] = oneReverseMul; } OneComparatorFieldValueHitQueue::~OneComparatorFieldValueHitQueue() { } bool OneComparatorFieldValueHitQueue::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) { FieldValueHitQueueEntryPtr firstEntry(boost::static_pointer_cast(first)); FieldValueHitQueueEntryPtr secondEntry(boost::static_pointer_cast(second)); BOOST_ASSERT(firstEntry != secondEntry); BOOST_ASSERT(firstEntry->slot != secondEntry->slot); int32_t c = oneReverseMul * comparator->compare(firstEntry->slot, secondEntry->slot); // avoid random sort order that could lead to duplicates return c != 0 ? (c > 0) : (firstEntry->doc > secondEntry->doc); } MultiComparatorsFieldValueHitQueue::MultiComparatorsFieldValueHitQueue(Collection fields, int32_t size) : FieldValueHitQueue(fields, size) { int32_t numComparators = comparators.size(); for (int32_t i = 0; i < numComparators; ++i) { SortFieldPtr field(fields[i]); reverseMul[i] = field->reverse ? -1 : 1; comparators[i] = field->getComparator(size, i); } } MultiComparatorsFieldValueHitQueue::~MultiComparatorsFieldValueHitQueue() { } bool MultiComparatorsFieldValueHitQueue::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) { FieldValueHitQueueEntryPtr firstEntry(boost::static_pointer_cast(first)); FieldValueHitQueueEntryPtr secondEntry(boost::static_pointer_cast(second)); BOOST_ASSERT(firstEntry != secondEntry); BOOST_ASSERT(firstEntry->slot != secondEntry->slot); int32_t numComparators = comparators.size(); for (int32_t i = 0; i < numComparators; ++i) { int32_t c = reverseMul[i] * comparators[i]->compare(firstEntry->slot, secondEntry->slot); if (c != 0) { return (c > 0); // Short circuit } } // avoid random sort order that could lead to duplicates return (firstEntry->doc > secondEntry->doc); } } 
LucenePlusPlus-rel_3.0.9/src/core/search/Filter.cpp000066400000000000000000000006541456444476200222520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Filter.h" namespace Lucene { Filter::~Filter() { } } LucenePlusPlus-rel_3.0.9/src/core/search/FilterManager.cpp000066400000000000000000000066551456444476200235540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FilterManager.h" #include "_FilterManager.h" #include "Filter.h" #include "MiscUtils.h" namespace Lucene { /// The default maximum number of Filters in the cache const int32_t FilterManager::DEFAULT_CACHE_CLEAN_SIZE = 100; /// The default frequency of cache cleanup const int64_t FilterManager::DEFAULT_CACHE_SLEEP_TIME = 1000 * 60 * 10; FilterManager::FilterManager() { } FilterManager::~FilterManager() { } void FilterManager::initialize() { cache = MapIntFilterItem::newInstance(); cacheCleanSize = DEFAULT_CACHE_CLEAN_SIZE; // Let the cache get to 100 items cleanSleepTime = DEFAULT_CACHE_SLEEP_TIME; // 10 minutes between cleanings filterCleaner = newLucene(shared_from_this()); filterCleaner->start(); } FilterManagerPtr FilterManager::getInstance() { static FilterManagerPtr manager; LUCENE_RUN_ONCE( manager = newLucene(); CycleCheck::addStatic(manager); ); return manager; } void FilterManager::setCacheSize(int32_t 
cacheCleanSize) { this->cacheCleanSize = cacheCleanSize; } void FilterManager::setCleanThreadSleepTime(int64_t cleanSleepTime) { this->cleanSleepTime = cleanSleepTime; } FilterPtr FilterManager::getFilter(const FilterPtr& filter) { SyncLock parentLock(&cache); FilterItemPtr fi(cache.get(filter->hashCode())); if (fi) { fi->timestamp = MiscUtils::currentTimeMillis(); return fi->filter; } cache.put(filter->hashCode(), newLucene(filter)); return filter; } FilterItem::FilterItem(const FilterPtr& filter) { this->filter = filter; this->timestamp = MiscUtils::currentTimeMillis(); } FilterItem::~FilterItem() { } FilterCleaner::FilterCleaner(const FilterManagerPtr& manager) { _manager = manager; running = true; } FilterCleaner::~FilterCleaner() { } void FilterCleaner::run() { while (running) { FilterManagerPtr manager(_manager); // sort items from oldest to newest we delete the oldest filters if (manager->cache.size() > manager->cacheCleanSize) { // empty the temporary set sortedFilterItems.clear(); { SyncLock parentLock(&manager->cache); for (MapIntFilterItem::iterator item = manager->cache.begin(); item != manager->cache.end(); ++item) { sortedFilterItems.put(item->second->timestamp, item->first); } int32_t numToDelete = (int32_t)((double)(sortedFilterItems.size() - manager->cacheCleanSize) * 1.5); int32_t counter = 0; // loop over the set and delete all of the cache entries not used in a while for (MapLongInt::iterator item = sortedFilterItems.begin(); item != sortedFilterItems.end() && counter++ < numToDelete; ++item) { manager->cache.remove(item->second); } } // empty the set so we don't tie up the memory sortedFilterItems.clear(); } // take a nap LuceneThread::threadSleep(manager->cleanSleepTime); } } } LucenePlusPlus-rel_3.0.9/src/core/search/FilteredDocIdSet.cpp000066400000000000000000000022641456444476200241410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FilteredDocIdSet.h" #include "_FilteredDocIdSet.h" namespace Lucene { FilteredDocIdSet::FilteredDocIdSet(const DocIdSetPtr& innerSet) { this->innerSet = innerSet; } FilteredDocIdSet::~FilteredDocIdSet() { } bool FilteredDocIdSet::isCacheable() { return innerSet->isCacheable(); } DocIdSetIteratorPtr FilteredDocIdSet::iterator() { return newLucene(shared_from_this(), innerSet->iterator()); } DefaultFilteredDocIdSetIterator::DefaultFilteredDocIdSetIterator(const FilteredDocIdSetPtr& filtered, const DocIdSetIteratorPtr& innerIter) : FilteredDocIdSetIterator(innerIter) { this->filtered = filtered; } DefaultFilteredDocIdSetIterator::~DefaultFilteredDocIdSetIterator() { } bool DefaultFilteredDocIdSetIterator::match(int32_t docid) { return filtered->match(docid); } } LucenePlusPlus-rel_3.0.9/src/core/search/FilteredDocIdSetIterator.cpp000066400000000000000000000025761456444476200256610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FilteredDocIdSetIterator.h" namespace Lucene { FilteredDocIdSetIterator::FilteredDocIdSetIterator(const DocIdSetIteratorPtr& innerIter) { if (!innerIter) { boost::throw_exception(IllegalArgumentException(L"null iterator")); } this->innerIter = innerIter; this->doc = -1; } FilteredDocIdSetIterator::~FilteredDocIdSetIterator() { } int32_t FilteredDocIdSetIterator::docID() { return doc; } int32_t FilteredDocIdSetIterator::nextDoc() { while ((doc = innerIter->nextDoc()) != NO_MORE_DOCS) { if (match(doc)) { return doc; } } return doc; } int32_t FilteredDocIdSetIterator::advance(int32_t target) { doc = innerIter->advance(target); if (doc != NO_MORE_DOCS) { if (match(doc)) { return doc; } else { while ((doc = innerIter->nextDoc()) != NO_MORE_DOCS) { if (match(doc)) { return doc; } } return doc; } } return doc; } } LucenePlusPlus-rel_3.0.9/src/core/search/FilteredQuery.cpp000066400000000000000000000142321456444476200236060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FilteredQuery.h" #include "_FilteredQuery.h" #include "Explanation.h" #include "Filter.h" #include "DocIdSet.h" #include "MiscUtils.h" namespace Lucene { FilteredQuery::FilteredQuery(const QueryPtr& query, const FilterPtr& filter) { this->query = query; this->filter = filter; } FilteredQuery::~FilteredQuery() { } WeightPtr FilteredQuery::createWeight(const SearcherPtr& searcher) { WeightPtr weight(query->createWeight(searcher)); SimilarityPtr similarity(query->getSimilarity(searcher)); return newLucene(shared_from_this(), weight, similarity); } QueryPtr FilteredQuery::rewrite(const IndexReaderPtr& reader) { QueryPtr rewritten(query->rewrite(reader)); if (rewritten != query) { FilteredQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone())); cloneQuery->query = rewritten; return cloneQuery; } else { return shared_from_this(); } } QueryPtr FilteredQuery::getQuery() { return query; } FilterPtr FilteredQuery::getFilter() { return filter; } void FilteredQuery::extractTerms(SetTerm terms) { getQuery()->extractTerms(terms); } String FilteredQuery::toString(const String& field) { StringStream buffer; buffer << L"filtered(" << query->toString(field) << L")->" << filter->toString() << boostString(); return buffer.str(); } bool FilteredQuery::equals(const LuceneObjectPtr& other) { FilteredQueryPtr otherFilteredQuery(boost::dynamic_pointer_cast(other)); if (!otherFilteredQuery) { return false; } return (Query::equals(other) && query->equals(otherFilteredQuery->query) && filter->equals(otherFilteredQuery->filter)); } int32_t FilteredQuery::hashCode() { return query->hashCode() ^ filter->hashCode() + MiscUtils::doubleToIntBits(getBoost()); } LuceneObjectPtr FilteredQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? 
other : newLucene(query, filter); FilteredQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); cloneQuery->query = query; cloneQuery->filter = filter; return cloneQuery; } FilteredQueryWeight::FilteredQueryWeight(const FilteredQueryPtr& query, const WeightPtr& weight, const SimilarityPtr& similarity) { this->query = query; this->weight = weight; this->similarity = similarity; value = 0.0; } FilteredQueryWeight::~FilteredQueryWeight() { } double FilteredQueryWeight::getValue() { return value; } double FilteredQueryWeight::sumOfSquaredWeights() { return weight->sumOfSquaredWeights() * query->getBoost() * query->getBoost(); } void FilteredQueryWeight::normalize(double norm) { weight->normalize(norm); value = weight->getValue() * query->getBoost(); } ExplanationPtr FilteredQueryWeight::explain(const IndexReaderPtr& reader, int32_t doc) { ExplanationPtr inner(weight->explain(reader, doc)); if (query->getBoost() !=1) { ExplanationPtr preBoost(inner); inner = newLucene(inner->getValue() * query->getBoost(), L"product of:"); inner->addDetail(newLucene(query->getBoost(), L"boost")); inner->addDetail(preBoost); } FilterPtr f(query->filter); DocIdSetPtr docIdSet(f->getDocIdSet(reader)); DocIdSetIteratorPtr docIdSetIterator(!docIdSet ? 
DocIdSet::EMPTY_DOCIDSET()->iterator() : docIdSet->iterator()); if (!docIdSetIterator) { docIdSetIterator = DocIdSet::EMPTY_DOCIDSET()->iterator(); } if (docIdSetIterator->advance(doc) == doc) { return inner; } else { ExplanationPtr result(newLucene(0.0, L"failure to match filter: " + f->toString())); result->addDetail(inner); return result; } } QueryPtr FilteredQueryWeight::getQuery() { return query; } ScorerPtr FilteredQueryWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { ScorerPtr scorer(weight->scorer(reader, true, false)); if (!scorer) { return ScorerPtr(); } DocIdSetPtr docIdSet(query->filter->getDocIdSet(reader)); if (!docIdSet) { return ScorerPtr(); } DocIdSetIteratorPtr docIdSetIterator(docIdSet->iterator()); if (!docIdSetIterator) { return ScorerPtr(); } return newLucene(shared_from_this(), scorer, docIdSetIterator, similarity); } FilteredQueryWeightScorer::FilteredQueryWeightScorer(const FilteredQueryWeightPtr& weight, const ScorerPtr& scorer, const DocIdSetIteratorPtr& docIdSetIterator, const SimilarityPtr& similarity) : Scorer(similarity) { this->weight = weight; this->scorer = scorer; this->docIdSetIterator = docIdSetIterator; doc = -1; } FilteredQueryWeightScorer::~FilteredQueryWeightScorer() { } int32_t FilteredQueryWeightScorer::advanceToCommon(int32_t scorerDoc, int32_t disiDoc) { while (scorerDoc != disiDoc) { if (scorerDoc < disiDoc) { scorerDoc = scorer->advance(disiDoc); } else { disiDoc = docIdSetIterator->advance(scorerDoc); } } return scorerDoc; } int32_t FilteredQueryWeightScorer::nextDoc() { int32_t disiDoc = docIdSetIterator->nextDoc(); int32_t scorerDoc = scorer->nextDoc(); doc = (scorerDoc != NO_MORE_DOCS && advanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS) ? 
scorer->docID() : NO_MORE_DOCS; return doc; } int32_t FilteredQueryWeightScorer::docID() { return doc; } int32_t FilteredQueryWeightScorer::advance(int32_t target) { int32_t disiDoc = docIdSetIterator->advance(target); int32_t scorerDoc = scorer->advance(target); doc = (scorerDoc != NO_MORE_DOCS && advanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS) ? scorer->docID() : NO_MORE_DOCS; return doc; } double FilteredQueryWeightScorer::score() { return weight->query->getBoost() * scorer->score(); } } LucenePlusPlus-rel_3.0.9/src/core/search/FilteredTermEnum.cpp000066400000000000000000000031501456444476200242320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FilteredTermEnum.h" namespace Lucene { FilteredTermEnum::~FilteredTermEnum() { } void FilteredTermEnum::setEnum(const TermEnumPtr& actualEnum) { this->actualEnum = actualEnum; // Find the first term that matches TermPtr term(actualEnum->term()); if (term && termCompare(term)) { currentTerm = term; } else { next(); } } int32_t FilteredTermEnum::docFreq() { if (!currentTerm) { return -1; } BOOST_ASSERT(actualEnum); return actualEnum->docFreq(); } bool FilteredTermEnum::next() { if (!actualEnum) { return false; // the actual enumerator is not initialized } currentTerm.reset(); while (!currentTerm) { if (endEnum()) { return false; } if (actualEnum->next()) { TermPtr term(actualEnum->term()); if (termCompare(term)) { currentTerm = term; return true; } } else { return false; } } currentTerm.reset(); return false; } TermPtr FilteredTermEnum::term() { return currentTerm; } void FilteredTermEnum::close() { if (actualEnum) { actualEnum->close(); } currentTerm.reset(); 
actualEnum.reset(); } } LucenePlusPlus-rel_3.0.9/src/core/search/FuzzyQuery.cpp000066400000000000000000000145121456444476200232000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FuzzyQuery.h" #include "_FuzzyQuery.h" #include "FuzzyTermEnum.h" #include "Term.h" #include "TermQuery.h" #include "BooleanQuery.h" #include "BooleanClause.h" #include "MiscUtils.h" namespace Lucene { const int32_t FuzzyQuery::defaultPrefixLength = 0; FuzzyQuery::FuzzyQuery(const TermPtr& term, double minimumSimilarity, int32_t prefixLength) { ConstructQuery(term, minimumSimilarity, prefixLength); } FuzzyQuery::FuzzyQuery(const TermPtr& term, double minimumSimilarity) { ConstructQuery(term, minimumSimilarity, defaultPrefixLength); } FuzzyQuery::FuzzyQuery(const TermPtr& term) { ConstructQuery(term, defaultMinSimilarity(), defaultPrefixLength); } FuzzyQuery::~FuzzyQuery() { } void FuzzyQuery::ConstructQuery(const TermPtr& term, double minimumSimilarity, int32_t prefixLength) { this->term = term; if (minimumSimilarity >= 1.0) { boost::throw_exception(IllegalArgumentException(L"minimumSimilarity >= 1")); } else if (minimumSimilarity < 0.0) { boost::throw_exception(IllegalArgumentException(L"minimumSimilarity < 0")); } if (prefixLength < 0) { boost::throw_exception(IllegalArgumentException(L"prefixLength < 0")); } this->termLongEnough = ((int32_t)term->text().length() > (int32_t)(1.0 / (1.0 - minimumSimilarity))); this->minimumSimilarity = minimumSimilarity; this->prefixLength = prefixLength; rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE(); } double FuzzyQuery::defaultMinSimilarity() { const double _defaultMinSimilarity = 0.5; return 
_defaultMinSimilarity; } double FuzzyQuery::getMinSimilarity() { return minimumSimilarity; } int32_t FuzzyQuery::getPrefixLength() { return prefixLength; } FilteredTermEnumPtr FuzzyQuery::getEnum(const IndexReaderPtr& reader) { return newLucene(reader, getTerm(), minimumSimilarity, prefixLength); } TermPtr FuzzyQuery::getTerm() { return term; } void FuzzyQuery::setRewriteMethod(const RewriteMethodPtr& method) { boost::throw_exception(UnsupportedOperationException(L"FuzzyQuery cannot change rewrite method")); } QueryPtr FuzzyQuery::rewrite(const IndexReaderPtr& reader) { if (!termLongEnough) { // can only match if it's exact return newLucene(term); } int32_t maxSize = BooleanQuery::getMaxClauseCount(); ScoreTermQueuePtr stQueue(newLucene(maxSize + 1)); FilteredTermEnumPtr enumerator(getEnum(reader)); LuceneException finally; try { ScoreTermPtr st = newLucene(); do { TermPtr t(enumerator->term()); if (!t) { break; } double score = enumerator->difference(); // ignore uncompetitive hits if (stQueue->size() >= maxSize && score <= stQueue->top()->score) { continue; } // add new entry in PQ st->term = t; st->score = score; stQueue->add(st); // possibly drop entries from queue st = (stQueue->size() > maxSize) ? stQueue->pop() : newLucene(); } while (enumerator->next()); } catch (LuceneException& e) { finally = e; } enumerator->close(); finally.throwException(); BooleanQueryPtr query(newLucene(true)); int32_t size = stQueue->size(); for (int32_t i = 0; i < size; ++i) { ScoreTermPtr st(stQueue->pop()); TermQueryPtr tq(newLucene(st->term)); // found a match tq->setBoost(getBoost() * st->score); // set the boost query->add(tq, BooleanClause::SHOULD); // add to query } return query; } LuceneObjectPtr FuzzyQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = MultiTermQuery::clone(other ? 
other : newLucene(term)); FuzzyQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->minimumSimilarity = minimumSimilarity; cloneQuery->prefixLength = prefixLength; cloneQuery->termLongEnough = termLongEnough; cloneQuery->term = term; return cloneQuery; } String FuzzyQuery::toString(const String& field) { StringStream buffer; if (term->field() != field) { buffer << term->field() << L":"; } buffer << term->text() << L"~" << minimumSimilarity << boostString(); return buffer.str(); } int32_t FuzzyQuery::hashCode() { int32_t prime = 31; int32_t result = MultiTermQuery::hashCode(); result = prime * result + MiscUtils::doubleToIntBits(minimumSimilarity); result = prime * result + prefixLength; result = prime * result + (term ? term->hashCode() : 0); return result; } bool FuzzyQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } if (!MultiTermQuery::equals(other)) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } FuzzyQueryPtr otherFuzzyQuery(boost::dynamic_pointer_cast(other)); if (!otherFuzzyQuery) { return false; } if (MiscUtils::doubleToIntBits(minimumSimilarity) != MiscUtils::doubleToIntBits(otherFuzzyQuery->minimumSimilarity)) { return false; } if (prefixLength != otherFuzzyQuery->prefixLength) { return false; } if (!term) { if (otherFuzzyQuery->term) { return false; } } else if (!term->equals(otherFuzzyQuery->term)) { return false; } return true; } ScoreTerm::~ScoreTerm() { } int32_t ScoreTerm::compareTo(const ScoreTermPtr& other) { if (this->score == other->score) { return other->term->compareTo(this->term); } else { return this->score < other->score ? -1 : (this->score > other->score ? 
1 : 0); } } ScoreTermQueue::ScoreTermQueue(int32_t size) : PriorityQueue(size) { } ScoreTermQueue::~ScoreTermQueue() { } bool ScoreTermQueue::lessThan(const ScoreTermPtr& first, const ScoreTermPtr& second) { return (first->compareTo(second) < 0); } } LucenePlusPlus-rel_3.0.9/src/core/search/FuzzyTermEnum.cpp000066400000000000000000000142401456444476200236250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "FuzzyTermEnum.h" #include "FuzzyQuery.h" #include "Term.h" #include "IndexReader.h" namespace Lucene { FuzzyTermEnum::FuzzyTermEnum(const IndexReaderPtr& reader, const TermPtr& term, double minSimilarity, int32_t prefixLength) { ConstructTermEnum(reader, term, minSimilarity, prefixLength); } FuzzyTermEnum::FuzzyTermEnum(const IndexReaderPtr& reader, const TermPtr& term, double minSimilarity) { ConstructTermEnum(reader, term, minSimilarity, FuzzyQuery::defaultPrefixLength); } FuzzyTermEnum::FuzzyTermEnum(const IndexReaderPtr& reader, const TermPtr& term) { ConstructTermEnum(reader, term, FuzzyQuery::defaultMinSimilarity(), FuzzyQuery::defaultPrefixLength); } FuzzyTermEnum::~FuzzyTermEnum() { } void FuzzyTermEnum::ConstructTermEnum(const IndexReaderPtr& reader, const TermPtr& term, double minSimilarity, int32_t prefixLength) { if (minSimilarity >= 1.0) { boost::throw_exception(IllegalArgumentException(L"minimumSimilarity cannot be greater than or equal to 1")); } else if (minSimilarity < 0.0) { boost::throw_exception(IllegalArgumentException(L"minimumSimilarity cannot be less than 0")); } if (prefixLength < 0) { boost::throw_exception(IllegalArgumentException(L"prefixLength cannot be less than 0")); } 
this->minimumSimilarity = minSimilarity; this->scale_factor = 1.0 / (1.0 - minimumSimilarity); this->searchTerm = term; this->field = searchTerm->field(); this->_endEnum = false; this->_similarity = 0.0; // The prefix could be longer than the word. // It's kind of silly though. It means we must match the entire word. int32_t fullSearchTermLength = searchTerm->text().length(); int32_t realPrefixLength = prefixLength > fullSearchTermLength ? fullSearchTermLength : prefixLength; this->text = searchTerm->text().substr(realPrefixLength); this->prefix = searchTerm->text().substr(0, realPrefixLength); this->p = Collection::newInstance(this->text.length() + 1); this->d = Collection::newInstance(this->text.length() + 1); setEnum(reader->terms(newLucene(searchTerm->field(), prefix))); } bool FuzzyTermEnum::termCompare(const TermPtr& term) { if (field == term->field() && boost::starts_with(term->text(), prefix)) { String target(term->text().substr(prefix.length())); this->_similarity = similarity(target); return (_similarity > minimumSimilarity); } _endEnum = true; return false; } double FuzzyTermEnum::difference() { return (_similarity - minimumSimilarity) * scale_factor; } bool FuzzyTermEnum::endEnum() { return _endEnum; } double FuzzyTermEnum::similarity(const String& target) { int32_t m = target.length(); int32_t n = text.length(); if (n == 0) { // We don't have anything to compare. That means if we just add the letters for m we get the new word return prefix.empty() ? 0.0 : 1.0 - ((double)m / (double)prefix.length()); } if (m == 0) { return prefix.empty() ? 0.0 : 1.0 - ((double)n / (double)prefix.length()); } int32_t maxDistance = calculateMaxDistance(m); if (maxDistance < std::abs(m - n)) { // Just adding the characters of m to n or vice-versa results in too many edits for example "pre" length // is 3 and "prefixes" length is 8. We can see that given this optimal circumstance, the edit distance // cannot be less than 5. which is 8-3 or more precisely std::abs(3 - 8). 
if our maximum edit distance // is 4, then we can discard this word without looking at it. return 0.0; } // init matrix d for (int32_t i = 0; i <= n; ++i) { p[i] = i; } // start computing edit distance for (int32_t j = 1; j <= m; ++j) { // iterates through target int32_t bestPossibleEditDistance = m; wchar_t t_j = target[j - 1]; // jth character of t d[0] = j; for (int32_t i = 1; i <= n; ++i) { // iterates through text // minimum of cell to the left+1, to the top+1, diagonally left and up +(0|1) if (t_j != text[i - 1]) { d[i] = std::min(std::min(d[i - 1], p[i]), p[i - 1]) + 1; } else { d[i] = std::min(std::min(d[i - 1] + 1, p[i] + 1), p[i - 1]); } bestPossibleEditDistance = std::min(bestPossibleEditDistance, d[i]); } // After calculating row i, the best possible edit distance can be found by found by finding the smallest // value in a given column. If the bestPossibleEditDistance is greater than the max distance, abort. if (j > maxDistance && bestPossibleEditDistance > maxDistance) { // equal is okay, but not greater // The closest the target can be to the text is just too far away. // This target is leaving the party early. return 0.0; } // copy current distance counts to 'previous row' distance counts: swap p and d std::swap(p, d); } // Our last action in the above loop was to switch d and p, so p now actually has the most recent cost counts // This will return less than 0.0 when the edit distance is greater than the number of characters in the shorter // word. 
But this was the formula that was previously used in FuzzyTermEnum, so it has not been changed (even // though minimumSimilarity must be greater than 0.0) return 1.0 - ((double)p[n] / (double)(prefix.length() + std::min(n, m))); } int32_t FuzzyTermEnum::calculateMaxDistance(int32_t m) { return (int32_t)((1.0 - minimumSimilarity) * (double)(std::min((int32_t)text.length(), m) + prefix.length())); } void FuzzyTermEnum::close() { p.reset(); d.reset(); searchTerm.reset(); FilteredTermEnum::close(); // call FilteredTermEnum::close() and let the garbage collector do its work. } } LucenePlusPlus-rel_3.0.9/src/core/search/HitQueue.cpp000066400000000000000000000022331456444476200225510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "HitQueue.h" #include "ScoreDoc.h" namespace Lucene { HitQueue::HitQueue(int32_t size, bool prePopulate) : HitQueueBase(size) { this->prePopulate = prePopulate; } HitQueue::~HitQueue() { } bool HitQueue::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) { if (first->score == second->score) { return (first->doc > second->doc); } else { return (first->score < second->score); } } ScoreDocPtr HitQueue::getSentinelObject() { // Always set the doc Id to MAX_VALUE so that it won't be favored by lessThan. This generally should // not happen since if score is not NEG_INF, TopScoreDocCollector will always add the object to the queue. return !prePopulate ? 
ScoreDocPtr() : newLucene(INT_MAX, -std::numeric_limits::infinity()); } } LucenePlusPlus-rel_3.0.9/src/core/search/HitQueueBase.cpp000066400000000000000000000034131456444476200233450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "HitQueueBase.h" #include "ScoreDoc.h" namespace Lucene { HitQueueBase::HitQueueBase(int32_t size) { queueSize = size; } HitQueueBase::~HitQueueBase() { } void HitQueueBase::initialize() { queue = newLucene(shared_from_this(), queueSize); } ScoreDocPtr HitQueueBase::add(const ScoreDocPtr& scoreDoc) { return queue->add(scoreDoc); } ScoreDocPtr HitQueueBase::addOverflow(const ScoreDocPtr& scoreDoc) { return queue->addOverflow(scoreDoc); } ScoreDocPtr HitQueueBase::top() { return queue->top(); } ScoreDocPtr HitQueueBase::pop() { return queue->pop(); } ScoreDocPtr HitQueueBase::updateTop() { return queue->updateTop(); } int32_t HitQueueBase::size() { return queue->size(); } bool HitQueueBase::empty() { return queue->empty(); } void HitQueueBase::clear() { queue->clear(); } ScoreDocPtr HitQueueBase::getSentinelObject() { return ScoreDocPtr(); } PriorityQueueScoreDocs::PriorityQueueScoreDocs(const HitQueueBasePtr& hitQueue, int32_t size) : PriorityQueue(size) { _hitQueue = hitQueue; } PriorityQueueScoreDocs::~PriorityQueueScoreDocs() { } bool PriorityQueueScoreDocs::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) { return HitQueueBasePtr(_hitQueue)->lessThan(first, second); } ScoreDocPtr PriorityQueueScoreDocs::getSentinelObject() { return HitQueueBasePtr(_hitQueue)->getSentinelObject(); } } 
LucenePlusPlus-rel_3.0.9/src/core/search/IndexSearcher.cpp000066400000000000000000000144631456444476200235540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IndexSearcher.h" #include "IndexReader.h" #include "TopScoreDocCollector.h" #include "TopFieldDocs.h" #include "TopFieldCollector.h" #include "Weight.h" #include "DocIdSet.h" #include "Scorer.h" #include "Filter.h" #include "Query.h" #include "ReaderUtil.h" namespace Lucene { IndexSearcher::IndexSearcher(const DirectoryPtr& path, bool readOnly) { ConstructSearcher(IndexReader::open(path, readOnly), true); } IndexSearcher::IndexSearcher(const IndexReaderPtr& reader) { ConstructSearcher(reader, false); } IndexSearcher::IndexSearcher(const IndexReaderPtr& reader, Collection subReaders, Collection docStarts) { this->fieldSortDoTrackScores = false; this->fieldSortDoMaxScore = false; this->reader = reader; this->subReaders = subReaders; this->docStarts = docStarts; closeReader = false; } IndexSearcher::~IndexSearcher() { } void IndexSearcher::ConstructSearcher(const IndexReaderPtr& reader, bool closeReader) { this->fieldSortDoTrackScores = false; this->fieldSortDoMaxScore = false; this->reader = reader; this->closeReader = closeReader; Collection subReadersList(Collection::newInstance()); gatherSubReaders(subReadersList, reader); subReaders = subReadersList; docStarts = Collection::newInstance(subReaders.size()); int32_t maxDoc = 0; for (int32_t i = 0; i < subReaders.size(); ++i) { docStarts[i] = maxDoc; maxDoc += subReaders[i]->maxDoc(); } } void IndexSearcher::gatherSubReaders(Collection allSubReaders, const IndexReaderPtr& reader) { 
ReaderUtil::gatherSubReaders(allSubReaders, reader); } IndexReaderPtr IndexSearcher::getIndexReader() { return reader; } void IndexSearcher::close() { if (closeReader) { reader->close(); } } int32_t IndexSearcher::docFreq(const TermPtr& term) { return reader->docFreq(term); } DocumentPtr IndexSearcher::doc(int32_t n) { return reader->document(n); } DocumentPtr IndexSearcher::doc(int32_t n, const FieldSelectorPtr& fieldSelector) { return reader->document(n, fieldSelector); } int32_t IndexSearcher::maxDoc() { return reader->maxDoc(); } TopDocsPtr IndexSearcher::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n) { if (n <= 0) { boost::throw_exception(IllegalArgumentException(L"n must be > 0")); } TopScoreDocCollectorPtr collector(TopScoreDocCollector::create(std::min(n, reader->maxDoc()), !weight->scoresDocsOutOfOrder())); search(weight, filter, collector); return collector->topDocs(); } TopFieldDocsPtr IndexSearcher::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort) { return search(weight, filter, n, sort, true); } TopFieldDocsPtr IndexSearcher::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort, bool fillFields) { TopFieldCollectorPtr collector(TopFieldCollector::create(sort, std::min(n, reader->maxDoc()), fillFields, fieldSortDoTrackScores, fieldSortDoMaxScore, !weight->scoresDocsOutOfOrder())); search(weight, filter, collector); return boost::dynamic_pointer_cast(collector->topDocs()); } void IndexSearcher::search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& results) { if (!filter) { for (int32_t i = 0; i < subReaders.size(); ++i) { // search each subreader results->setNextReader(subReaders[i], docStarts[i]); ScorerPtr scorer(weight->scorer(subReaders[i], !results->acceptsDocsOutOfOrder(), true)); if (scorer) { scorer->score(results); } } } else { for (int32_t i = 0; i < subReaders.size(); ++i) { // search each subreader 
results->setNextReader(subReaders[i], docStarts[i]); searchWithFilter(subReaders[i], weight, filter, results); } } } void IndexSearcher::searchWithFilter(const IndexReaderPtr& reader, const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& collector) { BOOST_ASSERT(filter); ScorerPtr scorer(weight->scorer(reader, true, false)); if (!scorer) { return; } int32_t docID = scorer->docID(); BOOST_ASSERT(docID == -1 || docID == DocIdSetIterator::NO_MORE_DOCS); DocIdSetPtr filterDocIdSet(filter->getDocIdSet(reader)); if (!filterDocIdSet) { // this means the filter does not accept any documents. return; } DocIdSetIteratorPtr filterIter(filterDocIdSet->iterator()); if (!filterIter) { // this means the filter does not accept any documents. return; } int32_t filterDoc = filterIter->nextDoc(); int32_t scorerDoc = scorer->advance(filterDoc); collector->setScorer(scorer); while (true) { if (scorerDoc == filterDoc) { // Check if scorer has exhausted, only before collecting. if (scorerDoc == DocIdSetIterator::NO_MORE_DOCS) { break; } collector->collect(scorerDoc); filterDoc = filterIter->nextDoc(); scorerDoc = scorer->advance(filterDoc); } else if (scorerDoc > filterDoc) { filterDoc = filterIter->advance(scorerDoc); } else { scorerDoc = scorer->advance(filterDoc); } } } QueryPtr IndexSearcher::rewrite(const QueryPtr& original) { QueryPtr query(original); for (QueryPtr rewrittenQuery(query->rewrite(reader)); rewrittenQuery != query; rewrittenQuery = query->rewrite(reader)) { query = rewrittenQuery; } return query; } ExplanationPtr IndexSearcher::explain(const WeightPtr& weight, int32_t doc) { int32_t n = ReaderUtil::subIndex(doc, docStarts); int32_t deBasedDoc = doc - docStarts[n]; return weight->explain(subReaders[n], deBasedDoc); } void IndexSearcher::setDefaultFieldSortScoring(bool doTrackScores, bool doMaxScore) { fieldSortDoTrackScores = doTrackScores; fieldSortDoMaxScore = doMaxScore; } } 
LucenePlusPlus-rel_3.0.9/src/core/search/MatchAllDocsQuery.cpp000066400000000000000000000076531456444476200243570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MatchAllDocsQuery.h" #include "_MatchAllDocsQuery.h" #include "IndexReader.h" #include "Similarity.h" #include "TermDocs.h" #include "ComplexExplanation.h" #include "Searcher.h" #include "MiscUtils.h" namespace Lucene { MatchAllDocsQuery::MatchAllDocsQuery(const String& normsField) { this->normsField = normsField; } MatchAllDocsQuery::~MatchAllDocsQuery() { } WeightPtr MatchAllDocsQuery::createWeight(const SearcherPtr& searcher) { return newLucene(shared_from_this(), searcher); } void MatchAllDocsQuery::extractTerms(SetTerm terms) { } String MatchAllDocsQuery::toString(const String& field) { StringStream buffer; buffer << L"*:*" << boostString(); return buffer.str(); } bool MatchAllDocsQuery::equals(const LuceneObjectPtr& other) { return Query::equals(other); } int32_t MatchAllDocsQuery::hashCode() { return MiscUtils::doubleToIntBits(getBoost()) ^ 0x1aa71190; } LuceneObjectPtr MatchAllDocsQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? 
other : newLucene(); MatchAllDocsQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); cloneQuery->normsField = normsField; return cloneQuery; } MatchAllDocsWeight::MatchAllDocsWeight(const MatchAllDocsQueryPtr& query, const SearcherPtr& searcher) { this->query = query; this->similarity = searcher->getSimilarity(); this->queryWeight = 0.0; this->queryNorm = 0.0; } MatchAllDocsWeight::~MatchAllDocsWeight() { } String MatchAllDocsWeight::toString() { StringStream buffer; buffer << L"weight(" << queryWeight << L", " << queryNorm << L")"; return buffer.str(); } QueryPtr MatchAllDocsWeight::getQuery() { return query; } double MatchAllDocsWeight::getValue() { return queryWeight; } double MatchAllDocsWeight::sumOfSquaredWeights() { queryWeight = getQuery()->getBoost(); return queryWeight * queryWeight; } void MatchAllDocsWeight::normalize(double norm) { this->queryNorm = norm; queryWeight *= this->queryNorm; } ScorerPtr MatchAllDocsWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { return newLucene(query, reader, similarity, shared_from_this(), !query->normsField.empty() ? 
reader->norms(query->normsField) : ByteArray()); } ExplanationPtr MatchAllDocsWeight::explain(const IndexReaderPtr& reader, int32_t doc) { // explain query weight ExplanationPtr queryExpl(newLucene(true, getValue(), L"MatchAllDocsQuery, product of:")); if (getQuery()->getBoost() != 1.0) { queryExpl->addDetail(newLucene(getQuery()->getBoost(), L"boost")); } queryExpl->addDetail(newLucene(queryNorm, L"queryNorm")); return queryExpl; } MatchAllScorer::MatchAllScorer(const MatchAllDocsQueryPtr& query, const IndexReaderPtr& reader, const SimilarityPtr& similarity, const WeightPtr& weight, ByteArray norms) : Scorer(similarity) { this->query = query; this->termDocs = reader->termDocs(TermPtr()); this->_score = weight->getValue(); this->norms = norms; this->doc = -1; } MatchAllScorer::~MatchAllScorer() { } int32_t MatchAllScorer::docID() { return doc; } int32_t MatchAllScorer::nextDoc() { doc = termDocs->next() ? termDocs->doc() : NO_MORE_DOCS; return doc; } double MatchAllScorer::score() { return norms ? _score * Similarity::decodeNorm(norms[docID()]) : _score; } int32_t MatchAllScorer::advance(int32_t target) { doc = termDocs->skipTo(target) ? termDocs->doc() : NO_MORE_DOCS; return doc; } } LucenePlusPlus-rel_3.0.9/src/core/search/MultiPhraseQuery.cpp000066400000000000000000000252041456444476200243060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiPhraseQuery.h" #include "_MultiPhraseQuery.h" #include "Searcher.h" #include "Term.h" #include "TermQuery.h" #include "MultipleTermPositions.h" #include "ExactPhraseScorer.h" #include "SloppyPhraseScorer.h" #include "Similarity.h" #include "IndexReader.h" #include "ComplexExplanation.h" #include "BooleanQuery.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { MultiPhraseQuery::MultiPhraseQuery() { termArrays = Collection< Collection >::newInstance(); positions = Collection::newInstance(); slop = 0; } MultiPhraseQuery::~MultiPhraseQuery() { } void MultiPhraseQuery::setSlop(int32_t s) { slop = s; } int32_t MultiPhraseQuery::getSlop() { return slop; } void MultiPhraseQuery::add(const TermPtr& term) { add(newCollection(term)); } void MultiPhraseQuery::add(Collection terms) { int32_t position = 0; if (!positions.empty()) { position = positions[positions.size() - 1] + 1; } add(terms, position); } void MultiPhraseQuery::add(Collection terms, int32_t position) { if (termArrays.empty()) { field = terms[0]->field(); } for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { if ((*term)->field() != field) { boost::throw_exception(IllegalArgumentException(L"All phrase terms must be in the same field (" + field + L"): " + (*term)->toString())); } } termArrays.add(terms); positions.add(position); } Collection< Collection > MultiPhraseQuery::getTermArrays() { return termArrays; } Collection MultiPhraseQuery::getPositions() { return positions; } void MultiPhraseQuery::extractTerms(SetTerm terms) { for (Collection< Collection >::iterator arr = termArrays.begin(); arr != termArrays.end(); ++arr) { for (Collection::iterator term = arr->begin(); term != arr->end(); ++term) { terms.add(*term); } } } QueryPtr MultiPhraseQuery::rewrite(const IndexReaderPtr& reader) { if (termArrays.size() == 1) { // optimize one-term case Collection 
terms(termArrays[0]); BooleanQueryPtr boq(newLucene(true)); for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { boq->add(newLucene(*term), BooleanClause::SHOULD); } boq->setBoost(getBoost()); return boq; } else { return shared_from_this(); } } WeightPtr MultiPhraseQuery::createWeight(const SearcherPtr& searcher) { return newLucene(shared_from_this(), searcher); } String MultiPhraseQuery::toString(const String& field) { StringStream buffer; if (this->field != field) { buffer << this->field << L":"; } buffer << L"\""; for (Collection< Collection >::iterator arr = termArrays.begin(); arr != termArrays.end(); ++arr) { if (arr != termArrays.begin()) { buffer << L" "; } if (arr->size() > 1) { buffer << L"("; for (Collection::iterator term = arr->begin(); term != arr->end(); ++term) { if (term != arr->begin()) { buffer << L" "; } buffer << (*term)->text(); } buffer << L")"; } else if (!arr->empty()) { buffer << (*arr)[0]->text(); } } buffer << L"\""; if (slop != 0) { buffer << L"~" << slop; } buffer << boostString(); return buffer.str(); } bool MultiPhraseQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } MultiPhraseQueryPtr otherMultiPhraseQuery(boost::dynamic_pointer_cast(other)); if (!otherMultiPhraseQuery) { return false; } return (getBoost() == otherMultiPhraseQuery->getBoost() && slop == otherMultiPhraseQuery->slop && termArraysEquals(termArrays, otherMultiPhraseQuery->termArrays) && positions.equals(otherMultiPhraseQuery->positions)); } int32_t MultiPhraseQuery::hashCode() { return MiscUtils::doubleToIntBits(getBoost()) ^ slop ^ termArraysHashCode() ^ MiscUtils::hashCode(positions.begin(), positions.end(), MiscUtils::hashNumeric) ^ 0x4ac65113; } int32_t MultiPhraseQuery::termArraysHashCode() { int32_t hashCode = 1; for (Collection< Collection >::iterator arr = termArrays.begin(); arr != termArrays.end(); ++arr) { hashCode = 31 * hashCode + MiscUtils::hashCode(arr->begin(), arr->end(), 
MiscUtils::hashLucene); } return hashCode; } struct equalTermArrays { inline bool operator()(const Collection& first, const Collection& second) const { if (first.size() != second.size()) { return false; } return first.equals(second, luceneEquals()); } }; bool MultiPhraseQuery::termArraysEquals(Collection< Collection > first, Collection< Collection > second) { return first.equals(second, equalTermArrays()); } LuceneObjectPtr MultiPhraseQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? other : newLucene(); MultiPhraseQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); cloneQuery->field = field; cloneQuery->termArrays = termArrays; cloneQuery->positions = positions; cloneQuery->slop = slop; return cloneQuery; } MultiPhraseWeight::MultiPhraseWeight(const MultiPhraseQueryPtr& query, const SearcherPtr& searcher) { this->query = query; this->similarity = query->getSimilarity(searcher); this->value = 0.0; this->idf = 0.0; this->queryNorm = 0.0; this->queryWeight = 0.0; // compute idf int32_t maxDoc = searcher->maxDoc(); for (Collection< Collection >::iterator arr = query->termArrays.begin(); arr != query->termArrays.end(); ++arr) { for (Collection::iterator term = arr->begin(); term != arr->end(); ++term) { idf += this->similarity->idf(searcher->docFreq(*term), maxDoc); } } } MultiPhraseWeight::~MultiPhraseWeight() { } QueryPtr MultiPhraseWeight::getQuery() { return query; } double MultiPhraseWeight::getValue() { return value; } double MultiPhraseWeight::sumOfSquaredWeights() { queryWeight = idf * getQuery()->getBoost(); // compute query weight return queryWeight * queryWeight; // square it } void MultiPhraseWeight::normalize(double norm) { queryNorm = norm; queryWeight *= queryNorm; // normalize query weight value = queryWeight * idf; // idf for document } ScorerPtr MultiPhraseWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { if (query->termArrays.empty()) { // optimize zero-term case return 
ScorerPtr(); } Collection tps(Collection::newInstance(query->termArrays.size())); for (int32_t i = 0; i < tps.size(); ++i) { Collection terms(query->termArrays[i]); TermPositionsPtr p; if (terms.size() > 1) { p = newLucene(reader, terms); } else { p = reader->termPositions(terms[0]); } if (!p) { return ScorerPtr(); } tps[i] = p; } if (query->slop == 0) { // optimize exact case return newLucene(shared_from_this(), tps, query->getPositions(), similarity, reader->norms(query->field)); } else { return newLucene(shared_from_this(), tps, query->getPositions(), similarity, query->slop, reader->norms(query->field)); } } ExplanationPtr MultiPhraseWeight::explain(const IndexReaderPtr& reader, int32_t doc) { ComplexExplanationPtr result(newLucene()); result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); ExplanationPtr idfExpl(newLucene(idf, L"idf(" + query->toString() + L")")); // explain query weight ExplanationPtr queryExpl(newLucene()); queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); if (query->getBoost() != 1.0) { queryExpl->addDetail(boostExpl); } queryExpl->addDetail(idfExpl); ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); queryExpl->addDetail(queryNormExpl); queryExpl->setValue(boostExpl->getValue() * idfExpl->getValue() * queryNormExpl->getValue()); result->addDetail(queryExpl); // explain field weight ComplexExplanationPtr fieldExpl(newLucene()); fieldExpl->setDescription(L"fieldWeight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); PhraseScorerPtr phraseScorer(boost::dynamic_pointer_cast(scorer(reader, true, false))); if (!phraseScorer) { return newLucene(0.0, L"no matching docs"); } ExplanationPtr tfExplanation(newLucene()); int32_t d = phraseScorer->advance(doc); double phraseFreq = d == doc ? 
phraseScorer->currentFreq() : 0.0; tfExplanation->setValue(similarity->tf(phraseFreq)); tfExplanation->setDescription(L"tf(phraseFreq=" + StringUtils::toString(phraseFreq) + L")"); fieldExpl->addDetail(tfExplanation); fieldExpl->addDetail(idfExpl); ExplanationPtr fieldNormExpl(newLucene()); ByteArray fieldNorms(reader->norms(query->field)); double fieldNorm = fieldNorms ? Similarity::decodeNorm(fieldNorms[doc]) : 1.0; fieldNormExpl->setValue(fieldNorm); fieldNormExpl->setDescription(L"fieldNorm(field=" + query->field + L", doc=" + StringUtils::toString(doc) + L")"); fieldExpl->addDetail(fieldNormExpl); fieldExpl->setMatch(tfExplanation->isMatch()); fieldExpl->setValue(tfExplanation->getValue() * idfExpl->getValue() * fieldNormExpl->getValue()); result->addDetail(fieldExpl); result->setMatch(fieldExpl->getMatch()); // combine them result->setValue(queryExpl->getValue() * fieldExpl->getValue()); if (queryExpl->getValue() == 1.0) { return fieldExpl; } return result; } } LucenePlusPlus-rel_3.0.9/src/core/search/MultiSearcher.cpp000066400000000000000000000274431456444476200236010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiSearcher.h" #include "_MultiSearcher.h" #include "Term.h" #include "ReaderUtil.h" #include "HitQueue.h" #include "Query.h" #include "FieldDocSortedHitQueue.h" #include "TopDocs.h" #include "ScoreDoc.h" #include "SortField.h" #include "TopFieldDocs.h" #include "FieldDoc.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { MultiSearcher::MultiSearcher(Collection searchables) { this->searchables = searchables; this->_maxDoc = 0; this->starts = Collection::newInstance(searchables.size() + 1); // build starts array for (int32_t i = 0; i < searchables.size(); ++i) { starts[i] = _maxDoc; _maxDoc += searchables[i]->maxDoc(); // compute maxDocs } starts[searchables.size()] = _maxDoc; } MultiSearcher::~MultiSearcher() { } Collection MultiSearcher::getSearchables() { return searchables; } Collection MultiSearcher::getStarts() { return starts; } void MultiSearcher::close() { for (Collection::iterator searchable = searchables.begin(); searchable != searchables.end(); ++searchable) { (*searchable)->close(); } } int32_t MultiSearcher::docFreq(const TermPtr& term) { int32_t docFreq = 0; for (Collection::iterator searchable = searchables.begin(); searchable != searchables.end(); ++searchable) { docFreq += (*searchable)->docFreq(term); } return docFreq; } DocumentPtr MultiSearcher::doc(int32_t n) { int32_t i = subSearcher(n); // find searcher index return searchables[i]->doc(n - starts[i]); // dispatch to searcher } DocumentPtr MultiSearcher::doc(int32_t n, const FieldSelectorPtr& fieldSelector) { int32_t i = subSearcher(n); // find searcher index return searchables[i]->doc(n - starts[i], fieldSelector); // dispatch to searcher } int32_t MultiSearcher::subSearcher(int32_t n) { return ReaderUtil::subIndex(n, starts); } int32_t MultiSearcher::subDoc(int32_t n) { return n - starts[subSearcher(n)]; } int32_t MultiSearcher::maxDoc() { return _maxDoc; } 
TopDocsPtr MultiSearcher::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n) { HitQueuePtr hq(newLucene(n, false)); int32_t totalHits = 0; for (int32_t i = 0; i < searchables.size(); ++i) { // search each searcher TopDocsPtr docs(newLucene(SynchronizePtr(), searchables[i], weight, filter, n, hq, i, starts)->call()); totalHits += docs->totalHits; // update totalHits } Collection scoreDocs(Collection::newInstance(hq->size())); for (int32_t i = hq->size() - 1; i >= 0; --i) { // put docs in array scoreDocs[i] = hq->pop(); } double maxScore = totalHits == 0 ? -std::numeric_limits::infinity() : scoreDocs[0]->score; return newLucene(totalHits, scoreDocs, maxScore); } TopFieldDocsPtr MultiSearcher::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort) { FieldDocSortedHitQueuePtr hq(newLucene(n)); int32_t totalHits = 0; double maxScore = -std::numeric_limits::infinity(); for (int32_t i = 0; i < searchables.size(); ++i) { // search each searcher TopFieldDocsPtr docs(newLucene(SynchronizePtr(), searchables[i], weight, filter, n, hq, sort, i, starts)->call()); totalHits += docs->totalHits; // update totalHits maxScore = std::max(maxScore, docs->maxScore); } Collection scoreDocs(Collection::newInstance(hq->size())); for (int32_t i = hq->size() - 1; i >= 0; --i) { // put docs in array scoreDocs[i] = hq->pop(); } return newLucene(totalHits, scoreDocs, hq->getFields(), maxScore); } void MultiSearcher::search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& results) { for (int32_t i = 0; i < searchables.size(); ++i) { int32_t start = starts[i]; CollectorPtr hc = newLucene(results, start); searchables[i]->search(weight, filter, hc); } } QueryPtr MultiSearcher::rewrite(const QueryPtr& query) { Collection queries(Collection::newInstance(searchables.size())); for (int32_t i = 0; i < searchables.size(); ++i) { queries[i] = searchables[i]->rewrite(query); } return queries[0]->combine(queries); } ExplanationPtr 
MultiSearcher::explain(const WeightPtr& weight, int32_t doc) { int32_t i = subSearcher(doc); // find searcher index return searchables[i]->explain(weight, doc - starts[i]); // dispatch to searcher } WeightPtr MultiSearcher::createWeight(const QueryPtr& query) { // step 1 QueryPtr rewrittenQuery(rewrite(query)); // step 2 SetTerm terms(SetTerm::newInstance()); rewrittenQuery->extractTerms(terms); // step3 Collection allTermsArray(Collection::newInstance(terms.begin(), terms.end())); Collection aggregatedDfs(Collection::newInstance(terms.size())); for (Collection::iterator searchable = searchables.begin(); searchable != searchables.end(); ++searchable) { Collection dfs((*searchable)->docFreqs(allTermsArray)); for (int32_t j = 0; j < aggregatedDfs.size(); ++j) { aggregatedDfs[j] += dfs[j]; } } MapTermInt dfMap(MapTermInt::newInstance()); for (int32_t i = 0; i < allTermsArray.size(); ++i) { dfMap.put(allTermsArray[i], aggregatedDfs[i]); } // step4 int32_t numDocs = maxDoc(); CachedDfSourcePtr cacheSim(newLucene(dfMap, numDocs, getSimilarity())); return rewrittenQuery->weight(cacheSim); } CachedDfSource::CachedDfSource(MapTermInt dfMap, int32_t maxDoc, const SimilarityPtr& similarity) { this->dfMap = dfMap; this->_maxDoc = maxDoc; setSimilarity(similarity); } CachedDfSource::~CachedDfSource() { } int32_t CachedDfSource::docFreq(const TermPtr& term) { MapTermInt::iterator df = dfMap.find(term); if (df == dfMap.end()) { boost::throw_exception(IllegalArgumentException(L"df for term " + term->text() + L" not available")); } return df->second; } Collection CachedDfSource::docFreqs(Collection terms) { Collection result(Collection::newInstance(terms.size())); for (int32_t i = 0; i < terms.size(); ++i) { result[i] = docFreq(terms[i]); } return result; } int32_t CachedDfSource::maxDoc() { return _maxDoc; } QueryPtr CachedDfSource::rewrite(const QueryPtr& query) { // This is a bit of a hack. 
We know that a query which creates a Weight based on this Dummy-Searcher is // always already rewritten (see preparedWeight()). Therefore we just return the unmodified query here. return query; } void CachedDfSource::close() { boost::throw_exception(UnsupportedOperationException()); } DocumentPtr CachedDfSource::doc(int32_t n) { boost::throw_exception(UnsupportedOperationException()); return DocumentPtr(); } DocumentPtr CachedDfSource::doc(int32_t n, const FieldSelectorPtr& fieldSelector) { boost::throw_exception(UnsupportedOperationException()); return DocumentPtr(); } ExplanationPtr CachedDfSource::explain(const WeightPtr& weight, int32_t doc) { boost::throw_exception(UnsupportedOperationException()); return ExplanationPtr(); } void CachedDfSource::search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& results) { boost::throw_exception(UnsupportedOperationException()); } TopDocsPtr CachedDfSource::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n) { boost::throw_exception(UnsupportedOperationException()); return TopDocsPtr(); } TopFieldDocsPtr CachedDfSource::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort) { boost::throw_exception(UnsupportedOperationException()); return TopFieldDocsPtr(); } MultiSearcherCallableNoSort::MultiSearcherCallableNoSort(const SynchronizePtr& lock, const SearchablePtr& searchable, const WeightPtr& weight, const FilterPtr& filter, int32_t nDocs, const HitQueuePtr& hq, int32_t i, Collection starts) { this->lock = lock; this->searchable = searchable; this->weight = weight; this->filter = filter; this->nDocs = nDocs; this->hq = hq; this->i = i; this->starts = starts; } MultiSearcherCallableNoSort::~MultiSearcherCallableNoSort() { } TopDocsPtr MultiSearcherCallableNoSort::call() { TopDocsPtr docs(searchable->search(weight, filter, nDocs)); Collection scoreDocs(docs->scoreDocs); for (int32_t j = 0; j < scoreDocs.size(); ++j) { // merge scoreDocs into hq 
ScoreDocPtr scoreDoc(scoreDocs[j]); scoreDoc->doc += starts[i]; // convert doc SyncLock syncLock(lock); if (scoreDoc == hq->addOverflow(scoreDoc)) { break; } } return docs; } MultiSearcherCallableWithSort::MultiSearcherCallableWithSort(const SynchronizePtr& lock, const SearchablePtr& searchable, const WeightPtr& weight, const FilterPtr& filter, int32_t nDocs, const FieldDocSortedHitQueuePtr& hq, const SortPtr& sort, int32_t i, Collection starts) { this->lock = lock; this->searchable = searchable; this->weight = weight; this->filter = filter; this->nDocs = nDocs; this->hq = hq; this->i = i; this->starts = starts; this->sort = sort; } MultiSearcherCallableWithSort::~MultiSearcherCallableWithSort() { } TopFieldDocsPtr MultiSearcherCallableWithSort::call() { TopFieldDocsPtr docs(searchable->search(weight, filter, nDocs, sort)); // If one of the Sort fields is FIELD_DOC, need to fix its values, so that it will break ties by doc Id // properly. Otherwise, it will compare to 'relative' doc Ids, that belong to two different searchables. 
for (int32_t j = 0; j < docs->fields.size(); ++j) { if (docs->fields[j]->getType() == SortField::DOC) { // iterate over the score docs and change their fields value for (int32_t j2 = 0; j2 < docs->scoreDocs.size(); ++j2) { FieldDocPtr fd(boost::dynamic_pointer_cast(docs->scoreDocs[j2])); fd->fields[j] = VariantUtils::get(fd->fields[j]) + starts[i]; } break; } } { SyncLock syncLock(lock); hq->setFields(docs->fields); } Collection scoreDocs(docs->scoreDocs); for (int32_t j = 0; j < scoreDocs.size(); ++j) { // merge scoreDocs into hq FieldDocPtr fieldDoc(boost::dynamic_pointer_cast(scoreDocs[j])); fieldDoc->doc += starts[i]; // convert doc SyncLock syncLock(lock); if (fieldDoc == hq->addOverflow(fieldDoc)) { break; } } return docs; } MultiSearcherCollector::MultiSearcherCollector(const CollectorPtr& collector, int32_t start) { this->collector = collector; this->start = start; } MultiSearcherCollector::~MultiSearcherCollector() { } void MultiSearcherCollector::setScorer(const ScorerPtr& scorer) { collector->setScorer(scorer); } void MultiSearcherCollector::collect(int32_t doc) { collector->collect(doc); } void MultiSearcherCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { collector->setNextReader(reader, start + docBase); } bool MultiSearcherCollector::acceptsDocsOutOfOrder() { return collector->acceptsDocsOutOfOrder(); } } LucenePlusPlus-rel_3.0.9/src/core/search/MultiTermQuery.cpp000066400000000000000000000244731456444476200240020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiTermQuery.h" #include "_MultiTermQuery.h" #include "ConstantScoreQuery.h" #include "MultiTermQueryWrapperFilter.h" #include "QueryWrapperFilter.h" #include "BooleanQuery.h" #include "Term.h" #include "TermQuery.h" #include "TermDocs.h" #include "FilteredTermEnum.h" #include "IndexReader.h" #include "MiscUtils.h" namespace Lucene { MultiTermQuery::MultiTermQuery() { numberOfTerms = 0; rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT(); } MultiTermQuery::~MultiTermQuery() { } RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE() { static RewriteMethodPtr _CONSTANT_SCORE_FILTER_REWRITE; LUCENE_RUN_ONCE( _CONSTANT_SCORE_FILTER_REWRITE = newLucene(); CycleCheck::addStatic(_CONSTANT_SCORE_FILTER_REWRITE); ); return _CONSTANT_SCORE_FILTER_REWRITE; } RewriteMethodPtr MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE() { static RewriteMethodPtr _SCORING_BOOLEAN_QUERY_REWRITE; LUCENE_RUN_ONCE( _SCORING_BOOLEAN_QUERY_REWRITE = newLucene(); CycleCheck::addStatic(_SCORING_BOOLEAN_QUERY_REWRITE); ); return _SCORING_BOOLEAN_QUERY_REWRITE; } RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE() { static RewriteMethodPtr _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; LUCENE_RUN_ONCE( _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = newLucene(); CycleCheck::addStatic(_CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); ); return _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; } RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT() { static RewriteMethodPtr _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; LUCENE_RUN_ONCE( _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = newLucene(); CycleCheck::addStatic(_CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); ); return _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; } int32_t MultiTermQuery::getTotalNumberOfTerms() { return numberOfTerms; } void MultiTermQuery::clearTotalNumberOfTerms() { numberOfTerms = 0; } void MultiTermQuery::incTotalNumberOfTerms(int32_t 
inc) { numberOfTerms += inc; } QueryPtr MultiTermQuery::rewrite(const IndexReaderPtr& reader) { return rewriteMethod->rewrite(reader, shared_from_this()); } RewriteMethodPtr MultiTermQuery::getRewriteMethod() { return rewriteMethod; } void MultiTermQuery::setRewriteMethod(const RewriteMethodPtr& method) { rewriteMethod = method; } LuceneObjectPtr MultiTermQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = Query::clone(other); MultiTermQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->rewriteMethod = rewriteMethod; cloneQuery->numberOfTerms = numberOfTerms; return cloneQuery; } int32_t MultiTermQuery::hashCode() { int32_t prime = 31; int32_t result = 1; result = prime * result + MiscUtils::doubleToIntBits(getBoost()); result = prime * result; result += rewriteMethod->hashCode(); return result; } bool MultiTermQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } if (!other) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } MultiTermQueryPtr otherMultiTermQuery(boost::dynamic_pointer_cast(other)); if (!otherMultiTermQuery) { return false; } if (MiscUtils::doubleToIntBits(getBoost()) != MiscUtils::doubleToIntBits(otherMultiTermQuery->getBoost())) { return false; } if (!rewriteMethod->equals(otherMultiTermQuery->rewriteMethod)) { return false; } return true; } RewriteMethod::~RewriteMethod() { } ConstantScoreFilterRewrite::~ConstantScoreFilterRewrite() { } QueryPtr ConstantScoreFilterRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) { QueryPtr result(newLucene(newLucene(query))); result->setBoost(query->getBoost()); return result; } ScoringBooleanQueryRewrite::~ScoringBooleanQueryRewrite() { } QueryPtr ScoringBooleanQueryRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) { FilteredTermEnumPtr enumerator(query->getEnum(reader)); BooleanQueryPtr result(newLucene(true)); int32_t count = 0; 
LuceneException finally; try { do { TermPtr t(enumerator->term()); if (t) { TermQueryPtr tq(newLucene(t)); // found a match tq->setBoost(query->getBoost() * enumerator->difference()); // set the boost result->add(tq, BooleanClause::SHOULD); // add to query ++count; } } while (enumerator->next()); } catch (LuceneException& e) { finally = e; } enumerator->close(); finally.throwException(); query->incTotalNumberOfTerms(count); return result; } ConstantScoreBooleanQueryRewrite::~ConstantScoreBooleanQueryRewrite() { } QueryPtr ConstantScoreBooleanQueryRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) { // strip the scores off QueryPtr result(newLucene(newLucene(ScoringBooleanQueryRewrite::rewrite(reader, query)))); result->setBoost(query->getBoost()); return result; } // Defaults derived from rough tests with a 20.0 million doc Wikipedia index. With more than 350 terms // in the query, the filter method is fastest const int32_t ConstantScoreAutoRewrite::DEFAULT_TERM_COUNT_CUTOFF = 350; // If the query will hit more than 1 in 1000 of the docs in the index (0.1%), the filter method is fastest const double ConstantScoreAutoRewrite::DEFAULT_DOC_COUNT_PERCENT = 0.1; ConstantScoreAutoRewrite::ConstantScoreAutoRewrite() { termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF; docCountPercent = DEFAULT_DOC_COUNT_PERCENT; } ConstantScoreAutoRewrite::~ConstantScoreAutoRewrite() { } void ConstantScoreAutoRewrite::setTermCountCutoff(int32_t count) { termCountCutoff = count; } int32_t ConstantScoreAutoRewrite::getTermCountCutoff() { return termCountCutoff; } void ConstantScoreAutoRewrite::setDocCountPercent(double percent) { docCountPercent = percent; } double ConstantScoreAutoRewrite::getDocCountPercent() { return docCountPercent; } QueryPtr ConstantScoreAutoRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) { // Get the enum and start visiting terms. 
If we exhaust the enum before hitting either of the // cutoffs, we use ConstantBooleanQueryRewrite; else ConstantFilterRewrite Collection pendingTerms(Collection::newInstance()); int32_t docCountCutoff = (int32_t)((docCountPercent / 100.0) * (double)reader->maxDoc()); int32_t termCountLimit = std::min(BooleanQuery::getMaxClauseCount(), termCountCutoff); int32_t docVisitCount = 0; FilteredTermEnumPtr enumerator(query->getEnum(reader)); QueryPtr result; LuceneException finally; try { while (true) { TermPtr t(enumerator->term()); if (t) { pendingTerms.add(t); // Loading the TermInfo from the terms dict here should not be costly, because 1) the // query/filter will load the TermInfo when it runs, and 2) the terms dict has a cache docVisitCount += reader->docFreq(t); } if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) { // Too many terms -- make a filter. result = newLucene(newLucene(query)); result->setBoost(query->getBoost()); break; } else if (!enumerator->next()) { // Enumeration is done, and we hit a small enough number of terms and docs - // just make a BooleanQuery, now BooleanQueryPtr bq(newLucene(true)); for (Collection::iterator term = pendingTerms.begin(); term != pendingTerms.end(); ++ term) { TermQueryPtr tq(newLucene(*term)); bq->add(tq, BooleanClause::SHOULD); } // Strip scores result = newLucene(newLucene(bq)); result->setBoost(query->getBoost()); query->incTotalNumberOfTerms(pendingTerms.size()); break; } } } catch (LuceneException& e) { finally = e; } enumerator->close(); finally.throwException(); return result; } int32_t ConstantScoreAutoRewrite::hashCode() { int32_t prime = 1279; return (int32_t)(prime * termCountCutoff + MiscUtils::doubleToLongBits(docCountPercent)); } bool ConstantScoreAutoRewrite::equals(const LuceneObjectPtr& other) { if (RewriteMethod::equals(other)) { return true; } if (!other) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } ConstantScoreAutoRewritePtr 
otherConstantScoreAutoRewrite(boost::dynamic_pointer_cast(other)); if (!otherConstantScoreAutoRewrite) { return false; } if (termCountCutoff != otherConstantScoreAutoRewrite->termCountCutoff) { return false; } if (MiscUtils::doubleToLongBits(docCountPercent) != MiscUtils::doubleToLongBits(otherConstantScoreAutoRewrite->docCountPercent)) { return false; } return true; } ConstantScoreAutoRewriteDefault::~ConstantScoreAutoRewriteDefault() { } void ConstantScoreAutoRewriteDefault::setTermCountCutoff(int32_t count) { boost::throw_exception(UnsupportedOperationException(L"Please create a private instance")); } void ConstantScoreAutoRewriteDefault::setDocCountPercent(double percent) { boost::throw_exception(UnsupportedOperationException(L"Please create a private instance")); } } LucenePlusPlus-rel_3.0.9/src/core/search/MultiTermQueryWrapperFilter.cpp000066400000000000000000000065751456444476200265140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MultiTermQueryWrapperFilter.h" #include "MultiTermQuery.h" #include "IndexReader.h" #include "TermEnum.h" #include "TermDocs.h" #include "Term.h" #include "FilteredTermEnum.h" #include "DocIdSet.h" #include "OpenBitSet.h" #include "MiscUtils.h" namespace Lucene { MultiTermQueryWrapperFilter::MultiTermQueryWrapperFilter(const MultiTermQueryPtr& query) { this->query = query; } MultiTermQueryWrapperFilter::~MultiTermQueryWrapperFilter() { } String MultiTermQueryWrapperFilter::toString() { // query->toString should be ok for the filter, too, if the query boost is 1.0 return query->toString(); } bool MultiTermQueryWrapperFilter::equals(const LuceneObjectPtr& other) { if (Filter::equals(other)) { return true; } if (!other) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } MultiTermQueryWrapperFilterPtr otherMultiTermQueryWrapperFilter(boost::dynamic_pointer_cast(other)); if (otherMultiTermQueryWrapperFilter) { return query->equals(otherMultiTermQueryWrapperFilter->query); } return false; } int32_t MultiTermQueryWrapperFilter::hashCode() { return query->hashCode(); } int32_t MultiTermQueryWrapperFilter::getTotalNumberOfTerms() { return query->getTotalNumberOfTerms(); } void MultiTermQueryWrapperFilter::clearTotalNumberOfTerms() { query->clearTotalNumberOfTerms(); } DocIdSetPtr MultiTermQueryWrapperFilter::getDocIdSet(const IndexReaderPtr& reader) { TermEnumPtr enumerator(query->getEnum(reader)); OpenBitSetPtr bitSet; LuceneException finally; try { // if current term in enum is null, the enum is empty -> shortcut if (!enumerator->term()) { return DocIdSet::EMPTY_DOCIDSET(); } // else fill into a OpenBitSet bitSet = newLucene(reader->maxDoc()); Collection docs(Collection::newInstance(32)); Collection freqs(Collection::newInstance(32)); TermDocsPtr termDocs(reader->termDocs()); try { int32_t termCount = 0; do { TermPtr 
term(enumerator->term()); if (!term) { break; } ++termCount; termDocs->seek(term); while (true) { int32_t count = termDocs->read(docs, freqs); if (count != 0) { for (int32_t i = 0; i < count; ++i) { bitSet->set(docs[i]); } } else { break; } } } while (enumerator->next()); query->incTotalNumberOfTerms(termCount); } catch (LuceneException& e) { finally = e; } termDocs->close(); } catch (LuceneException& e) { finally = e; } enumerator->close(); finally.throwException(); return bitSet; } } LucenePlusPlus-rel_3.0.9/src/core/search/NumericRangeFilter.cpp000066400000000000000000000062321456444476200245500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NumericRangeFilter.h" #include "NumericRangeQuery.h" namespace Lucene { NumericRangeFilter::NumericRangeFilter(const NumericRangeQueryPtr& query) : MultiTermQueryWrapperFilter(query) { } NumericRangeFilter::~NumericRangeFilter() { } NumericRangeFilterPtr NumericRangeFilter::newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); } NumericRangeFilterPtr NumericRangeFilter::newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, min, max, minInclusive, maxInclusive); } NumericRangeFilterPtr NumericRangeFilter::newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); } NumericRangeFilterPtr 
NumericRangeFilter::newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, min, max, minInclusive, maxInclusive); } NumericRangeFilterPtr NumericRangeFilter::newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); } NumericRangeFilterPtr NumericRangeFilter::newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, min, max, minInclusive, maxInclusive); } NumericRangeFilterPtr NumericRangeFilter::newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) { return newLucene(NumericRangeQuery::newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive)); } NumericRangeFilterPtr NumericRangeFilter::newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) { return newLucene(NumericRangeQuery::newNumericRange(field, min, max, minInclusive, maxInclusive)); } String NumericRangeFilter::getField() { return boost::static_pointer_cast(query)->field; } bool NumericRangeFilter::includesMin() { return boost::static_pointer_cast(query)->minInclusive; } bool NumericRangeFilter::includesMax() { return boost::static_pointer_cast(query)->maxInclusive; } NumericValue NumericRangeFilter::getMin() { return boost::static_pointer_cast(query)->min; } NumericValue NumericRangeFilter::getMax() { return boost::static_pointer_cast(query)->min; } } LucenePlusPlus-rel_3.0.9/src/core/search/NumericRangeQuery.cpp000066400000000000000000000304111456444476200244240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NumericRangeQuery.h" #include "_NumericRangeQuery.h" #include "Term.h" #include "IndexReader.h" #include "MiscUtils.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { NumericRangeQuery::NumericRangeQuery(const String& field, int32_t precisionStep, int32_t valSize, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) { BOOST_ASSERT(valSize == 32 || valSize == 64); if (precisionStep < 1) { boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); } this->field = field; this->precisionStep = precisionStep; this->valSize = valSize; this->min = min; this->max = max; this->minInclusive = minInclusive; this->maxInclusive = maxInclusive; // For bigger precisionSteps this query likely hits too many terms, so set to CONSTANT_SCORE_FILTER // right off (especially as the FilteredTermEnum is costly if wasted only for AUTO tests because it // creates new enums from IndexReader for each sub-range) switch (valSize) { case 64: setRewriteMethod(precisionStep > 6 ? CONSTANT_SCORE_FILTER_REWRITE() : CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()); break; case 32: setRewriteMethod(precisionStep > 8 ? 
CONSTANT_SCORE_FILTER_REWRITE() : CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()); break; default: // should never happen boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64")); } // shortcut if upper bound == lower bound if (!VariantUtils::isNull(min) && min == max) { setRewriteMethod(CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()); } } NumericRangeQuery::~NumericRangeQuery() { } NumericRangeQueryPtr NumericRangeQuery::newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); } NumericRangeQueryPtr NumericRangeQuery::newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, min, max, minInclusive, maxInclusive); } NumericRangeQueryPtr NumericRangeQuery::newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); } NumericRangeQueryPtr NumericRangeQuery::newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, min, max, minInclusive, maxInclusive); } NumericRangeQueryPtr NumericRangeQuery::newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); } NumericRangeQueryPtr NumericRangeQuery::newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, min, max, minInclusive, maxInclusive); } NumericRangeQueryPtr NumericRangeQuery::newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) { if (!VariantUtils::equalsType(min, max)) { 
boost::throw_exception(IllegalArgumentException(L"min/max must be of the same type")); } int32_t valSize = VariantUtils::typeOf(min) ? 32 : 64; return newLucene(field, precisionStep, valSize, min, max, minInclusive, maxInclusive); } NumericRangeQueryPtr NumericRangeQuery::newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) { return newNumericRange(field, NumericUtils::PRECISION_STEP_DEFAULT, min, max, minInclusive, maxInclusive); } FilteredTermEnumPtr NumericRangeQuery::getEnum(const IndexReaderPtr& reader) { return newLucene(shared_from_this(), reader); } String NumericRangeQuery::getField() { return field; } bool NumericRangeQuery::includesMin() { return minInclusive; } bool NumericRangeQuery::includesMax() { return maxInclusive; } NumericValue NumericRangeQuery::getMin() { return min; } NumericValue NumericRangeQuery::getMax() { return min; } LuceneObjectPtr NumericRangeQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = MultiTermQuery::clone(other ? other : newLucene(field, precisionStep, valSize, min, max, minInclusive, maxInclusive)); NumericRangeQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->field = field; cloneQuery->precisionStep = precisionStep; cloneQuery->valSize = valSize; cloneQuery->min = min; cloneQuery->max = max; cloneQuery->minInclusive = minInclusive; cloneQuery->maxInclusive = maxInclusive; return cloneQuery; } String NumericRangeQuery::toString(const String& field) { StringStream buffer; if (this->field != field) { buffer << this->field << L":"; } buffer << (minInclusive ? L"[" : L"{"); if (VariantUtils::isNull(min)) { buffer << L"*"; } else { buffer << min; } buffer << L" TO "; if (VariantUtils::isNull(max)) { buffer << L"*"; } else { buffer << max; } buffer << (maxInclusive ? 
L"]" : L"}"); buffer << boostString(); return buffer.str(); } bool NumericRangeQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } if (!MultiTermQuery::equals(other)) { return false; } NumericRangeQueryPtr otherNumericRangeQuery(boost::dynamic_pointer_cast(other)); if (!otherNumericRangeQuery) { return false; } return (field == otherNumericRangeQuery->field && min == otherNumericRangeQuery->min && max == otherNumericRangeQuery->max && minInclusive == otherNumericRangeQuery->minInclusive && maxInclusive == otherNumericRangeQuery->maxInclusive && precisionStep == otherNumericRangeQuery->precisionStep); } int32_t NumericRangeQuery::hashCode() { int32_t hash = MultiTermQuery::hashCode(); hash += StringUtils::hashCode(field) ^ 0x4565fd66 + precisionStep ^ 0x64365465; if (!VariantUtils::isNull(min)) { hash += VariantUtils::hashCode(min) ^ 0x14fa55fb; } if (!VariantUtils::isNull(max)) { hash += VariantUtils::hashCode(max) ^ 0x733fa5fe; } return hash + (MiscUtils::hashCode(minInclusive) ^ 0x14fa55fb) + (MiscUtils::hashCode(maxInclusive) ^ 0x733fa5fe); } NumericRangeTermEnum::NumericRangeTermEnum(const NumericRangeQueryPtr& query, const IndexReaderPtr& reader) { this->_query = query; this->reader = reader; this->rangeBounds = Collection::newInstance(); this->termTemplate = newLucene(query->field); switch (query->valSize) { case 64: { // lower int64_t minBound = std::numeric_limits::min(); if (VariantUtils::typeOf(query->min)) { minBound = VariantUtils::get(query->min); } else if (VariantUtils::typeOf(query->min)) { minBound = NumericUtils::doubleToSortableLong(VariantUtils::get(query->min)); } if (!query->minInclusive && !VariantUtils::isNull(query->min)) { if (minBound == std::numeric_limits::max()) { break; } ++minBound; } // upper int64_t maxBound = std::numeric_limits::max(); if (VariantUtils::typeOf(query->max)) { maxBound = VariantUtils::get(query->max); } else if (VariantUtils::typeOf(query->max)) { maxBound = 
NumericUtils::doubleToSortableLong(VariantUtils::get(query->max)); } if (!query->maxInclusive && !VariantUtils::isNull(query->max)) { if (maxBound == std::numeric_limits::min()) { break; } --maxBound; } NumericUtils::splitLongRange(newLucene(rangeBounds), query->precisionStep, minBound, maxBound); break; } case 32: { // lower int32_t minBound = INT_MIN; if (VariantUtils::typeOf(query->min)) { minBound = VariantUtils::get(query->min); } if (!query->minInclusive && !VariantUtils::isNull(query->min)) { if (minBound == INT_MAX) { break; } ++minBound; } // upper int32_t maxBound = INT_MAX; if (VariantUtils::typeOf(query->max)) { maxBound = VariantUtils::get(query->max); } if (!query->maxInclusive && !VariantUtils::isNull(query->max)) { if (maxBound == INT_MIN) { break; } --maxBound; } NumericUtils::splitIntRange(newLucene(rangeBounds), query->precisionStep, minBound, maxBound); break; } default: // should never happen boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64")); } // seek to first term next(); } NumericRangeTermEnum::~NumericRangeTermEnum() { } double NumericRangeTermEnum::difference() { return 1.0; } bool NumericRangeTermEnum::endEnum() { boost::throw_exception(UnsupportedOperationException(L"not implemented")); return false; } void NumericRangeTermEnum::setEnum(const TermEnumPtr& actualEnum) { boost::throw_exception(UnsupportedOperationException(L"not implemented")); } bool NumericRangeTermEnum::termCompare(const TermPtr& term) { return (term->field() == NumericRangeQueryPtr(_query)->field && term->text().compare(currentUpperBound) <= 0); } bool NumericRangeTermEnum::next() { // if a current term exists, the actual enum is initialized: try change to next term, if no // such term exists, fall-through if (currentTerm) { BOOST_ASSERT(actualEnum); if (actualEnum->next()) { currentTerm = actualEnum->term(); if (termCompare(currentTerm)) { return true; } } } // if all above fails, we go forward to the next enum, if one is available 
currentTerm.reset(); while (rangeBounds.size() >= 2) { BOOST_ASSERT(rangeBounds.size() % 2 == 0); // close the current enum and read next bounds if (actualEnum) { actualEnum->close(); actualEnum.reset(); } String lowerBound(rangeBounds.removeFirst()); currentUpperBound = rangeBounds.removeFirst(); // create a new enum actualEnum = reader->terms(termTemplate->createTerm(lowerBound)); currentTerm = actualEnum->term(); if (currentTerm && termCompare(currentTerm)) { return true; } // clear the current term for next iteration currentTerm.reset(); } // no more sub-range enums available BOOST_ASSERT(rangeBounds.empty() && !currentTerm); return false; } void NumericRangeTermEnum::close() { rangeBounds.clear(); currentUpperBound.clear(); FilteredTermEnum::close(); } NumericLongRangeBuilder::NumericLongRangeBuilder(Collection rangeBounds) { this->rangeBounds = rangeBounds; } NumericLongRangeBuilder::~NumericLongRangeBuilder() { } void NumericLongRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) { rangeBounds.add(minPrefixCoded); rangeBounds.add(maxPrefixCoded); } NumericIntRangeBuilder::NumericIntRangeBuilder(Collection rangeBounds) { this->rangeBounds = rangeBounds; } NumericIntRangeBuilder::~NumericIntRangeBuilder() { } void NumericIntRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) { rangeBounds.add(minPrefixCoded); rangeBounds.add(maxPrefixCoded); } } LucenePlusPlus-rel_3.0.9/src/core/search/ParallelMultiSearcher.cpp000066400000000000000000000106611456444476200252500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include #include "ParallelMultiSearcher.h" #include "_MultiSearcher.h" #include "HitQueue.h" #include "FieldDocSortedHitQueue.h" #include "FieldDoc.h" #include "TopFieldDocs.h" #include "ThreadPool.h" namespace Lucene { ParallelMultiSearcher::ParallelMultiSearcher(Collection searchables) : MultiSearcher(searchables) { } ParallelMultiSearcher::~ParallelMultiSearcher() { } int32_t ParallelMultiSearcher::docFreq(const TermPtr& term) { ThreadPoolPtr threadPool(ThreadPool::getInstance()); Collection searchThreads(Collection::newInstance(searchables.size())); for (int32_t i = 0; i < searchables.size(); ++i) { searchThreads[i] = threadPool->scheduleTask(boost::protect(boost::bind(boost::mem_fn(&Searchable::docFreq), searchables[i], term))); } int32_t docFreq = 0; for (int32_t i = 0; i < searchThreads.size(); ++i) { docFreq += searchThreads[i]->get(); } return docFreq; } TopDocsPtr ParallelMultiSearcher::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n) { HitQueuePtr hq(newLucene(n, false)); SynchronizePtr lock(newInstance()); ThreadPoolPtr threadPool(ThreadPool::getInstance()); Collection searchThreads(Collection::newInstance(searchables.size())); Collection multiSearcher(Collection::newInstance(searchables.size())); for (int32_t i = 0; i < searchables.size(); ++i) { // search each searchable multiSearcher[i] = newLucene(lock, searchables[i], weight, filter, n, hq, i, starts); searchThreads[i] = threadPool->scheduleTask(boost::protect(boost::bind(boost::mem_fn(&MultiSearcherCallableNoSort::call), multiSearcher[i]))); } int32_t totalHits = 0; double maxScore = -std::numeric_limits::infinity(); for (int32_t i = 0; i < searchThreads.size(); ++i) { TopDocsPtr topDocs(searchThreads[i]->get()); totalHits += topDocs->totalHits; maxScore = std::max(maxScore, topDocs->maxScore); } Collection scoreDocs(Collection::newInstance(hq->size())); for (int32_t i 
= hq->size() - 1; i >= 0; --i) { // put docs in array scoreDocs[i] = hq->pop(); } return newLucene(totalHits, scoreDocs, maxScore); } TopFieldDocsPtr ParallelMultiSearcher::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort) { if (!sort) { boost::throw_exception(NullPointerException(L"sort must not be null")); } FieldDocSortedHitQueuePtr hq(newLucene(n)); SynchronizePtr lock(newInstance()); ThreadPoolPtr threadPool(ThreadPool::getInstance()); Collection searchThreads(Collection::newInstance(searchables.size())); Collection multiSearcher(Collection::newInstance(searchables.size())); for (int32_t i = 0; i < searchables.size(); ++i) { // search each searchable multiSearcher[i] = newLucene(lock, searchables[i], weight, filter, n, hq, sort, i, starts); searchThreads[i] = threadPool->scheduleTask(boost::protect(boost::bind(boost::mem_fn(&MultiSearcherCallableWithSort::call), multiSearcher[i]))); } int32_t totalHits = 0; double maxScore = -std::numeric_limits::infinity(); for (int32_t i = 0; i < searchThreads.size(); ++i) { TopFieldDocsPtr topDocs(searchThreads[i]->get()); totalHits += topDocs->totalHits; maxScore = std::max(maxScore, topDocs->maxScore); } Collection scoreDocs(Collection::newInstance(hq->size())); for (int32_t i = hq->size() - 1; i >= 0; --i) { // put docs in array scoreDocs[i] = hq->pop(); } return newLucene(totalHits, scoreDocs, hq->getFields(), maxScore); } } LucenePlusPlus-rel_3.0.9/src/core/search/PhrasePositions.cpp000066400000000000000000000026151456444476200241560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PhrasePositions.h" #include "TermPositions.h" namespace Lucene { PhrasePositions::PhrasePositions(const TermPositionsPtr& t, int32_t o) { doc = 0; position = 0; count = 0; repeats = false; tp = t; offset = o; } PhrasePositions::~PhrasePositions() { } bool PhrasePositions::next() { if (!tp->next()) { tp->close(); // close stream doc = INT_MAX; // sentinel value return false; } doc = tp->doc(); position = 0; return true; } bool PhrasePositions::skipTo(int32_t target) { if (!tp->skipTo(target)) { tp->close(); // close stream doc = INT_MAX; // sentinel value return false; } doc = tp->doc(); position = 0; return true; } void PhrasePositions::firstPosition() { count = tp->freq(); // read first pos nextPosition(); } bool PhrasePositions::nextPosition() { if (count-- > 0) { // read subsequent pos's position = tp->nextPosition() - offset; return true; } else { return false; } } } LucenePlusPlus-rel_3.0.9/src/core/search/PhraseQuery.cpp000066400000000000000000000214731456444476200232770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PhraseQuery.h" #include "_PhraseQuery.h" #include "Similarity.h" #include "Term.h" #include "TermPositions.h" #include "TermQuery.h" #include "IndexReader.h" #include "ExactPhraseScorer.h" #include "SloppyPhraseScorer.h" #include "Explanation.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { PhraseQuery::PhraseQuery() { terms = Collection::newInstance(); positions = Collection::newInstance(); maxPosition = 0; slop = 0; } PhraseQuery::~PhraseQuery() { } void PhraseQuery::setSlop(int32_t slop) { this->slop = slop; } int32_t PhraseQuery::getSlop() { return slop; } void PhraseQuery::add(const TermPtr& term) { int32_t position = 0; if (!positions.empty()) { position = positions[positions.size() - 1] + 1; } add(term, position); } void PhraseQuery::add(const TermPtr& term, int32_t position) { if (terms.empty()) { field = term->field(); } else if (term->field() != field) { boost::throw_exception(IllegalArgumentException(L"All phrase terms must be in the same field: " + term->toString())); } terms.add(term); positions.add(position); if (position > maxPosition) { maxPosition = position; } } Collection PhraseQuery::getTerms() { return terms; } Collection PhraseQuery::getPositions() { return positions; } WeightPtr PhraseQuery::createWeight(const SearcherPtr& searcher) { if (terms.size() == 1) { // optimize one-term case QueryPtr termQuery(newLucene(terms[0])); termQuery->setBoost(getBoost()); return termQuery->createWeight(searcher); } return newLucene(shared_from_this(), searcher); } void PhraseQuery::extractTerms(SetTerm terms) { terms.addAll(this->terms.begin(), this->terms.end()); } String PhraseQuery::toString(const String& field) { StringStream buffer; if (this->field != field) { buffer << this->field << L":"; } buffer << L"\""; Collection pieces(Collection::newInstance(maxPosition + 1)); for (int32_t i = 0; i < terms.size(); ++i) { int32_t 
pos = positions[i]; String s(pieces[pos]); if (!s.empty()) { s += L"|"; } s += terms[i]->text(); pieces[pos] = s; } for (int32_t i = 0; i < pieces.size(); ++i) { if (i > 0) { buffer << L" "; } String s(pieces[i]); buffer << (s.empty() ? L"?" : s); } buffer << L"\""; if (slop != 0) { buffer << L"~" << slop; } buffer << boostString(); return buffer.str(); } bool PhraseQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } PhraseQueryPtr otherPhraseQuery(boost::dynamic_pointer_cast(other)); if (!otherPhraseQuery) { return false; } return (getBoost() == otherPhraseQuery->getBoost() && slop == otherPhraseQuery->slop && terms.equals(otherPhraseQuery->terms, luceneEquals()) && positions.equals(otherPhraseQuery->positions)); } int32_t PhraseQuery::hashCode() { return MiscUtils::doubleToIntBits(getBoost()) ^ slop ^ MiscUtils::hashCode(terms.begin(), terms.end(), MiscUtils::hashLucene) ^ MiscUtils::hashCode(positions.begin(), positions.end(), MiscUtils::hashNumeric); } LuceneObjectPtr PhraseQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? 
other : newLucene(); PhraseQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); cloneQuery->field = field; cloneQuery->terms = terms; cloneQuery->positions = positions; cloneQuery->maxPosition = maxPosition; cloneQuery->slop = slop; return cloneQuery; } PhraseWeight::PhraseWeight(const PhraseQueryPtr& query, const SearcherPtr& searcher) { this->query = query; this->similarity = query->getSimilarity(searcher); this->value = 0.0; this->idf = 0.0; this->queryNorm = 0.0; this->queryWeight = 0.0; this->idfExp = similarity->idfExplain(query->terms, searcher); idf = idfExp->getIdf(); } PhraseWeight::~PhraseWeight() { } String PhraseWeight::toString() { return L"weight(" + query->toString() + L")"; } QueryPtr PhraseWeight::getQuery() { return query; } double PhraseWeight::getValue() { return value; } double PhraseWeight::sumOfSquaredWeights() { queryWeight = idf * getQuery()->getBoost(); // compute query weight return queryWeight * queryWeight; // square it } void PhraseWeight::normalize(double norm) { queryNorm = norm; queryWeight *= queryNorm; // normalize query weight value = queryWeight * idf; // idf for document } ScorerPtr PhraseWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { if (query->terms.empty()) { // optimize zero-term case return ScorerPtr(); } Collection tps(Collection::newInstance(query->terms.size())); for (int32_t i = 0; i < tps.size(); ++i) { TermPositionsPtr p(reader->termPositions(query->terms[i])); if (!p) { return ScorerPtr(); } tps[i] = p; } if (query->slop == 0) { // optimize exact case return newLucene(shared_from_this(), tps, query->getPositions(), similarity, reader->norms(query->field)); } else { return newLucene(shared_from_this(), tps, query->getPositions(), similarity, query->slop, reader->norms(query->field)); } } ExplanationPtr PhraseWeight::explain(const IndexReaderPtr& reader, int32_t doc) { ExplanationPtr result(newLucene()); result->setDescription(L"weight(" + query->toString() + 
L" in " + StringUtils::toString(doc) + L"), product of:"); StringStream docFreqsBuffer; StringStream queryBuffer; queryBuffer << L"\""; docFreqsBuffer << idfExp->explain(); for (Collection::iterator term = query->terms.begin(); term != query->terms.end(); ++term) { if (term != query->terms.begin()) { queryBuffer << L" "; } queryBuffer << (*term)->text(); } queryBuffer << L"\""; ExplanationPtr idfExpl(newLucene(idf, L"idf(" + query->field + L":" + docFreqsBuffer.str() + L")")); // explain query weight ExplanationPtr queryExpl(newLucene()); queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); if (query->getBoost() != 1.0) { queryExpl->addDetail(boostExpl); } queryExpl->addDetail(idfExpl); ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); queryExpl->addDetail(queryNormExpl); queryExpl->setValue(boostExpl->getValue() * idfExpl->getValue() * queryNormExpl->getValue()); result->addDetail(queryExpl); // explain field weight ExplanationPtr fieldExpl(newLucene()); fieldExpl->setDescription(L"fieldWeight(" + query->field + L":" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); PhraseScorerPtr phraseScorer(boost::dynamic_pointer_cast(scorer(reader, true, false))); if (!phraseScorer) { return newLucene(0.0, L"no matching docs"); } ExplanationPtr tfExplanation(newLucene()); int32_t d = phraseScorer->advance(doc); double phraseFreq = d == doc ? phraseScorer->currentFreq() : 0.0; tfExplanation->setValue(similarity->tf(phraseFreq)); tfExplanation->setDescription(L"tf(phraseFreq=" + StringUtils::toString(phraseFreq) + L")"); fieldExpl->addDetail(tfExplanation); fieldExpl->addDetail(idfExpl); ExplanationPtr fieldNormExpl(newLucene()); ByteArray fieldNorms(reader->norms(query->field)); double fieldNorm = fieldNorms ? 
Similarity::decodeNorm(fieldNorms[doc]) : 1.0; fieldNormExpl->setValue(fieldNorm); fieldNormExpl->setDescription(L"fieldNorm(field=" + query->field + L", doc=" + StringUtils::toString(doc) + L")"); fieldExpl->addDetail(fieldNormExpl); fieldExpl->setValue(tfExplanation->getValue() * idfExpl->getValue() * fieldNormExpl->getValue()); result->addDetail(fieldExpl); // combine them result->setValue(queryExpl->getValue() * fieldExpl->getValue()); if (queryExpl->getValue() == 1.0) { return fieldExpl; } return result; } } LucenePlusPlus-rel_3.0.9/src/core/search/PhraseQueue.cpp000066400000000000000000000022541456444476200232520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PhraseQueue.h" #include "PhrasePositions.h" namespace Lucene { PhraseQueue::PhraseQueue(int32_t size) : PriorityQueue(size) { } PhraseQueue::~PhraseQueue() { } inline bool PhraseQueue::lessThan(const PhrasePositionsStar& first, const PhrasePositionsStar& second) { if (first && second) { if (first->doc == second->doc) { if (first->position == second->position) { // same doc and pp.position, so decide by actual term positions. // rely on: pp.position == tp.position - offset. return first->offset < second->offset; } else { return first->position < second->position; } } else { return first->doc < second->doc; } } return first ? false : true; } } LucenePlusPlus-rel_3.0.9/src/core/search/PhraseScorer.cpp000066400000000000000000000100331456444476200234150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PhraseScorer.h" #include "PhrasePositions.h" #include "PhraseQueue.h" #include "Weight.h" #include "Similarity.h" namespace Lucene { PhraseScorer::PhraseScorer(const WeightPtr& weight, Collection tps, Collection offsets, const SimilarityPtr& similarity, ByteArray norms) : Scorer(similarity) { this->firstTime = true; this->more = true; this->freq = 0.0; this->norms = norms; this->weight = weight; this->value = weight->getValue(); // convert tps to a list of phrase positions. // Note: phrase-position differs from term-position in that its position reflects the phrase offset: pp.pos = tp.pos - offset. // This allows to easily identify a matching (exact) phrase when all PhrasePositions have exactly the same position. for (int32_t i = 0; i < tps.size(); ++i) { PhrasePositionsPtr pp(newLucene(tps[i], offsets[i])); auto* __pp = pp.get(); if (__last) { // add next to end of list __last->__next = __pp; } else { __first = __pp; } __last = __pp; _holds.emplace_back(pp); } pq = newLucene(tps.size()); // construct empty pq __first->doc = -1; } PhraseScorer::~PhraseScorer() { } int32_t PhraseScorer::docID() { return __first->doc; } int32_t PhraseScorer::nextDoc() { if (firstTime) { init(); firstTime = false; } else if (more) { more = __last->next(); // trigger further scanning } if (!doNext()) { __first->doc = NO_MORE_DOCS; } return __first->doc; } bool PhraseScorer::doNext() { while (more) { while (more && __first->doc < __last->doc) { // find doc with all the terms more = __first->skipTo(__last->doc); // skip first upto last and move it to the end firstToLast(); } if (more) { // found a doc with all of the terms freq = phraseFreq(); // check for phrase if (freq == 0.0) { // no match more = __last->next(); // trigger further scanning } else { return 
true; } } } return false; // no more matches } double PhraseScorer::score() { double raw = getSimilarity()->tf(freq) * value; // raw score return !norms ? raw : raw * Similarity::decodeNorm(norms[__first->doc]); // normalize } int32_t PhraseScorer::advance(int32_t target) { firstTime = false; for (auto* __pp = __first; more && __pp; __pp = __pp->__next) { more = __pp->skipTo(target); } if (more) { sort(); // re-sort } if (!doNext()) { __first->doc = NO_MORE_DOCS; } return __first->doc; } double PhraseScorer::currentFreq() { return freq; } void PhraseScorer::init() { for (auto* __pp = __first; more && __pp; __pp = __pp->__next) { more = __pp->next(); } if (more) { sort(); } } void PhraseScorer::sort() { pq->clear(); for (auto* __pp = __first; more && __pp; __pp = __pp->__next) { pq->add(__pp); } pqToList(); } void PhraseScorer::pqToList() { __last = nullptr; __first = nullptr; while (pq->top()) { auto* __pp = pq->pop(); if (__last) { // add next to end of list __last->__next = __pp; } else { __first = __pp; } __last = __pp; __pp->__next = nullptr; } } void PhraseScorer::firstToLast() { __last->__next = __first; // move first to end of list __last = __first; __first = __first->__next; __last->__next = nullptr; } String PhraseScorer::toString() { return L"scorer(" + weight->toString() + L")"; } } LucenePlusPlus-rel_3.0.9/src/core/search/PositiveScoresOnlyCollector.cpp000066400000000000000000000024461456444476200265200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PositiveScoresOnlyCollector.h" #include "ScoreCachingWrappingScorer.h" namespace Lucene { PositiveScoresOnlyCollector::PositiveScoresOnlyCollector(const CollectorPtr& collector) { this->collector = collector; } PositiveScoresOnlyCollector::~PositiveScoresOnlyCollector() { } void PositiveScoresOnlyCollector::collect(int32_t doc) { if (scorer->score() > 0) { collector->collect(doc); } } void PositiveScoresOnlyCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { collector->setNextReader(reader, docBase); } void PositiveScoresOnlyCollector::setScorer(const ScorerPtr& scorer) { // Set a ScoreCachingWrappingScorer in case the wrapped Collector will call score() also. this->scorer = newLucene(scorer); collector->setScorer(this->scorer); } bool PositiveScoresOnlyCollector::acceptsDocsOutOfOrder() { return collector->acceptsDocsOutOfOrder(); } } LucenePlusPlus-rel_3.0.9/src/core/search/PrefixFilter.cpp000066400000000000000000000015471456444476200234320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PrefixFilter.h" #include "PrefixQuery.h" #include "Term.h" namespace Lucene { PrefixFilter::PrefixFilter(const TermPtr& prefix) : MultiTermQueryWrapperFilter(newLucene(prefix)) { } PrefixFilter::~PrefixFilter() { } TermPtr PrefixFilter::getPrefix() { return boost::static_pointer_cast(query)->getPrefix(); } String PrefixFilter::toString() { StringStream buffer; buffer << L"PrefixFilter(" << getPrefix()->toString() << L")"; return buffer.str(); } } LucenePlusPlus-rel_3.0.9/src/core/search/PrefixQuery.cpp000066400000000000000000000041471456444476200233110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PrefixQuery.h" #include "PrefixTermEnum.h" #include "Term.h" #include "MiscUtils.h" namespace Lucene { PrefixQuery::PrefixQuery(const TermPtr& prefix) { this->prefix = prefix; } PrefixQuery::~PrefixQuery() { } TermPtr PrefixQuery::getPrefix() { return prefix; } FilteredTermEnumPtr PrefixQuery::getEnum(const IndexReaderPtr& reader) { return newLucene(reader, prefix); } String PrefixQuery::toString(const String& field) { StringStream buffer; if (prefix->field() != field) { buffer << prefix->field() << L":"; } buffer << prefix->text() << L"*" << boostString(); return buffer.str(); } LuceneObjectPtr PrefixQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = MultiTermQuery::clone(other ? 
other : newLucene(prefix)); PrefixQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->prefix = prefix; return cloneQuery; } int32_t PrefixQuery::hashCode() { int32_t prime = 31; int32_t result = MultiTermQuery::hashCode(); result = prime * result + (prefix ? prefix->hashCode() : 0); return result; } bool PrefixQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } if (!MultiTermQuery::equals(other)) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } PrefixQueryPtr otherPrefixQuery(boost::dynamic_pointer_cast(other)); if (!otherPrefixQuery) { return false; } if (!prefix) { if (otherPrefixQuery->prefix) { return false; } } else if (!prefix->equals(otherPrefixQuery->prefix)) { return false; } return true; } } LucenePlusPlus-rel_3.0.9/src/core/search/PrefixTermEnum.cpp000066400000000000000000000022031456444476200237270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "PrefixTermEnum.h" #include "IndexReader.h" #include "Term.h" namespace Lucene { PrefixTermEnum::PrefixTermEnum(const IndexReaderPtr& reader, const TermPtr& prefix) { this->_endEnum = false; this->prefix = prefix; setEnum(reader->terms(newLucene(prefix->field(), prefix->text()))); } PrefixTermEnum::~PrefixTermEnum() { } double PrefixTermEnum::difference() { return 1.0; } bool PrefixTermEnum::endEnum() { return _endEnum; } TermPtr PrefixTermEnum::getPrefixTerm() { return prefix; } bool PrefixTermEnum::termCompare(const TermPtr& term) { if (term->field() == prefix->field() && boost::starts_with(term->text(), prefix->text())) { return true; } _endEnum = true; return false; } } LucenePlusPlus-rel_3.0.9/src/core/search/Query.cpp000066400000000000000000000116541456444476200221340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Query.h" #include "BooleanQuery.h" #include "Searcher.h" #include "Similarity.h" #include "MiscUtils.h" namespace Lucene { Query::Query() { boost = 1.0; } Query::~Query() { } void Query::setBoost(double boost) { this->boost = boost; } double Query::getBoost() { return boost; } String Query::toString(const String& field) { return L""; // override } String Query::toString() { return toString(L""); } WeightPtr Query::createWeight(const SearcherPtr& searcher) { boost::throw_exception(UnsupportedOperationException()); return WeightPtr(); } WeightPtr Query::weight(const SearcherPtr& searcher) { QueryPtr query(searcher->rewrite(shared_from_this())); WeightPtr weight(query->createWeight(searcher)); double sum = weight->sumOfSquaredWeights(); double norm = getSimilarity(searcher)->queryNorm(sum); if (MiscUtils::isInfinite(norm) || MiscUtils::isNaN(norm)) { norm = 1.0; } weight->normalize(norm); return weight; } QueryPtr Query::rewrite(const IndexReaderPtr& reader) { return shared_from_this(); } QueryPtr Query::combine(Collection queries) { SetQuery uniques(SetQuery::newInstance()); for (Collection::iterator query = queries.begin(); query != queries.end(); ++query) { Collection clauses; BooleanQueryPtr bq(boost::dynamic_pointer_cast(*query)); // check if we can split the query into clauses bool splittable = bq.get() != NULL; if (splittable) { splittable = bq->isCoordDisabled(); clauses = bq->getClauses(); for (Collection::iterator clause = clauses.begin(); splittable && clause != clauses.end(); ++clause) { splittable = ((*clause)->getOccur() == BooleanClause::SHOULD); } } if (splittable) { for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { uniques.add((*clause)->getQuery()); } } else { uniques.add(*query); } } // optimization: if we have just one query, just return it if (uniques.size() == 1) { return *uniques.begin(); } 
BooleanQueryPtr result(newLucene(true)); for (SetQuery::iterator query = uniques.begin(); query != uniques.end(); ++query) { result->add(*query, BooleanClause::SHOULD); } return result; } void Query::extractTerms(SetTerm terms) { // needs to be implemented by query subclasses boost::throw_exception(UnsupportedOperationException()); } QueryPtr Query::mergeBooleanQueries(Collection queries) { SetBooleanClause allClauses(SetBooleanClause::newInstance()); for (Collection::iterator booleanQuery = queries.begin(); booleanQuery != queries.end(); ++booleanQuery) { for (Collection::iterator clause = (*booleanQuery)->begin(); clause != (*booleanQuery)->end(); ++clause) { allClauses.add(*clause); } } bool coordDisabled = queries.empty() ? false : queries[0]->isCoordDisabled(); BooleanQueryPtr result(newLucene(coordDisabled)); for (SetBooleanClause::iterator clause2 = allClauses.begin(); clause2 != allClauses.end(); ++clause2) { result->add(*clause2); } return result; } SimilarityPtr Query::getSimilarity(const SearcherPtr& searcher) { return searcher->getSimilarity(); } LuceneObjectPtr Query::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = LuceneObject::clone(other ? 
other : newLucene()); QueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->boost = boost; return cloneQuery; } int32_t Query::hashCode() { int32_t prime = 31; int32_t result = 1; result = prime * result + MiscUtils::doubleToIntBits(boost); return result; } bool Query::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } if (!other) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } QueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) { return false; } return (boost == otherQuery->boost); } String Query::boostString() { double boost = getBoost(); if (boost == 1.0) { return L""; } StringStream boostString; boostString.precision(1); boostString.setf(std::ios::fixed); boostString << L"^" << boost; return boostString.str(); } } LucenePlusPlus-rel_3.0.9/src/core/search/QueryTermVector.cpp000066400000000000000000000074341456444476200241500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "QueryTermVector.h" #include "Analyzer.h" #include "TokenStream.h" #include "StringReader.h" #include "TermAttribute.h" namespace Lucene { QueryTermVector::QueryTermVector(Collection queryTerms) { terms = Collection::newInstance(); termFreqs = Collection::newInstance(); processTerms(queryTerms); } QueryTermVector::QueryTermVector(const String& queryString, const AnalyzerPtr& analyzer) { terms = Collection::newInstance(); termFreqs = Collection::newInstance(); if (analyzer) { TokenStreamPtr stream(analyzer->tokenStream(L"", newLucene(queryString))); if (stream) { Collection terms = Collection::newInstance(); try { bool hasMoreTokens = false; stream->reset(); TermAttributePtr termAtt(stream->addAttribute()); hasMoreTokens = stream->incrementToken(); while (hasMoreTokens) { terms.add(termAtt->term()); hasMoreTokens = stream->incrementToken(); } processTerms(terms); } catch (IOException&) { } } } } QueryTermVector::~QueryTermVector() { } void QueryTermVector::processTerms(Collection queryTerms) { if (queryTerms) { std::sort(queryTerms.begin(), queryTerms.end()); MapStringInt tmpSet(MapStringInt::newInstance()); // filter out duplicates Collection tmpList(Collection::newInstance()); Collection tmpFreqs(Collection::newInstance()); int32_t j = 0; for (int32_t i = 0; i < queryTerms.size(); ++i) { String term(queryTerms[i]); MapStringInt::iterator position = tmpSet.find(term); if (position == tmpSet.end()) { tmpSet.put(term, j++); tmpList.add(term); tmpFreqs.add(1); } else { int32_t freq = tmpFreqs[position->second]; tmpFreqs[position->second] = freq + 1; } } terms = tmpList; termFreqs = Collection::newInstance(tmpFreqs.size()); int32_t i = 0; for (Collection::iterator freq = tmpFreqs.begin(); freq != tmpFreqs.end(); ++freq) { termFreqs[i++] = *freq; } } } String QueryTermVector::toString() { StringStream buffer; buffer << L"{"; for (int32_t i = 0; i < terms.size(); 
++i) { if (i > 0) { buffer << L", "; } buffer << terms[i] << L'/' << termFreqs[i]; } buffer << L"}"; return buffer.str(); } int32_t QueryTermVector::size() { return terms.size(); } Collection QueryTermVector::getTerms() { return terms; } Collection QueryTermVector::getTermFrequencies() { return termFreqs; } int32_t QueryTermVector::indexOf(const String& term) { Collection::iterator search = std::lower_bound(terms.begin(), terms.end(), term); return (search == terms.end() || term < *search) ? -1 : std::distance(terms.begin(), search); } Collection QueryTermVector::indexesOf(Collection terms, int32_t start, int32_t length) { Collection res(Collection::newInstance(length)); for (int32_t i = 0; i < length; ++i) { res[i] = indexOf(terms[i]); } return res; } } LucenePlusPlus-rel_3.0.9/src/core/search/QueryWrapperFilter.cpp000066400000000000000000000034071456444476200246400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "QueryWrapperFilter.h" #include "_QueryWrapperFilter.h" #include "Query.h" #include "Weight.h" #include "Scorer.h" #include "IndexSearcher.h" namespace Lucene { QueryWrapperFilter::QueryWrapperFilter(const QueryPtr& query) { this->query = query; } QueryWrapperFilter::~QueryWrapperFilter() { } DocIdSetPtr QueryWrapperFilter::getDocIdSet(const IndexReaderPtr& reader) { WeightPtr weight(query->weight(newLucene(reader))); return newLucene(reader, weight); } String QueryWrapperFilter::toString() { return L"QueryWrapperFilter(" + query->toString() + L")"; } bool QueryWrapperFilter::equals(const LuceneObjectPtr& other) { QueryWrapperFilterPtr otherQueryWrapperFilter(boost::dynamic_pointer_cast(other)); if (!otherQueryWrapperFilter) { return false; } return this->query->equals(otherQueryWrapperFilter->query); } int32_t QueryWrapperFilter::hashCode() { return query->hashCode() ^ 0x923F64B9; } QueryWrapperFilterDocIdSet::QueryWrapperFilterDocIdSet(const IndexReaderPtr& reader, const WeightPtr& weight) { this->reader = reader; this->weight = weight; } QueryWrapperFilterDocIdSet::~QueryWrapperFilterDocIdSet() { } DocIdSetIteratorPtr QueryWrapperFilterDocIdSet::iterator() { return weight->scorer(reader, true, false); } bool QueryWrapperFilterDocIdSet::isCacheable() { return false; } } LucenePlusPlus-rel_3.0.9/src/core/search/ReqExclScorer.cpp000066400000000000000000000044571456444476200235530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ReqExclScorer.h" namespace Lucene { ReqExclScorer::ReqExclScorer(const ScorerPtr& reqScorer, const DocIdSetIteratorPtr& exclDisi) : Scorer(SimilarityPtr()) { // No similarity used. this->reqScorer = reqScorer; this->exclDisi = exclDisi; this->doc = -1; } ReqExclScorer::~ReqExclScorer() { } int32_t ReqExclScorer::nextDoc() { if (!reqScorer) { return doc; } doc = reqScorer->nextDoc(); if (doc == NO_MORE_DOCS) { reqScorer.reset(); // exhausted, nothing left return doc; } if (!exclDisi) { return doc; } doc = toNonExcluded(); return doc; } int32_t ReqExclScorer::toNonExcluded() { int32_t exclDoc = exclDisi->docID(); int32_t reqDoc = reqScorer->docID(); // may be excluded do { if (reqDoc < exclDoc) { return reqDoc; // reqScorer advanced to before exclScorer, ie. not excluded } else if (reqDoc > exclDoc) { exclDoc = exclDisi->advance(reqDoc); if (exclDoc == NO_MORE_DOCS) { exclDisi.reset(); // exhausted, no more exclusions return reqDoc; } if (exclDoc > reqDoc) { return reqDoc; // not excluded } } } while ((reqDoc = reqScorer->nextDoc()) != NO_MORE_DOCS); reqScorer.reset(); // exhausted, nothing left return NO_MORE_DOCS; } int32_t ReqExclScorer::docID() { return doc; } double ReqExclScorer::score() { return reqScorer->score(); // reqScorer may be null when next() or skipTo() already return false } int32_t ReqExclScorer::advance(int32_t target) { if (!reqScorer) { doc = NO_MORE_DOCS; return doc; } if (!exclDisi) { doc = reqScorer->advance(target); return doc; } if (reqScorer->advance(target) == NO_MORE_DOCS) { reqScorer.reset(); doc = NO_MORE_DOCS; return doc; } doc = toNonExcluded(); return doc; } } LucenePlusPlus-rel_3.0.9/src/core/search/ReqOptSumScorer.cpp000066400000000000000000000025111456444476200240740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#include "LuceneInc.h"
#include "ReqOptSumScorer.h"

namespace Lucene {

// A Scorer for queries with a required part and an optional part.  Matching
// is delegated entirely to the required scorer; the optional scorer's score
// is added only when it lands on the same document.
ReqOptSumScorer::ReqOptSumScorer(const ScorerPtr& reqScorer, const ScorerPtr& optScorer) : Scorer(SimilarityPtr()) { // No similarity used.
    this->reqScorer = reqScorer;
    this->optScorer = optScorer;
}

ReqOptSumScorer::~ReqOptSumScorer() {
}

int32_t ReqOptSumScorer::nextDoc() {
    return reqScorer->nextDoc();
}

int32_t ReqOptSumScorer::advance(int32_t target) {
    return reqScorer->advance(target);
}

int32_t ReqOptSumScorer::docID() {
    return reqScorer->docID();
}

double ReqOptSumScorer::score() {
    int32_t curDoc = reqScorer->docID();
    double reqScore = reqScorer->score();
    if (!optScorer) {
        // optional scorer already exhausted and released
        return reqScore;
    }
    int32_t optScorerDoc = optScorer->docID();
    // Catch the optional scorer up to the current doc; release it once exhausted.
    if (optScorerDoc < curDoc && (optScorerDoc = optScorer->advance(curDoc)) == NO_MORE_DOCS) {
        optScorer.reset();
        return reqScore;
    }
    return optScorerDoc == curDoc ? reqScore + optScorer->score() : reqScore;
}

}
LucenePlusPlus-rel_3.0.9/src/core/search/ScoreCachingWrappingScorer.cpp000066400000000000000000000031051456444476200262350ustar00rootroot00000000000000/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ScoreCachingWrappingScorer.h" namespace Lucene { ScoreCachingWrappingScorer::ScoreCachingWrappingScorer(const ScorerPtr& scorer) : Scorer(scorer->getSimilarity()) { this->curDoc = -1; this->curScore = 0.0; this->_scorer = scorer; } ScoreCachingWrappingScorer::~ScoreCachingWrappingScorer() { } bool ScoreCachingWrappingScorer::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) { return ScorerPtr(_scorer)->score(collector, max, firstDocID); } SimilarityPtr ScoreCachingWrappingScorer::getSimilarity() { return ScorerPtr(_scorer)->getSimilarity(); } double ScoreCachingWrappingScorer::score() { ScorerPtr scorer(_scorer); int32_t doc = scorer->docID(); if (doc != curDoc) { curScore = scorer->score(); curDoc = doc; } return curScore; } int32_t ScoreCachingWrappingScorer::docID() { return ScorerPtr(_scorer)->docID(); } int32_t ScoreCachingWrappingScorer::nextDoc() { return ScorerPtr(_scorer)->nextDoc(); } void ScoreCachingWrappingScorer::score(const CollectorPtr& collector) { ScorerPtr(_scorer)->score(collector); } int32_t ScoreCachingWrappingScorer::advance(int32_t target) { return ScorerPtr(_scorer)->advance(target); } } LucenePlusPlus-rel_3.0.9/src/core/search/ScoreDoc.cpp000066400000000000000000000012331456444476200225200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ScoreDoc.h" namespace Lucene { ScoreDoc::ScoreDoc(int32_t doc, double score) { this->doc = doc; this->score = score; } ScoreDoc::~ScoreDoc() { } String ScoreDoc::toString() { StringStream buffer; buffer << L"doc=" << doc << L" score=" << score; return buffer.str(); } } LucenePlusPlus-rel_3.0.9/src/core/search/Scorer.cpp000066400000000000000000000041141456444476200222550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Scorer.h" #include "Collector.h" namespace Lucene { Scorer::Scorer(const SimilarityPtr& similarity) { this->similarity = similarity; } Scorer::Scorer(const WeightPtr& weight) { this->weight = weight; } Scorer::~Scorer() { } SimilarityPtr Scorer::getSimilarity() { return similarity; } void Scorer::score(const CollectorPtr& collector) { collector->setScorer(shared_from_this()); int32_t doc; while ((doc = nextDoc()) != NO_MORE_DOCS) { collector->collect(doc); } } bool Scorer::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) { collector->setScorer(shared_from_this()); int32_t doc = firstDocID; while (doc < max) { collector->collect(doc); doc = nextDoc(); } return (doc != NO_MORE_DOCS); } void Scorer::visitSubScorers(QueryPtr parent, BooleanClause::Occur relationship, ScorerVisitor *visitor){ QueryPtr q = weight->getQuery(); switch (relationship) { case BooleanClause::MUST: visitor->visitRequired(parent, q, shared_from_this()); break; case BooleanClause::MUST_NOT: visitor->visitProhibited(parent, q, shared_from_this()); break; case BooleanClause::SHOULD: 
visitor->visitOptional(parent, q, shared_from_this()); break; } } void Scorer::visitScorers(ScorerVisitor *visitor) { boost::shared_ptr s_obj; visitSubScorers(s_obj, BooleanClause::MUST/*must id default*/, visitor); } } LucenePlusPlus-rel_3.0.9/src/core/search/Searchable.cpp000066400000000000000000000034421456444476200230540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Searchable.h" namespace Lucene { void Searchable::search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& collector) { BOOST_ASSERT(false); // override } void Searchable::close() { BOOST_ASSERT(false); // override } int32_t Searchable::docFreq(const TermPtr& term) { BOOST_ASSERT(false); return 0; // override } Collection Searchable::docFreqs(Collection terms) { BOOST_ASSERT(false); return Collection(); // override } int32_t Searchable::maxDoc() { BOOST_ASSERT(false); return 0; // override } TopDocsPtr Searchable::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n) { BOOST_ASSERT(false); return TopDocsPtr(); // override } DocumentPtr Searchable::doc(int32_t n) { BOOST_ASSERT(false); return DocumentPtr(); // override } DocumentPtr Searchable::doc(int32_t n, const FieldSelectorPtr& fieldSelector) { BOOST_ASSERT(false); return DocumentPtr(); // override } QueryPtr Searchable::rewrite(const QueryPtr& query) { BOOST_ASSERT(false); return QueryPtr(); // override } ExplanationPtr Searchable::explain(const WeightPtr& weight, int32_t doc) { BOOST_ASSERT(false); return ExplanationPtr(); // override } TopFieldDocsPtr Searchable::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort) { 
BOOST_ASSERT(false); return TopFieldDocsPtr(); // override } } LucenePlusPlus-rel_3.0.9/src/core/search/Searcher.cpp000066400000000000000000000036371456444476200225650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Searcher.h" #include "Similarity.h" #include "Query.h" #include "Collector.h" namespace Lucene { Searcher::Searcher() { similarity = Similarity::getDefault(); } Searcher::~Searcher() { } TopFieldDocsPtr Searcher::search(const QueryPtr& query, const FilterPtr& filter, int32_t n, const SortPtr& sort) { return search(createWeight(query), filter, n, sort); } void Searcher::search(const QueryPtr& query, const CollectorPtr& results) { search(createWeight(query), FilterPtr(), results); } void Searcher::search(const QueryPtr& query, const FilterPtr& filter, const CollectorPtr& results) { search(createWeight(query), filter, results); } TopDocsPtr Searcher::search(const QueryPtr& query, const FilterPtr& filter, int32_t n) { return search(createWeight(query), filter, n); } TopDocsPtr Searcher::search(const QueryPtr& query, int32_t n) { return search(query, FilterPtr(), n); } ExplanationPtr Searcher::explain(const QueryPtr& query, int32_t doc) { return explain(createWeight(query), doc); } void Searcher::setSimilarity(const SimilarityPtr& similarity) { this->similarity = similarity; } SimilarityPtr Searcher::getSimilarity() { return this->similarity; } WeightPtr Searcher::createWeight(const QueryPtr& query) { return query->weight(shared_from_this()); } Collection Searcher::docFreqs(Collection terms) { Collection result(Collection::newInstance(terms.size())); for (int32_t i = 0; i < terms.size(); ++i) { result[i] = 
docFreq(terms[i]); } return result; } } LucenePlusPlus-rel_3.0.9/src/core/search/Similarity.cpp000066400000000000000000000065251456444476200231560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Similarity.h" #include "_Similarity.h" #include "DefaultSimilarity.h" #include "FieldInvertState.h" #include "Searcher.h" #include "Term.h" #include "SmallDouble.h" #include "StringUtils.h" namespace Lucene { const int32_t Similarity::NO_DOC_ID_PROVIDED = -1; Similarity::Similarity() { } Similarity::~Similarity() { } SimilarityPtr Similarity::getDefault() { // race condition? static SimilarityPtr defaultImpl; LUCENE_RUN_ONCE( defaultImpl = newLucene(); CycleCheck::addStatic(defaultImpl); ); return defaultImpl; } static const Collection GEN_NORM_TABLE() { static Collection _NORM_TABLE; LUCENE_RUN_ONCE( _NORM_TABLE = Collection::newInstance(256); for (int32_t i = 0; i < 256; ++i) { _NORM_TABLE[i] = SmallDouble::byteToDouble((uint8_t)i); } ); return _NORM_TABLE; } const Collection Similarity::NORM_TABLE = GEN_NORM_TABLE(); double Similarity::decodeNorm(uint8_t b) { return NORM_TABLE[b & 0xff]; // & 0xff maps negative bytes to positive above 127 } const Collection& Similarity::getNormDecoder() { return NORM_TABLE; } double Similarity::computeNorm(const String& fieldName, const FieldInvertStatePtr& state) { return (double)(state->getBoost() * lengthNorm(fieldName, state->getLength())); } uint8_t Similarity::encodeNorm(double f) { return SmallDouble::doubleToByte(f); } double Similarity::tf(int32_t freq) { return tf((double)freq); } IDFExplanationPtr Similarity::idfExplain(const TermPtr& term, const SearcherPtr& searcher) { int32_t df 
= searcher->docFreq(term); int32_t max = searcher->maxDoc(); double _idf = idf(df, max); return newLucene(df, max, _idf); } IDFExplanationPtr Similarity::idfExplain(Collection terms, const SearcherPtr& searcher) { int32_t max = searcher->maxDoc(); double _idf = 0.0; String exp; for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { int32_t df = searcher->docFreq(*term); _idf += idf(df, max); exp += L" " + (*term)->text() + L"=" + StringUtils::toString(df); } return newLucene(exp, _idf); } double Similarity::scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length) { return 1.0; } SimilarityIDFExplanation::SimilarityIDFExplanation(int32_t df, int32_t max, double idf) { this->df = df; this->max = max; this->idf = idf; } SimilarityIDFExplanation::SimilarityIDFExplanation(const String& exp, double idf) { this->exp = exp; this->idf = idf; } SimilarityIDFExplanation::~SimilarityIDFExplanation() { } String SimilarityIDFExplanation::explain() { return !exp.empty() ? exp : L"idf(docFreq=" + StringUtils::toString(df) + L", maxDocs=" + StringUtils::toString(max) + L")"; } double SimilarityIDFExplanation::getIdf() { return idf; } } LucenePlusPlus-rel_3.0.9/src/core/search/SimilarityDelegator.cpp000066400000000000000000000032031456444476200247730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SimilarityDelegator.h" namespace Lucene { SimilarityDelegator::SimilarityDelegator(const SimilarityPtr& delegee) { this->delegee = delegee; } SimilarityDelegator::~SimilarityDelegator() { } double SimilarityDelegator::computeNorm(const String& fieldName, const FieldInvertStatePtr& state) { return delegee->computeNorm(fieldName, state); } double SimilarityDelegator::lengthNorm(const String& fieldName, int32_t numTokens) { return delegee->lengthNorm(fieldName, numTokens); } double SimilarityDelegator::queryNorm(double sumOfSquaredWeights) { return delegee->queryNorm(sumOfSquaredWeights); } double SimilarityDelegator::tf(double freq) { return delegee->tf(freq); } double SimilarityDelegator::sloppyFreq(int32_t distance) { return delegee->sloppyFreq(distance); } double SimilarityDelegator::idf(int32_t docFreq, int32_t numDocs) { return delegee->idf(docFreq, numDocs); } double SimilarityDelegator::coord(int32_t overlap, int32_t maxOverlap) { return delegee->coord(overlap, maxOverlap); } double SimilarityDelegator::scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length) { return delegee->scorePayload(docId, fieldName, start, end, payload, offset, length); } } LucenePlusPlus-rel_3.0.9/src/core/search/SingleTermEnum.cpp000066400000000000000000000017061456444476200237220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SingleTermEnum.h" #include "IndexReader.h" #include "Term.h" namespace Lucene { SingleTermEnum::SingleTermEnum(const IndexReaderPtr& reader, const TermPtr& singleTerm) { this->_endEnum = false; this->singleTerm = singleTerm; setEnum(reader->terms(singleTerm)); } SingleTermEnum::~SingleTermEnum() { } double SingleTermEnum::difference() { return 1.0; } bool SingleTermEnum::endEnum() { return _endEnum; } bool SingleTermEnum::termCompare(const TermPtr& term) { if (term->equals(singleTerm)) { return true; } _endEnum = true; return false; } } LucenePlusPlus-rel_3.0.9/src/core/search/SloppyPhraseScorer.cpp000066400000000000000000000143521456444476200246340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SloppyPhraseScorer.h" #include "PhrasePositions.h" #include "PhraseQueue.h" #include "Similarity.h" namespace Lucene { struct __luceneEquals { inline bool operator()(const PhrasePositions* __first, const PhrasePositions* __second) const { return __first ? 
(__second && __first == __second) : (!__first && !__second); } }; typedef HashMap< PhrasePositions*, LuceneObjectPtr, luceneHash, __luceneEquals > __MapPhrasePositionsLuceneObject; SloppyPhraseScorer::SloppyPhraseScorer(const WeightPtr& weight, Collection tps, Collection offsets, const SimilarityPtr& similarity, int32_t slop, ByteArray norms) : PhraseScorer(weight, tps, offsets, similarity, norms) { this->slop = slop; this->checkedRepeats = false; } SloppyPhraseScorer::~SloppyPhraseScorer() { } double SloppyPhraseScorer::phraseFreq() { int32_t end = initPhrasePositions(); double freq = 0.0; bool done = (end < 0); while (!done) { auto* __pp = pq->pop(); int32_t start = __pp->position; int32_t next = pq->top()->position; bool tpsDiffer = true; for (int32_t pos = start; pos <= next || !tpsDiffer; pos = __pp->position) { if (pos<=next && tpsDiffer) { start = pos; // advance pp to min window } if (!__pp->nextPosition()) { done = true; // ran out of a term - done break; } PhrasePositions* __pp2 = nullptr; tpsDiffer = (!__pp->repeats || !(__pp2 = termPositionsDiffer(__pp))); if (__pp2 && __pp2 != __pp) { __pp = flip(__pp, __pp2); // flip pp to pp2 } } int32_t matchLength = end - start; if (matchLength <= slop) { freq += getSimilarity()->sloppyFreq(matchLength); // score match } if (__pp->position > end) { end = __pp->position; } pq->add(__pp); // restore pq } return freq; } PhrasePositions* SloppyPhraseScorer::flip(PhrasePositions* __pp, PhrasePositions* __pp2) { int32_t n = 0; PhrasePositions* __pp3; // pop until finding pp2 while ((__pp3 = pq->pop()) != __pp2) { tmpPos[n++] = __pp3; } // insert back all but pp2 for (n--; n >= 0; --n) { pq->addOverflow(tmpPos[n]); } // insert pp back pq->add(__pp); return __pp2; } int32_t SloppyPhraseScorer::initPhrasePositions() { int32_t end = 0; // no repeats at all (most common case is also the simplest one) if (checkedRepeats && !repeats) { // build queue from list pq->clear(); for (auto* __pp = __first; __pp; __pp = __pp->__next) { 
__pp->firstPosition(); if (__pp->position > end) { end = __pp->position; } pq->add(__pp); // build pq from list } return end; } // position the pp's for (PhrasePositions* __pp = __first; __pp; __pp = __pp->__next) { __pp->firstPosition(); } // one time initialization for this scorer if (!checkedRepeats) { checkedRepeats = true; // check for repeats __MapPhrasePositionsLuceneObject m; for (auto* __pp = __first; __pp; __pp = __pp->__next) { int32_t tpPos = __pp->position + __pp->offset; for (auto* __pp2 = __pp->__next; __pp2; __pp2 = __pp2->__next) { int32_t tpPos2 = __pp2->position + __pp2->offset; if (tpPos2 == tpPos) { if (!m) { m = __MapPhrasePositionsLuceneObject::newInstance(); } __pp->repeats = true; __pp2->repeats = true; m.put(__pp, LuceneObjectPtr()); m.put(__pp2, LuceneObjectPtr()); } } } if (m) { repeats = Collection::newInstance(); for (__MapPhrasePositionsLuceneObject::iterator key = m.begin(); key != m.end(); ++key) { repeats.add(key->first); } } } // with repeats must advance some repeating pp's so they all start with differing tp's if (repeats) { for (Collection::iterator pp = repeats.begin(); pp != repeats.end(); ++pp) { PhrasePositions* pp2 = nullptr; while ((pp2 = termPositionsDiffer(*pp))) { if (!pp2->nextPosition()) { // out of pps that do not differ, advance the pp with higher offset return -1; // ran out of a term - done } } } } // build queue from list pq->clear(); for (auto* __pp = __first; __pp; __pp = __pp->__next) { if (__pp->position > end) { end = __pp->position; } pq->add(__pp); // build pq from list } if (repeats) { tmpPos = Collection::newInstance(pq->size()); } return end; } PhrasePositions* SloppyPhraseScorer::termPositionsDiffer(PhrasePositions* __pp) { // Efficiency note: a more efficient implementation could keep a map between repeating pp's, so that if // pp1a, pp1b, pp1c are repeats term1, and pp2a, pp2b are repeats of term2, pp2a would only be checked // against pp2b but not against pp1a, pp1b, pp1c. 
However this would complicate code, for a rather rare // case, so choice is to compromise here. int32_t tpPos = __pp->position + __pp->offset; for (Collection::iterator pp2 = repeats.begin(); pp2 != repeats.end(); ++pp2) { if (*pp2 == __pp) { continue; } int32_t tpPos2 = (*pp2)->position + (*pp2)->offset; if (tpPos2 == tpPos) { return __pp->offset > (*pp2)->offset ? __pp : *pp2; // do not differ: return the one with higher offset. } } return nullptr; } } LucenePlusPlus-rel_3.0.9/src/core/search/Sort.cpp000066400000000000000000000040121456444476200217440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Sort.h" #include "SortField.h" #include "MiscUtils.h" namespace Lucene { Sort::Sort() { setSort(SortField::FIELD_SCORE()); } Sort::Sort(const SortFieldPtr& field) { setSort(field); } Sort::Sort(Collection fields) { setSort(fields); } Sort::~Sort() { } SortPtr Sort::RELEVANCE() { static SortPtr _RELEVANCE; LUCENE_RUN_ONCE( _RELEVANCE = newLucene(); CycleCheck::addStatic(_RELEVANCE); ); return _RELEVANCE; } SortPtr Sort::INDEXORDER() { static SortPtr _INDEXORDER; LUCENE_RUN_ONCE( _INDEXORDER = newLucene(SortField::FIELD_DOC()); CycleCheck::addStatic(_INDEXORDER); ); return _INDEXORDER; } void Sort::setSort(const SortFieldPtr& field) { this->fields = newCollection(field); } void Sort::setSort(Collection fields) { this->fields = fields; } Collection Sort::getSort() { return fields; } String Sort::toString() { StringStream buffer; for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { if (field != fields.begin()) { buffer << L","; } buffer << (*field)->toString(); } return buffer.str(); } bool 
Sort::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } SortPtr otherSort(boost::dynamic_pointer_cast(other)); if (!otherSort) { return false; } return fields.equals(otherSort->fields); } int32_t Sort::hashCode() { return 0x45aaf665 + MiscUtils::hashCode(fields.begin(), fields.end(), MiscUtils::hashLucene); } } LucenePlusPlus-rel_3.0.9/src/core/search/SortField.cpp000066400000000000000000000177121456444476200227230ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SortField.h" #include "FieldCache.h" #include "FieldComparator.h" #include "FieldComparatorSource.h" #include "StringUtils.h" namespace Lucene { /// Sort by document score (relevancy). Sort values are Double and higher values are at the front. const int32_t SortField::SCORE = 0; /// Sort by document number (index order). Sort values are Integer and lower values are at the front. const int32_t SortField::DOC = 1; /// Sort using term values as Strings. Sort values are String and lower values are at the front. const int32_t SortField::STRING = 3; /// Sort using term values as Integers. Sort values are Integer and lower values are at the front. const int32_t SortField::INT = 4; /// Sort using term values as Floats. Sort values are Float and lower values are at the front. const int32_t SortField::FLOAT = 5; /// Sort using term values as Longs. Sort values are Long and lower values are at the front. const int32_t SortField::LONG = 6; /// Sort using term values as Doubles. Sort values are Double and lower values are at the front. const int32_t SortField::DOUBLE = 7; /// Sort using term values as Shorts. 
Sort values are Short and lower values are at the front. const int32_t SortField::SHORT = 8; /// Sort using a custom Comparator. Sort values are any ComparableValue and sorting is done according /// to natural order. const int32_t SortField::CUSTOM = 9; /// Sort using term values as Bytes. Sort values are Byte and lower values are at the front. const int32_t SortField::BYTE = 10; /// Sort using term values as Strings, but comparing by value (using String::compare) for all comparisons. /// This is typically slower than {@link #STRING}, which uses ordinals to do the sorting. const int32_t SortField::STRING_VAL = 11; SortField::SortField(const String& field, int32_t type, bool reverse) { initFieldType(field, type); this->reverse = reverse; } SortField::SortField(const String& field, const ParserPtr& parser, bool reverse) { if (boost::dynamic_pointer_cast(parser)) { initFieldType(field, INT); } else if (boost::dynamic_pointer_cast(parser)) { initFieldType(field, BYTE); } else if (boost::dynamic_pointer_cast(parser)) { initFieldType(field, LONG); } else if (boost::dynamic_pointer_cast(parser)) { initFieldType(field, DOUBLE); } else { boost::throw_exception(IllegalArgumentException(L"Parser instance does not subclass existing numeric parser from FieldCache")); } this->reverse = reverse; this->parser = parser; } SortField::SortField(const String& field, const std::locale& locale, bool reverse) { initFieldType(field, STRING); this->locale = newInstance(locale); this->reverse = reverse; } SortField::SortField(const String& field, const FieldComparatorSourcePtr& comparator, bool reverse) { initFieldType(field, CUSTOM); this->comparatorSource = comparator; this->reverse = reverse; } SortField::~SortField() { } SortFieldPtr SortField::FIELD_SCORE() { static SortFieldPtr _FIELD_SCORE; LUCENE_RUN_ONCE( _FIELD_SCORE = newLucene(L"", SCORE); CycleCheck::addStatic(_FIELD_SCORE); ); return _FIELD_SCORE; } SortFieldPtr SortField::FIELD_DOC() { static SortFieldPtr _FIELD_DOC; 
LUCENE_RUN_ONCE( _FIELD_DOC = newLucene(L"", DOC); CycleCheck::addStatic(_FIELD_DOC); ); return _FIELD_DOC; } void SortField::initFieldType(const String& field, int32_t type) { this->type = type; if (field.empty() && type != SCORE && type != DOC) { boost::throw_exception(IllegalArgumentException(L"Field can only be null when type is SCORE or DOC")); } this->field = field; } String SortField::getField() { return field; } int32_t SortField::getType() { return type; } localePtr SortField::getLocale() { return locale; } ParserPtr SortField::getParser() { return parser; } bool SortField::getReverse() { return reverse; } FieldComparatorSourcePtr SortField::getComparatorSource() { return comparatorSource; } String SortField::toString() { StringStream buffer; switch (type) { case SCORE: buffer << L""; break; case DOC: buffer << L""; break; case STRING: buffer << L""; break; case STRING_VAL: buffer << L""; break; case BYTE: buffer << L""; break; case SHORT: buffer << L""; break; case INT: buffer << L""; break; case LONG: buffer << L""; break; case FLOAT: buffer << L""; break; case DOUBLE: buffer << L""; break; case CUSTOM: buffer << L"toString() << L">"; break; default: buffer << L""; break; } if (parser) { buffer << L"(" << parser->toString() << L")"; } if (reverse) { buffer << L"!"; } return buffer.str(); } bool SortField::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } SortFieldPtr otherSortField(boost::dynamic_pointer_cast(other)); if (!otherSortField) { return false; } return (field == otherSortField->field && type == otherSortField->type && reverse == otherSortField->reverse && ((locale && otherSortField->locale && *locale == *otherSortField->locale) || (!locale && !otherSortField->locale)) && (comparatorSource ? comparatorSource->equals(otherSortField->comparatorSource) : !otherSortField->comparatorSource) && (parser ? 
parser->equals(otherSortField->parser) : !otherSortField->parser)); } int32_t SortField::hashCode() { int32_t hash = type ^ 0x346565dd + (reverse ? 1 : 0) ^ 0xaf5998bb; hash += StringUtils::hashCode(field) ^ 0xff5685dd; if (locale) { hash += StringUtils::hashCode(StringUtils::toUnicode(locale->name().c_str())) ^ 0xff5685dd; } if (comparatorSource) { hash += comparatorSource->hashCode(); } if (parser) { hash += parser->hashCode() ^ 0x3aaf56ff; } return hash; } FieldComparatorPtr SortField::getComparator(int32_t numHits, int32_t sortPos) { if (locale) { return newLucene(numHits, field, *locale); } switch (type) { case SCORE: return newLucene(numHits); case DOC: return newLucene(numHits); case SHORT: case INT: return newLucene(numHits, field, parser); case FLOAT: case DOUBLE: return newLucene(numHits, field, parser); case LONG: return newLucene(numHits, field, parser); case BYTE: return newLucene(numHits, field, parser); case CUSTOM: BOOST_ASSERT(comparatorSource); return comparatorSource->newComparator(field, numHits, sortPos, reverse); case STRING: return newLucene(numHits, field, sortPos, reverse); case STRING_VAL: return newLucene(numHits, field); default: boost::throw_exception(IllegalStateException(L"Illegal sort type: " + StringUtils::toString(type))); return FieldComparatorPtr(); } } } LucenePlusPlus-rel_3.0.9/src/core/search/SpanFilter.cpp000066400000000000000000000006701456444476200230720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanFilter.h" namespace Lucene { SpanFilter::~SpanFilter() { } } LucenePlusPlus-rel_3.0.9/src/core/search/SpanFilterResult.cpp000066400000000000000000000026201456444476200242660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanFilterResult.h" namespace Lucene { SpanFilterResult::SpanFilterResult(const DocIdSetPtr& docIdSet, Collection positions) { this->docIdSet = docIdSet; this->positions = positions; } SpanFilterResult::~SpanFilterResult() { } Collection SpanFilterResult::getPositions() { return positions; } DocIdSetPtr SpanFilterResult::getDocIdSet() { return docIdSet; } PositionInfo::PositionInfo(int32_t doc) { this->doc = doc; this->positions = Collection::newInstance(); } PositionInfo::~PositionInfo() { } void PositionInfo::addPosition(int32_t start, int32_t end) { positions.add(newLucene(start, end)); } int32_t PositionInfo::getDoc() { return doc; } Collection PositionInfo::getPositions() { return positions; } StartEnd::StartEnd(int32_t start, int32_t end) { this->start = start; this->end = end; } StartEnd::~StartEnd() { } int32_t StartEnd::getEnd() { return end; } int32_t StartEnd::getStart() { return start; } } LucenePlusPlus-rel_3.0.9/src/core/search/SpanQueryFilter.cpp000066400000000000000000000041141456444476200241150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanQueryFilter.h" #include "SpanQuery.h" #include "SpanFilterResult.h" #include "Spans.h" #include "OpenBitSet.h" #include "IndexReader.h" namespace Lucene { SpanQueryFilter::SpanQueryFilter(const SpanQueryPtr& query) { this->query = query; } SpanQueryFilter::~SpanQueryFilter() { } DocIdSetPtr SpanQueryFilter::getDocIdSet(const IndexReaderPtr& reader) { SpanFilterResultPtr result(bitSpans(reader)); return result->getDocIdSet(); } SpanFilterResultPtr SpanQueryFilter::bitSpans(const IndexReaderPtr& reader) { OpenBitSetPtr bits(newLucene(reader->maxDoc())); SpansPtr spans(query->getSpans(reader)); Collection tmp(Collection::newInstance()); int32_t currentDoc = -1; PositionInfoPtr currentInfo; while (spans->next()) { int32_t doc = spans->doc(); bits->set(doc); if (currentDoc != doc) { currentInfo = newLucene(doc); tmp.add(currentInfo); currentDoc = doc; } currentInfo->addPosition(spans->start(), spans->end()); } return newLucene(bits, tmp); } SpanQueryPtr SpanQueryFilter::getQuery() { return query; } String SpanQueryFilter::toString() { return L"SpanQueryFilter(" + query->toString() + L")"; } bool SpanQueryFilter::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } SpanQueryFilterPtr otherSpanQueryFilter(boost::dynamic_pointer_cast(other)); if (!otherSpanQueryFilter) { return false; } return query->equals(otherSpanQueryFilter->query); } int32_t SpanQueryFilter::hashCode() { return query->hashCode() ^ 0x923f64b9; } } LucenePlusPlus-rel_3.0.9/src/core/search/TermQuery.cpp000066400000000000000000000136411456444476200227620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermQuery.h" #include "_TermQuery.h" #include "TermScorer.h" #include "IndexReader.h" #include "ComplexExplanation.h" #include "Term.h" #include "TermDocs.h" #include "Similarity.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { TermQuery::TermQuery(const TermPtr& term) { this->term = term; } TermQuery::~TermQuery() { } TermPtr TermQuery::getTerm() { return term; } WeightPtr TermQuery::createWeight(const SearcherPtr& searcher) { return newLucene(shared_from_this(), searcher); } void TermQuery::extractTerms(SetTerm terms) { terms.add(getTerm()); } String TermQuery::toString(const String& field) { StringStream buffer; if (term->field() != field) { buffer << term->field() << L":"; } buffer << term->text() << boostString(); return buffer.str(); } bool TermQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } TermQueryPtr otherTermQuery(boost::dynamic_pointer_cast(other)); if (!otherTermQuery) { return false; } return (getBoost() == otherTermQuery->getBoost() && term->equals(otherTermQuery->term)); } int32_t TermQuery::hashCode() { return MiscUtils::doubleToIntBits(getBoost()) ^ term->hashCode(); } LuceneObjectPtr TermQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? 
other : newLucene(term); TermQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); cloneQuery->term = term; return cloneQuery; } TermWeight::TermWeight(const TermQueryPtr& query, const SearcherPtr& searcher) { this->query = query; this->similarity = query->getSimilarity(searcher); this->value = 0.0; this->idf = 0.0; this->queryNorm = 0.0; this->queryWeight = 0.0; this->idfExp = similarity->idfExplain(query->term, searcher); idf = idfExp->getIdf(); } TermWeight::~TermWeight() { } String TermWeight::toString() { return L"weight(" + query->toString() + L")"; } QueryPtr TermWeight::getQuery() { return query; } double TermWeight::getValue() { return value; } double TermWeight::sumOfSquaredWeights() { queryWeight = idf * getQuery()->getBoost(); // compute query weight return queryWeight * queryWeight; // square it } void TermWeight::normalize(double norm) { queryNorm = norm; queryWeight *= queryNorm; // normalize query weight value = queryWeight * idf; // idf for document } ScorerPtr TermWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { TermDocsPtr termDocs(reader->termDocs(query->term)); return termDocs ? 
newLucene(shared_from_this(), termDocs, similarity, reader->norms(query->term->field())) : ScorerPtr(); } ExplanationPtr TermWeight::explain(const IndexReaderPtr& reader, int32_t doc) { ComplexExplanationPtr result(newLucene()); result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); ExplanationPtr expl(newLucene(idf, idfExp->explain())); // explain query weight ExplanationPtr queryExpl(newLucene()); queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); if (query->getBoost() != 1.0) { queryExpl->addDetail(boostExpl); } queryExpl->addDetail(expl); ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); queryExpl->addDetail(queryNormExpl); queryExpl->setValue(boostExpl->getValue() * expl->getValue() * queryNormExpl->getValue()); result->addDetail(queryExpl); // explain field weight String field(query->term->field()); ComplexExplanationPtr fieldExpl(newLucene()); fieldExpl->setDescription(L"fieldWeight(" + query->term->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); ExplanationPtr tfExplanation(newLucene()); int32_t tf = 0; TermDocsPtr termDocs(reader->termDocs(query->term)); if (termDocs) { LuceneException finally; try { if (termDocs->skipTo(doc) && termDocs->doc() == doc) { tf = termDocs->freq(); } } catch (LuceneException& e) { finally = e; } termDocs->close(); finally.throwException(); tfExplanation->setValue(similarity->tf(tf)); tfExplanation->setDescription(L"tf(termFreq(" + query->term->toString() + L")=" + StringUtils::toString(tf) + L")"); } else { tfExplanation->setValue(0.0); tfExplanation->setDescription(L"no matching term"); } fieldExpl->addDetail(tfExplanation); fieldExpl->addDetail(expl); ExplanationPtr fieldNormExpl(newLucene()); ByteArray fieldNorms(reader->norms(field)); double fieldNorm = fieldNorms ? 
Similarity::decodeNorm(fieldNorms[doc]) : 1.0; fieldNormExpl->setValue(fieldNorm); fieldNormExpl->setDescription(L"fieldNorm(field=" + field + L", doc=" + StringUtils::toString(doc) + L")"); fieldExpl->addDetail(fieldNormExpl); fieldExpl->setMatch(tfExplanation->isMatch()); fieldExpl->setValue(tfExplanation->getValue() * expl->getValue() * fieldNormExpl->getValue()); result->addDetail(fieldExpl); result->setMatch(fieldExpl->getMatch()); // combine them result->setValue(queryExpl->getValue() * fieldExpl->getValue()); if (queryExpl->getValue() == 1.0) { return fieldExpl; } return result; } } LucenePlusPlus-rel_3.0.9/src/core/search/TermRangeFilter.cpp000066400000000000000000000037271456444476200240630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermRangeFilter.h" #include "TermRangeQuery.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { TermRangeFilter::TermRangeFilter(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, bool includeUpper, CollatorPtr collator) : MultiTermQueryWrapperFilter(newLucene(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator)) { } TermRangeFilter::~TermRangeFilter() { } TermRangeFilterPtr TermRangeFilter::Less(const String& fieldName, StringValue upperTerm) { return newLucene(fieldName, VariantUtils::null(), upperTerm, false, true); } TermRangeFilterPtr TermRangeFilter::More(const String& fieldName, StringValue lowerTerm) { return newLucene(fieldName, lowerTerm, VariantUtils::null(), true, false); } String TermRangeFilter::getField() { return boost::static_pointer_cast(query)->getField(); } String 
TermRangeFilter::getLowerTerm() { return boost::static_pointer_cast(query)->getLowerTerm(); } String TermRangeFilter::getUpperTerm() { return boost::static_pointer_cast(query)->getUpperTerm(); } bool TermRangeFilter::includesLower() { return boost::static_pointer_cast(query)->includesLower(); } bool TermRangeFilter::includesUpper() { return boost::static_pointer_cast(query)->includesUpper(); } CollatorPtr TermRangeFilter::getCollator() { return boost::static_pointer_cast(query)->getCollator(); } } LucenePlusPlus-rel_3.0.9/src/core/search/TermRangeQuery.cpp000066400000000000000000000111621456444476200237330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermRangeQuery.h" #include "TermRangeTermEnum.h" #include "Collator.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { TermRangeQuery::TermRangeQuery(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, bool includeUpper, CollatorPtr collator) { this->field = fieldName; this->lowerTerm = lowerTerm; this->upperTerm = upperTerm; this->includeLower = includeLower; this->includeUpper = includeUpper; this->collator = collator; } TermRangeQuery::~TermRangeQuery() { } String TermRangeQuery::getField() { return field; } String TermRangeQuery::getLowerTerm() { return VariantUtils::get(lowerTerm); } String TermRangeQuery::getUpperTerm() { return VariantUtils::get(upperTerm); } bool TermRangeQuery::includesLower() { return includeLower; } bool TermRangeQuery::includesUpper() { return includeUpper; } CollatorPtr TermRangeQuery::getCollator() { return collator; } FilteredTermEnumPtr TermRangeQuery::getEnum(const 
IndexReaderPtr& reader) { return newLucene(reader, field, lowerTerm, upperTerm, includeLower, includeUpper, collator); } LuceneObjectPtr TermRangeQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = MultiTermQuery::clone(other ? other : newLucene(field, lowerTerm, upperTerm, includeLower, includeUpper, collator)); TermRangeQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->lowerTerm = lowerTerm; cloneQuery->upperTerm = upperTerm; cloneQuery->collator = collator; cloneQuery->field = field; cloneQuery->includeLower = includeLower; cloneQuery->includeUpper = includeUpper; return cloneQuery; } String TermRangeQuery::toString(const String& field) { StringStream buffer; if (getField() != field) { buffer << getField() << L":"; } buffer << (includeLower ? L"[" : L"{"); if (VariantUtils::isNull(lowerTerm)) { buffer << L"*"; } else { buffer << lowerTerm; } buffer << L" TO "; if (VariantUtils::isNull(upperTerm)) { buffer << L"*"; } else { buffer << upperTerm; } buffer << (includeUpper ? 
L"]" : L"}"); buffer << boostString(); return buffer.str(); } bool TermRangeQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } if (!MultiTermQuery::equals(other)) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } TermRangeQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) { return false; } if (!collator) { if (otherQuery->collator) { return false; } } else if (!collator->equals(otherQuery->collator)) { return false; } if (field != otherQuery->field) { return false; } if (includeLower != otherQuery->includeLower) { return false; } if (includeUpper != otherQuery->includeUpper) { return false; } if (VariantUtils::isNull(lowerTerm)) { if (!VariantUtils::isNull(otherQuery->lowerTerm)) { return false; } } else if (!VariantUtils::equals(lowerTerm, otherQuery->lowerTerm)) { return false; } if (VariantUtils::isNull(upperTerm)) { if (!VariantUtils::isNull(otherQuery->upperTerm)) { return false; } } else if (!VariantUtils::equals(upperTerm, otherQuery->upperTerm)) { return false; } return true; } int32_t TermRangeQuery::hashCode() { int32_t prime = 31; int32_t result = MultiTermQuery::hashCode(); result = prime * result + (collator ? collator->hashCode() : 0); result = prime * result + (field.empty() ? 0 : StringUtils::hashCode(field)); result = prime * result + (includeLower ? 1231 : 1237); result = prime * result + (includeUpper ? 1231 : 1237); result = prime * result + (VariantUtils::isNull(lowerTerm) ? 0 : StringUtils::hashCode(VariantUtils::get(lowerTerm))); result = prime * result + (VariantUtils::isNull(upperTerm) ? 0 : StringUtils::hashCode(VariantUtils::get(upperTerm))); return result; } } LucenePlusPlus-rel_3.0.9/src/core/search/TermRangeTermEnum.cpp000066400000000000000000000066761456444476200244000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermRangeTermEnum.h" #include "IndexReader.h" #include "Term.h" #include "Collator.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { TermRangeTermEnum::TermRangeTermEnum(const IndexReaderPtr& reader, const String& field, StringValue lowerTermText, StringValue upperTermText, bool includeLower, bool includeUpper, const CollatorPtr& collator) { this->collator = collator; this->_endEnum = false; this->upperTermText = upperTermText; this->lowerTermText = lowerTermText; this->includeLower = includeLower; this->includeUpper = includeUpper; this->field = field; // do a little bit of normalization: open ended range queries should always be inclusive. if (VariantUtils::isNull(this->lowerTermText)) { this->includeLower = true; } if (VariantUtils::isNull(this->upperTermText)) { this->includeUpper = true; } String startTermText(collator ? 
L"" : VariantUtils::get(this->lowerTermText)); setEnum(reader->terms(newLucene(this->field, startTermText))); } TermRangeTermEnum::~TermRangeTermEnum() { } double TermRangeTermEnum::difference() { return 1.0; } bool TermRangeTermEnum::endEnum() { return _endEnum; } bool TermRangeTermEnum::termCompare(const TermPtr& term) { if (!collator) { // Use Unicode code point ordering bool checkLower = false; if (!includeLower) { // make adjustments to set to exclusive checkLower = true; } if (term && term->field() == field) { if (!checkLower || VariantUtils::isNull(lowerTermText) || term->text().compare(VariantUtils::get(lowerTermText)) > 0) { checkLower = false; if (!VariantUtils::isNull(upperTermText)) { int32_t compare = VariantUtils::get(upperTermText).compare(term->text()); // if beyond the upper term, or is exclusive and this is equal to the upper term, break out if (compare < 0 || (!includeUpper && compare == 0)) { _endEnum = true; return false; } } return true; } } else { // break _endEnum = true; return false; } return false; } else { if (term && term->field() == field) { if ((VariantUtils::isNull(lowerTermText) || (includeLower ? collator->compare(term->text(), VariantUtils::get(lowerTermText)) >= 0 : collator->compare(term->text(), VariantUtils::get(lowerTermText)) > 0)) && (VariantUtils::isNull(upperTermText) || (includeUpper ? collator->compare(term->text(), VariantUtils::get(upperTermText)) <= 0 : collator->compare(term->text(), VariantUtils::get(upperTermText)) < 0))) { return true; } return false; } _endEnum = true; return false; } } } LucenePlusPlus-rel_3.0.9/src/core/search/TermScorer.cpp000066400000000000000000000074711456444476200231160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermScorer.h" #include "TermDocs.h" #include "Similarity.h" #include "Weight.h" #include "Collector.h" namespace Lucene { const int32_t TermScorer::SCORE_CACHE_SIZE = 32; TermScorer::TermScorer(const WeightPtr& weight, const TermDocsPtr& td, const SimilarityPtr& similarity, ByteArray norms) : Scorer(similarity) { this->weight = weight; this->termDocs = td; this->__termDocs = this->termDocs.get(); this->norms = norms; this->weightValue = weight->getValue(); this->doc = -1; this->docs = Collection::newInstance(123); this->__docs = this->docs.get(); this->freqs = Collection::newInstance(128); this->__freqs = this->freqs.get(); this->pointer = 0; this->pointerMax = 0; this->scoreCache = Collection::newInstance(SCORE_CACHE_SIZE); for (int32_t i = 0; i < SCORE_CACHE_SIZE; ++i) { scoreCache[i] = similarity->tf(i) * weightValue; } } TermScorer::~TermScorer() { } inline const Collection& TermScorer::SIM_NORM_DECODER() { return Similarity::NORM_TABLE; } void TermScorer::score(const CollectorPtr& collector) { score(collector, INT_MAX, nextDoc()); } bool TermScorer::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) { // firstDocID is ignored since nextDoc() sets 'doc' auto* __collector = collector.get(); __collector->setScorer(shared_from_this()); while (doc < max) { // for docs in window __collector->collect(doc); if (++pointer >= pointerMax) { pointerMax = __termDocs->read(docs, freqs); // refill buffers if (pointerMax != 0) { pointer = 0; } else { __termDocs->close(); // close stream doc = INT_MAX; // set to sentinel value return false; } } doc = __docs->operator[](pointer); freq = __freqs->operator[](pointer); } return true; } int32_t TermScorer::docID() { return doc; } int32_t TermScorer::nextDoc() { ++pointer; if (pointer >= pointerMax) { pointerMax = __termDocs->read(docs, freqs); // refill buffer if (pointerMax != 0) { pointer = 0; } else { 
__termDocs->close(); // close stream doc = NO_MORE_DOCS; return doc; } } doc = __docs->operator[](pointer); freq = __freqs->operator[](pointer); return doc; } double TermScorer::score() { BOOST_ASSERT(doc != -1); double raw = freq < SCORE_CACHE_SIZE ? scoreCache[freq] : similarity->tf(freq) * weightValue; // compute tf(f) * weight return norms ? raw * SIM_NORM_DECODER()[norms[doc] & 0xff] : raw; // normalize for field } int32_t TermScorer::advance(int32_t target) { // first scan in cache for (++pointer; pointer < pointerMax; ++pointer) { if (__docs->operator[](pointer) >= target) { doc = __docs->operator[](pointer); freq = __freqs->operator[](pointer); return doc; } } // not found in cache, seek underlying stream bool result = __termDocs->skipTo(target); if (result) { pointerMax = 1; pointer = 0; doc = __termDocs->doc(); __docs->operator[](pointer) = doc; freq = __termDocs->freq(); __freqs->operator[](pointer) = freq; } else { doc = NO_MORE_DOCS; } return doc; } String TermScorer::toString() { return L"term scorer(" + weight->toString() + L")"; } } LucenePlusPlus-rel_3.0.9/src/core/search/TimeLimitingCollector.cpp000066400000000000000000000067001456444476200252650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TimeLimitingCollector.h" #include "_TimeLimitingCollector.h" #include "StringUtils.h" namespace Lucene { /// Default timer resolution. 
const int32_t TimeLimitingCollector::DEFAULT_RESOLUTION = 20; int64_t TimeLimitingCollector::resolution = TimeLimitingCollector::DEFAULT_RESOLUTION; TimeLimitingCollector::TimeLimitingCollector(const CollectorPtr& collector, int64_t timeAllowed) { this->DEFAULT_GREEDY = false; this->greedy = DEFAULT_GREEDY; this->collector = collector; this->t0 = TIMER_THREAD()->getMilliseconds(); this->timeout = t0 + timeAllowed; this->docBase = 0; } TimeLimitingCollector::~TimeLimitingCollector() { } TimerThreadPtr TimeLimitingCollector::TIMER_THREAD() { static TimerThreadPtr _TIMER_THREAD; LUCENE_RUN_ONCE( _TIMER_THREAD = newLucene(); CycleCheck::addStatic(_TIMER_THREAD); ); if (!_TIMER_THREAD->isAlive()) { _TIMER_THREAD->start(); // start single thread instance } return _TIMER_THREAD; } int64_t TimeLimitingCollector::getResolution() { return resolution; } void TimeLimitingCollector::setResolution(int64_t newResolution) { resolution = std::max(newResolution, (int64_t)5); // 5 milliseconds is about the minimum reasonable time for a wait call. } void TimeLimitingCollector::stopTimer() { if (TIMER_THREAD()->isAlive()) { TIMER_THREAD()->stopThread(); TIMER_THREAD()->join(); } } bool TimeLimitingCollector::isGreedy() { return greedy; } void TimeLimitingCollector::setGreedy(bool greedy) { this->greedy = greedy; } void TimeLimitingCollector::collect(int32_t doc) { int64_t time = TIMER_THREAD()->getMilliseconds(); if (timeout < time) { if (greedy) { collector->collect(doc); } boost::throw_exception(TimeExceededException(L"Elapsed time:" + StringUtils::toString(timeout - t0) + L" ms. " + L"Exceeded allowed search time:" + StringUtils::toString(time - t0) + L" ms. 
" + L"Last doc:" + StringUtils::toString(docBase + doc))); } collector->collect(doc); } void TimeLimitingCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { collector->setNextReader(reader, docBase); this->docBase = docBase; } void TimeLimitingCollector::setScorer(const ScorerPtr& scorer) { collector->setScorer(scorer); } bool TimeLimitingCollector::acceptsDocsOutOfOrder() { return collector->acceptsDocsOutOfOrder(); } TimerThread::TimerThread() { time = 0; _stopThread = false; } TimerThread::~TimerThread() { } void TimerThread::start() { _stopThread = false; LuceneThread::start(); } void TimerThread::run() { while (!_stopThread) { int64_t resolution; { SyncLock syncLock(this); resolution = TimeLimitingCollector::resolution; time += resolution; } LuceneThread::threadSleep(resolution); } } int64_t TimerThread::getMilliseconds() { SyncLock syncLock(this); return time; } void TimerThread::stopThread() { _stopThread = true; } } LucenePlusPlus-rel_3.0.9/src/core/search/TopDocs.cpp000066400000000000000000000017011456444476200223720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TopDocs.h" namespace Lucene { TopDocs::TopDocs(int32_t totalHits, Collection scoreDocs) { this->totalHits = totalHits; this->scoreDocs = scoreDocs; this->maxScore = std::numeric_limits::quiet_NaN(); } TopDocs::TopDocs(int32_t totalHits, Collection scoreDocs, double maxScore) { this->totalHits = totalHits; this->scoreDocs = scoreDocs; this->maxScore = maxScore; } TopDocs::~TopDocs() { } double TopDocs::getMaxScore() { return maxScore; } void TopDocs::setMaxScore(double maxScore) { this->maxScore = maxScore; } } LucenePlusPlus-rel_3.0.9/src/core/search/TopDocsCollector.cpp000066400000000000000000000062661456444476200242540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TopDocsCollector.h" #include "TopDocs.h" #include "HitQueueBase.h" namespace Lucene { TopDocsCollector::TopDocsCollector(const HitQueueBasePtr& pq) { this->pq = pq; this->totalHits = 0; } TopDocsCollector::~TopDocsCollector() { } TopDocsPtr TopDocsCollector::EMPTY_TOPDOCS() { static TopDocsPtr _EMPTY_TOPDOCS; LUCENE_RUN_ONCE( _EMPTY_TOPDOCS = newLucene(0, Collection::newInstance(), std::numeric_limits::quiet_NaN()); CycleCheck::addStatic(_EMPTY_TOPDOCS); ); return _EMPTY_TOPDOCS; } void TopDocsCollector::populateResults(Collection results, int32_t howMany) { for (int32_t i = howMany - 1; i >= 0; --i) { results[i] = pq->pop(); } } TopDocsPtr TopDocsCollector::newTopDocs(Collection results, int32_t start) { return results ? 
newLucene(totalHits, results) : EMPTY_TOPDOCS(); } int32_t TopDocsCollector::getTotalHits() { return totalHits; } TopDocsPtr TopDocsCollector::topDocs() { // In case pq was populated with sentinel values, there might be less results than pq.size(). // Therefore return all results until either pq.size() or totalHits. return topDocs(0, totalHits < pq->size() ? totalHits : pq->size()); } TopDocsPtr TopDocsCollector::topDocs(int32_t start) { // In case pq was populated with sentinel values, there might be less results than pq.size(). // Therefore return all results until either pq.size() or totalHits. return topDocs(start, totalHits < pq->size() ? totalHits : pq->size()); } TopDocsPtr TopDocsCollector::topDocs(int32_t start, int32_t howMany) { // In case pq was populated with sentinel values, there might be less results than pq.size(). // Therefore return all results until either pq.size() or totalHits. int32_t size = totalHits < pq->size() ? totalHits : pq->size(); // Don't bother to throw an exception, just return an empty TopDocs in case the parameters are // invalid or out of range. if (start < 0 || start >= size || howMany <= 0) { return newTopDocs(Collection(), start); } // We know that start < pq.size, so just fix howMany. howMany = std::min(size - start, howMany); Collection results = Collection::newInstance(howMany); // pq's pop() returns the 'least' element in the queue, therefore need to discard the first ones, // until we reach the requested range. Note that this loop will usually not be executed, since the // common usage should be that the caller asks for the last howMany results. However it's needed // here for completeness. for (int32_t i = pq->size() - start - howMany; i > 0; --i) { pq->pop(); } // Get the requested results from pq. 
populateResults(results, howMany); return newTopDocs(results, start); } } LucenePlusPlus-rel_3.0.9/src/core/search/TopFieldCollector.cpp000066400000000000000000000742231456444476200244050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TopFieldCollector.h" #include "_TopFieldCollector.h" #include "FieldValueHitQueue.h" #include "FieldComparator.h" #include "FieldDoc.h" #include "Scorer.h" #include "Sort.h" #include "TopFieldDocs.h" namespace Lucene { TopFieldCollector::TopFieldCollector(const HitQueueBasePtr& pq, int32_t numHits, bool fillFields) : TopDocsCollector(pq) { this->numHits = numHits; this->fillFields = fillFields; this->maxScore = std::numeric_limits::quiet_NaN(); this->queueFull = false; this->docBase = 0; } TopFieldCollector::~TopFieldCollector() { } const Collection TopFieldCollector::EMPTY_SCOREDOCS() { static Collection _EMPTY_SCOREDOCS; LUCENE_RUN_ONCE( _EMPTY_SCOREDOCS = Collection::newInstance(); ); return _EMPTY_SCOREDOCS; } TopFieldCollectorPtr TopFieldCollector::create(const SortPtr& sort, int32_t numHits, bool fillFields, bool trackDocScores, bool trackMaxScore, bool docsScoredInOrder) { if (sort->fields.empty()) { boost::throw_exception(IllegalArgumentException(L"Sort must contain at least one field")); } FieldValueHitQueuePtr queue(FieldValueHitQueue::create(sort->fields, numHits)); if (queue->getComparators().size() == 1) { if (docsScoredInOrder) { if (trackMaxScore) { return newLucene(queue, numHits, fillFields); } else if (trackDocScores) { return newLucene(queue, numHits, fillFields); } else { return newLucene(queue, numHits, fillFields); } } else { if (trackMaxScore) { return 
newLucene(queue, numHits, fillFields); } else if (trackDocScores) { return newLucene(queue, numHits, fillFields); } else { return newLucene(queue, numHits, fillFields); } } } // multiple comparators if (docsScoredInOrder) { if (trackMaxScore) { return newLucene(queue, numHits, fillFields); } else if (trackDocScores) { return newLucene(queue, numHits, fillFields); } else { return newLucene(queue, numHits, fillFields); } } else { if (trackMaxScore) { return newLucene(queue, numHits, fillFields); } else if (trackDocScores) { return newLucene(queue, numHits, fillFields); } else { return newLucene(queue, numHits, fillFields); } } } void TopFieldCollector::add(int32_t slot, int32_t doc, double score) { bottom = boost::static_pointer_cast(pq->add(newLucene(slot, docBase + doc, score))); queueFull = (totalHits == numHits); } void TopFieldCollector::populateResults(Collection results, int32_t howMany) { if (fillFields) { FieldValueHitQueuePtr queue(boost::static_pointer_cast(pq)); for (int32_t i = howMany - 1; i >= 0; --i) { results[i] = queue->fillFields(boost::static_pointer_cast(queue->pop())); } } else { for (int32_t i = howMany - 1; i >= 0; --i) { FieldValueHitQueueEntryPtr entry(boost::static_pointer_cast(pq->pop())); results[i] = newLucene(entry->doc, entry->score); } } } TopDocsPtr TopFieldCollector::newTopDocs(Collection results, int32_t start) { if (!results) { results = EMPTY_SCOREDOCS(); // Set maxScore to NaN, in case this is a maxScore tracking collector maxScore = std::numeric_limits::quiet_NaN(); } // If this is a maxScoring tracking collector and there were no results return newLucene(totalHits, results, boost::static_pointer_cast(pq)->getFields(), maxScore); } bool TopFieldCollector::acceptsDocsOutOfOrder() { return false; } OneComparatorNonScoringCollector::OneComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : TopFieldCollector(queue, numHits, fillFields) { } 
OneComparatorNonScoringCollector::~OneComparatorNonScoringCollector() { } void OneComparatorNonScoringCollector::initialize() { TopFieldCollector::initialize(); FieldValueHitQueuePtr queue(boost::static_pointer_cast(pq)); comparator = queue->getComparators()[0]; reverseMul = queue->getReverseMul()[0]; } void OneComparatorNonScoringCollector::updateBottom(int32_t doc) { // bottom.score is already set to NaN in add(). bottom->doc = docBase + doc; bottom = boost::static_pointer_cast(pq->updateTop()); } void OneComparatorNonScoringCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { if ((reverseMul * comparator->compareBottom(doc)) <= 0) { // since docs are visited in doc Id order, if compare is 0, it means this document is largest // than anything else in the queue, and therefore not competitive. return; } // This hit is competitive - replace bottom element in queue and adjustTop comparator->copy(bottom->slot, doc); updateBottom(doc); comparator->setBottom(bottom->slot); } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue comparator->copy(slot, doc); add(slot, doc, std::numeric_limits::quiet_NaN()); if (queueFull) { comparator->setBottom(bottom->slot); } } } void OneComparatorNonScoringCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { this->docBase = docBase; comparator->setNextReader(reader, docBase); } void OneComparatorNonScoringCollector::setScorer(const ScorerPtr& scorer) { comparator->setScorer(scorer); } OutOfOrderOneComparatorNonScoringCollector::OutOfOrderOneComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : OneComparatorNonScoringCollector(queue, numHits, fillFields) { } OutOfOrderOneComparatorNonScoringCollector::~OutOfOrderOneComparatorNonScoringCollector() { } void OutOfOrderOneComparatorNonScoringCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not 
competitive int32_t cmp = reverseMul * comparator->compareBottom(doc); if (cmp < 0 || (cmp == 0 && doc + docBase > bottom->doc)) { return; } // This hit is competitive - replace bottom element in queue and adjustTop comparator->copy(bottom->slot, doc); updateBottom(doc); comparator->setBottom(bottom->slot); } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue comparator->copy(slot, doc); add(slot, doc, std::numeric_limits::quiet_NaN()); if (queueFull) { comparator->setBottom(bottom->slot); } } } bool OutOfOrderOneComparatorNonScoringCollector::acceptsDocsOutOfOrder() { return true; } OneComparatorScoringNoMaxScoreCollector::OneComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : OneComparatorNonScoringCollector(queue, numHits, fillFields) { } OneComparatorScoringNoMaxScoreCollector::~OneComparatorScoringNoMaxScoreCollector() { } void OneComparatorScoringNoMaxScoreCollector::updateBottom(int32_t doc, double score) { bottom->doc = docBase + doc; bottom->score = score; bottom = boost::static_pointer_cast(pq->updateTop()); } void OneComparatorScoringNoMaxScoreCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { if ((reverseMul * comparator->compareBottom(doc)) <= 0) { // since docs are visited in doc Id order, if compare is 0, it means this document is largest // than anything else in the queue, and therefore not competitive. return; } // Compute the score only if the hit is competitive. double score = scorer->score(); // This hit is competitive - replace bottom element in queue and adjustTop comparator->copy(bottom->slot, doc); updateBottom(doc, score); comparator->setBottom(bottom->slot); } else { // Compute the score only if the hit is competitive. 
double score = scorer->score(); // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue comparator->copy(slot, doc); add(slot, doc, score); if (queueFull) { comparator->setBottom(bottom->slot); } } } void OneComparatorScoringNoMaxScoreCollector::setScorer(const ScorerPtr& scorer) { this->scorer = scorer; comparator->setScorer(scorer); } OutOfOrderOneComparatorScoringNoMaxScoreCollector::OutOfOrderOneComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : OneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields) { } OutOfOrderOneComparatorScoringNoMaxScoreCollector::~OutOfOrderOneComparatorScoringNoMaxScoreCollector() { } void OutOfOrderOneComparatorScoringNoMaxScoreCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive int32_t cmp = reverseMul * comparator->compareBottom(doc); if (cmp < 0 || (cmp == 0 && doc + docBase > bottom->doc)) { return; } // Compute the score only if the hit is competitive. double score = scorer->score(); // This hit is competitive - replace bottom element in queue and adjustTop comparator->copy(bottom->slot, doc); updateBottom(doc, score); comparator->setBottom(bottom->slot); } else { // Compute the score only if the hit is competitive. double score = scorer->score(); // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue comparator->copy(slot, doc); add(slot, doc, score); if (queueFull) { comparator->setBottom(bottom->slot); } } } bool OutOfOrderOneComparatorScoringNoMaxScoreCollector::acceptsDocsOutOfOrder() { return true; } OneComparatorScoringMaxScoreCollector::OneComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : OneComparatorNonScoringCollector(queue, numHits, fillFields) { // Must set maxScore to NEG_INF, or otherwise std::max always returns NaN. 
this->maxScore = -std::numeric_limits::infinity(); } OneComparatorScoringMaxScoreCollector::~OneComparatorScoringMaxScoreCollector() { } void OneComparatorScoringMaxScoreCollector::updateBottom(int32_t doc, double score) { bottom->doc = docBase + doc; bottom->score = score; bottom = boost::static_pointer_cast(pq->updateTop()); } void OneComparatorScoringMaxScoreCollector::collect(int32_t doc) { double score = scorer->score(); if (score > maxScore) { maxScore = score; } ++totalHits; if (queueFull) { if ((reverseMul * comparator->compareBottom(doc)) <= 0) { // since docs are visited in doc Id order, if compare is 0, it means this document is largest // than anything else in the queue, and therefore not competitive. return; } // This hit is competitive - replace bottom element in queue and adjustTop comparator->copy(bottom->slot, doc); updateBottom(doc, score); comparator->setBottom(bottom->slot); } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue comparator->copy(slot, doc); add(slot, doc, score); if (queueFull) { comparator->setBottom(bottom->slot); } } } void OneComparatorScoringMaxScoreCollector::setScorer(const ScorerPtr& scorer) { this->scorer = scorer; OneComparatorNonScoringCollector::setScorer(scorer); } OutOfOrderOneComparatorScoringMaxScoreCollector::OutOfOrderOneComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : OneComparatorScoringMaxScoreCollector(queue, numHits, fillFields) { } OutOfOrderOneComparatorScoringMaxScoreCollector::~OutOfOrderOneComparatorScoringMaxScoreCollector() { } void OutOfOrderOneComparatorScoringMaxScoreCollector::collect(int32_t doc) { double score = scorer->score(); if (score > maxScore) { maxScore = score; } ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive int32_t cmp = reverseMul * comparator->compareBottom(doc); if (cmp < 0 || (cmp == 0 && doc + docBase > bottom->doc)) { 
return; } // This hit is competitive - replace bottom element in queue and adjustTop comparator->copy(bottom->slot, doc); updateBottom(doc, score); comparator->setBottom(bottom->slot); } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue comparator->copy(slot, doc); add(slot, doc, score); if (queueFull) { comparator->setBottom(bottom->slot); } } } bool OutOfOrderOneComparatorScoringMaxScoreCollector::acceptsDocsOutOfOrder() { return true; } MultiComparatorNonScoringCollector::MultiComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : TopFieldCollector(queue, numHits, fillFields) { } MultiComparatorNonScoringCollector::~MultiComparatorNonScoringCollector() { } void MultiComparatorNonScoringCollector::initialize() { TopFieldCollector::initialize(); FieldValueHitQueuePtr queue(boost::static_pointer_cast(pq)); comparators = queue->getComparators(); reverseMul = queue->getReverseMul(); } void MultiComparatorNonScoringCollector::updateBottom(int32_t doc) { // bottom.score is already set to NaN in add(). bottom->doc = docBase + doc; bottom = boost::static_pointer_cast(pq->updateTop()); } void MultiComparatorNonScoringCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive for (int32_t i = 0; ; ++i) { int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); if (c < 0) { // Definitely not competitive. return; } else if (c > 0) { // Definitely competitive. break; } else if (i == comparators.size() - 1) { // Here c=0. If we're at the last comparator, this doc is not competitive, since docs are // visited in doc Id order, which means this doc cannot compete with any other document // in the queue. 
return; } } // This hit is competitive - replace bottom element in queue and adjustTop for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->copy(bottom->slot, doc); } updateBottom(doc); for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->setBottom(bottom->slot); } } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->copy(slot, doc); } add(slot, doc, std::numeric_limits::quiet_NaN()); if (queueFull) { for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->setBottom(bottom->slot); } } } } void MultiComparatorNonScoringCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { this->docBase = docBase; for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->setNextReader(reader, docBase); } } void MultiComparatorNonScoringCollector::setScorer(const ScorerPtr& scorer) { // set the scorer on all comparators for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->setScorer(scorer); } } OutOfOrderMultiComparatorNonScoringCollector::OutOfOrderMultiComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : MultiComparatorNonScoringCollector(queue, numHits, fillFields) { } OutOfOrderMultiComparatorNonScoringCollector::~OutOfOrderMultiComparatorNonScoringCollector() { } void OutOfOrderMultiComparatorNonScoringCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive for (int32_t i = 0; ; ++i) { int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); if (c < 0) { // Definitely not competitive. return; } else if (c > 0) { // Definitely competitive. 
break; } else if (i == comparators.size() - 1) { // This is the equals case. if (doc + docBase > bottom->doc) { // Definitely not competitive return; } break; } } // This hit is competitive - replace bottom element in queue and adjustTop for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->copy(bottom->slot, doc); } updateBottom(doc); for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->setBottom(bottom->slot); } } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->copy(slot, doc); } add(slot, doc, std::numeric_limits::quiet_NaN()); if (queueFull) { for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->setBottom(bottom->slot); } } } } bool OutOfOrderMultiComparatorNonScoringCollector::acceptsDocsOutOfOrder() { return true; } MultiComparatorScoringMaxScoreCollector::MultiComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : MultiComparatorNonScoringCollector(queue, numHits, fillFields) { // Must set maxScore to NEG_INF, or otherwise std::max always returns NaN. 
this->maxScore = -std::numeric_limits::infinity(); } MultiComparatorScoringMaxScoreCollector::~MultiComparatorScoringMaxScoreCollector() { } void MultiComparatorScoringMaxScoreCollector::updateBottom(int32_t doc, double score) { bottom->doc = docBase + doc; bottom->score = score; bottom = boost::static_pointer_cast(pq->updateTop()); } void MultiComparatorScoringMaxScoreCollector::collect(int32_t doc) { double score = ScorerPtr(_scorer)->score(); if (score > maxScore) { maxScore = score; } ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive for (int32_t i = 0; ; ++i) { int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); if (c < 0) { // Definitely not competitive. return; } else if (c > 0) { // Definitely competitive. break; } else if (i == comparators.size() - 1) { // Here c=0. If we're at the last comparator, this doc is not competitive, since docs are // visited in doc Id order, which means this doc cannot compete with any other document // in the queue. 
return; } } // This hit is competitive - replace bottom element in queue and adjustTop for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->copy(bottom->slot, doc); } updateBottom(doc, score); for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->setBottom(bottom->slot); } } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->copy(slot, doc); } add(slot, doc, score); if (queueFull) { for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->setBottom(bottom->slot); } } } } void MultiComparatorScoringMaxScoreCollector::setScorer(const ScorerPtr& scorer) { this->_scorer = scorer; MultiComparatorNonScoringCollector::setScorer(scorer); } OutOfOrderMultiComparatorScoringMaxScoreCollector::OutOfOrderMultiComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : MultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields) { } OutOfOrderMultiComparatorScoringMaxScoreCollector::~OutOfOrderMultiComparatorScoringMaxScoreCollector() { } void OutOfOrderMultiComparatorScoringMaxScoreCollector::collect(int32_t doc) { double score = ScorerPtr(_scorer)->score(); if (score > maxScore) { maxScore = score; } ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive for (int32_t i = 0; ; ++i) { int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); if (c < 0) { // Definitely not competitive. return; } else if (c > 0) { // Definitely competitive. break; } else if (i == comparators.size() - 1) { // This is the equals case. 
if (doc + docBase > bottom->doc) { // Definitely not competitive return; } break; } } // This hit is competitive - replace bottom element in queue and adjustTop for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->copy(bottom->slot, doc); } updateBottom(doc, score); for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->setBottom(bottom->slot); } } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->copy(slot, doc); } add(slot, doc, score); if (queueFull) { for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->setBottom(bottom->slot); } } } } bool OutOfOrderMultiComparatorScoringMaxScoreCollector::acceptsDocsOutOfOrder() { return true; } MultiComparatorScoringNoMaxScoreCollector::MultiComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : MultiComparatorNonScoringCollector(queue, numHits, fillFields) { } MultiComparatorScoringNoMaxScoreCollector::~MultiComparatorScoringNoMaxScoreCollector() { } void MultiComparatorScoringNoMaxScoreCollector::updateBottom(int32_t doc, double score) { bottom->doc = docBase + doc; bottom->score = score; bottom = boost::static_pointer_cast(pq->updateTop()); } void MultiComparatorScoringNoMaxScoreCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive for (int32_t i = 0; ; ++i) { int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); if (c < 0) { // Definitely not competitive. return; } else if (c > 0) { // Definitely competitive. break; } else if (i == comparators.size() - 1) { // Here c=0. 
If we're at the last comparator, this doc is not competitive, since docs are // visited in doc Id order, which means this doc cannot compete with any other document // in the queue. return; } } // This hit is competitive - replace bottom element in queue and adjustTop for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->copy(bottom->slot, doc); } // Compute score only if it is competitive. double score = ScorerPtr(_scorer)->score(); updateBottom(doc, score); for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->setBottom(bottom->slot); } } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->copy(slot, doc); } // Compute score only if it is competitive. double score = ScorerPtr(_scorer)->score(); add(slot, doc, score); if (queueFull) { for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->setBottom(bottom->slot); } } } } void MultiComparatorScoringNoMaxScoreCollector::setScorer(const ScorerPtr& scorer) { this->_scorer = scorer; MultiComparatorNonScoringCollector::setScorer(scorer); } OutOfOrderMultiComparatorScoringNoMaxScoreCollector::OutOfOrderMultiComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : MultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields) { } OutOfOrderMultiComparatorScoringNoMaxScoreCollector::~OutOfOrderMultiComparatorScoringNoMaxScoreCollector() { } void OutOfOrderMultiComparatorScoringNoMaxScoreCollector::collect(int32_t doc) { ++totalHits; if (queueFull) { // Fastmatch: return if this hit is not competitive for (int32_t i = 0; ; ++i) { int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); if (c < 0) { // Definitely not competitive. 
return; } else if (c > 0) { // Definitely competitive. break; } else if (i == comparators.size() - 1) { // This is the equals case. if (doc + docBase > bottom->doc) { // Definitely not competitive return; } break; } } // This hit is competitive - replace bottom element in queue and adjustTop for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->copy(bottom->slot, doc); } // Compute score only if it is competitive. double score = ScorerPtr(_scorer)->score(); updateBottom(doc, score); for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->setBottom(bottom->slot); } } else { // Startup transient: queue hasn't gathered numHits yet int32_t slot = totalHits - 1; // Copy hit into queue for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->copy(slot, doc); } // Compute score only if it is competitive. double score = ScorerPtr(_scorer)->score(); add(slot, doc, score); if (queueFull) { for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { (*cmp)->setBottom(bottom->slot); } } } } void OutOfOrderMultiComparatorScoringNoMaxScoreCollector::setScorer(const ScorerPtr& scorer) { this->_scorer = scorer; MultiComparatorScoringNoMaxScoreCollector::setScorer(scorer); } bool OutOfOrderMultiComparatorScoringNoMaxScoreCollector::acceptsDocsOutOfOrder() { return true; } } LucenePlusPlus-rel_3.0.9/src/core/search/TopFieldDocs.cpp000066400000000000000000000012131456444476200233340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TopFieldDocs.h" namespace Lucene { TopFieldDocs::TopFieldDocs(int32_t totalHits, Collection scoreDocs, Collection fields, double maxScore) : TopDocs(totalHits, scoreDocs, maxScore) { this->fields = fields; } TopFieldDocs::~TopFieldDocs() { } } LucenePlusPlus-rel_3.0.9/src/core/search/TopScoreDocCollector.cpp000066400000000000000000000073071456444476200250620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TopScoreDocCollector.h" #include "_TopScoreDocCollector.h" #include "HitQueue.h" #include "ScoreDoc.h" #include "Scorer.h" #include "TopDocs.h" #include "MiscUtils.h" namespace Lucene { TopScoreDocCollector::TopScoreDocCollector(int32_t numHits) : TopDocsCollector(newLucene(numHits, true)) { // HitQueue implements getSentinelObject to return a ScoreDoc, so we know that at this point top() // is already initialized. pqTop = pq->top(); docBase = 0; } TopScoreDocCollector::~TopScoreDocCollector() { } TopScoreDocCollectorPtr TopScoreDocCollector::create(int32_t numHits, bool docsScoredInOrder) { if (docsScoredInOrder) { return newLucene(numHits); } else { return newLucene(numHits); } } TopDocsPtr TopScoreDocCollector::newTopDocs(Collection results, int32_t start) { if (!results) { return EMPTY_TOPDOCS(); } // We need to compute maxScore in order to set it in TopDocs. If start == 0, it means the largest element // is already in results, use its score as maxScore. Otherwise pop everything else, until the largest // element is extracted and use its score as maxScore. 
double maxScore = std::numeric_limits::quiet_NaN(); if (start == 0) { maxScore = results[0]->score; } else { for (int32_t i = pq->size(); i > 1; --i) { pq->pop(); } maxScore = pq->pop()->score; } return newLucene(totalHits, results, maxScore); } void TopScoreDocCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { this->docBase = docBase; } void TopScoreDocCollector::setScorer(const ScorerPtr& scorer) { this->_scorer = scorer; this->__scorer = scorer.get(); } InOrderTopScoreDocCollector::InOrderTopScoreDocCollector(int32_t numHits) : TopScoreDocCollector(numHits) { } InOrderTopScoreDocCollector::~InOrderTopScoreDocCollector() { } void InOrderTopScoreDocCollector::collect(int32_t doc) { double score = __scorer->score(); // This collector cannot handle these scores BOOST_ASSERT(score != -std::numeric_limits::infinity()); BOOST_ASSERT(!MiscUtils::isNaN(score)); ++totalHits; if (score <= pqTop->score) { // Since docs are returned in-order (ie., increasing doc Id), a document with equal score to // pqTop.score cannot compete since HitQueue favours documents with lower doc Ids. Therefore // reject those docs too. 
return; } pqTop->doc = doc + docBase; pqTop->score = score; pqTop = pq->updateTop(); } bool InOrderTopScoreDocCollector::acceptsDocsOutOfOrder() { return false; } OutOfOrderTopScoreDocCollector::OutOfOrderTopScoreDocCollector(int32_t numHits) : TopScoreDocCollector(numHits) { } OutOfOrderTopScoreDocCollector::~OutOfOrderTopScoreDocCollector() { } void OutOfOrderTopScoreDocCollector::collect(int32_t doc) { double score = __scorer->score(); // This collector cannot handle NaN BOOST_ASSERT(!MiscUtils::isNaN(score)); ++totalHits; doc += docBase; if (score < pqTop->score || (score == pqTop->score && doc > pqTop->doc)) { return; } pqTop->doc = doc; pqTop->score = score; pqTop = pq->updateTop(); } bool OutOfOrderTopScoreDocCollector::acceptsDocsOutOfOrder() { return true; } } LucenePlusPlus-rel_3.0.9/src/core/search/Weight.cpp000066400000000000000000000007471456444476200222570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Weight.h" namespace Lucene { Weight::~Weight() { } bool Weight::scoresDocsOutOfOrder() { return false; } } LucenePlusPlus-rel_3.0.9/src/core/search/WildcardQuery.cpp000066400000000000000000000061541456444476200236050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "WildcardQuery.h" #include "WildcardTermEnum.h" #include "Term.h" #include "PrefixQuery.h" #include "SingleTermEnum.h" #include "MiscUtils.h" namespace Lucene { WildcardQuery::WildcardQuery(const TermPtr& term) { this->term = term; String text(term->text()); this->termContainsWildcard = boost::contains(text, L"*") || boost::contains(text, L"?"); this->termIsPrefix = termContainsWildcard && !boost::contains(text, L"?") && text.find_first_of(L"*") == text.length() - 1; } WildcardQuery::~WildcardQuery() { } FilteredTermEnumPtr WildcardQuery::getEnum(const IndexReaderPtr& reader) { if (termContainsWildcard) { return newLucene(reader, getTerm()); } else { return newLucene(reader, getTerm()); } } TermPtr WildcardQuery::getTerm() { return term; } QueryPtr WildcardQuery::rewrite(const IndexReaderPtr& reader) { if (termIsPrefix) { MultiTermQueryPtr rewritten(newLucene(term->createTerm(term->text().substr(0, term->text().find('*'))))); rewritten->setBoost(getBoost()); rewritten->setRewriteMethod(getRewriteMethod()); return rewritten; } else { return MultiTermQuery::rewrite(reader); } } String WildcardQuery::toString(const String& field) { StringStream buffer; if (term->field() != field) { buffer << term->field() << L":"; } buffer << term->text() << boostString(); return buffer.str(); } LuceneObjectPtr WildcardQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = MultiTermQuery::clone(other ? other : newLucene(term)); WildcardQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->termContainsWildcard = termContainsWildcard; cloneQuery->termIsPrefix = termIsPrefix; cloneQuery->term = term; return cloneQuery; } int32_t WildcardQuery::hashCode() { int32_t prime = 31; int32_t result = MultiTermQuery::hashCode(); result = prime * result + (term ? 
term->hashCode() : 0); return result; } bool WildcardQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } if (!MultiTermQuery::equals(other)) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } WildcardQueryPtr otherWildcardQuery(boost::dynamic_pointer_cast(other)); if (!otherWildcardQuery) { return false; } if (!term) { if (otherWildcardQuery->term) { return false; } } else if (!term->equals(otherWildcardQuery->term)) { return false; } return true; } } LucenePlusPlus-rel_3.0.9/src/core/search/WildcardTermEnum.cpp000066400000000000000000000104451456444476200242320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "WildcardTermEnum.h" #include "Term.h" #include "IndexReader.h" namespace Lucene { const wchar_t WildcardTermEnum::WILDCARD_STRING = L'*'; const wchar_t WildcardTermEnum::WILDCARD_CHAR = L'?'; WildcardTermEnum::WildcardTermEnum(const IndexReaderPtr& reader, const TermPtr& term) { _endEnum = false; searchTerm = term; field = searchTerm->field(); String searchTermText(searchTerm->text()); String::size_type sidx = searchTermText.find(WILDCARD_STRING); String::size_type cidx = searchTermText.find(WILDCARD_CHAR); String::size_type idx = sidx; if (idx == String::npos) { idx = cidx; } else if (cidx != String::npos) { idx = std::min(idx, cidx); } pre = idx != String::npos ? 
searchTerm->text().substr(0, idx) : L""; preLen = pre.length(); text = searchTermText.substr(preLen); setEnum(reader->terms(newLucene(searchTerm->field(), pre))); } WildcardTermEnum::~WildcardTermEnum() { } bool WildcardTermEnum::termCompare(const TermPtr& term) { if (field == term->field()) { String searchText(term->text()); if (boost::starts_with(searchText, pre)) { return wildcardEquals(text, 0, searchText, preLen); } } _endEnum = true; return false; } double WildcardTermEnum::difference() { return 1.0; } bool WildcardTermEnum::endEnum() { return _endEnum; } bool WildcardTermEnum::wildcardEquals(const String& pattern, int32_t patternIdx, const String& string, int32_t stringIdx) { int32_t p = patternIdx; for (int32_t s = stringIdx; ; ++p, ++s) { // End of string yet? bool sEnd = (s >= (int32_t)string.length()); // End of pattern yet? bool pEnd = (p >= (int32_t)pattern.length()); // If we're looking at the end of the string if (sEnd) { // Assume the only thing left on the pattern is/are wildcards bool justWildcardsLeft = true; // Current wildcard position int32_t wildcardSearchPos = p; // While we haven't found the end of the pattern, and haven't encountered any non-wildcard characters while (wildcardSearchPos < (int32_t)pattern.length() && justWildcardsLeft) { // Check the character at the current position wchar_t wildchar = pattern[wildcardSearchPos]; // If it's not a wildcard character, then there is more pattern information after this/these wildcards. if (wildchar != WILDCARD_CHAR && wildchar != WILDCARD_STRING) { justWildcardsLeft = false; } else { // to prevent "cat" matches "ca??" if (wildchar == WILDCARD_CHAR) { return false; } // Look at the next character ++wildcardSearchPos; } } // This was a prefix wildcard search, and we've matched, so return true. if (justWildcardsLeft) { return true; } } // If we've gone past the end of the string, or the pattern, return false. if (sEnd || pEnd) { break; } // Match a single character, so continue. 
if (pattern[p] == WILDCARD_CHAR) { continue; } if (pattern[p] == WILDCARD_STRING) { // Look at the character beyond the '*' characters. while (p < (int32_t)pattern.length() && pattern[p] == WILDCARD_STRING) { ++p; } // Examine the string, starting at the last character. for (int32_t i = string.length(); i >= s; --i) { if (wildcardEquals(pattern, p, string, i)) { return true; } } break; } if (pattern[p] != string[s]) { break; } } return false; } } LucenePlusPlus-rel_3.0.9/src/core/search/function/000077500000000000000000000000001456444476200221415ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/search/function/ByteFieldSource.cpp000066400000000000000000000045521456444476200257030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ByteFieldSource.h" #include "_ByteFieldSource.h" #include "FieldCache.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { ByteFieldSource::ByteFieldSource(const String& field, const ByteParserPtr& parser) : FieldCacheSource(field) { this->parser = parser; } ByteFieldSource::~ByteFieldSource() { } String ByteFieldSource::description() { return L"byte(" + FieldCacheSource::description() + L")"; } DocValuesPtr ByteFieldSource::getCachedFieldValues(const FieldCachePtr& cache, const String& field, const IndexReaderPtr& reader) { Collection arr(cache->getBytes(reader, field, parser)); return newLucene(shared_from_this(), arr); } bool ByteFieldSource::cachedFieldSourceEquals(const FieldCacheSourcePtr& other) { if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } ByteFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); if (!otherSource) { return 
false; } return parser ? MiscUtils::equalTypes(parser, otherSource->parser) : !otherSource->parser; } int32_t ByteFieldSource::cachedFieldSourceHashCode() { return StringUtils::hashCode(parser ? ByteParser::_getClassName() : ByteFieldSource::_getClassName()); } ByteDocValues::ByteDocValues(const ByteFieldSourcePtr& source, Collection arr) { this->_source = source; this->arr = arr; } ByteDocValues::~ByteDocValues() { } double ByteDocValues::doubleVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) { boost::throw_exception(IndexOutOfBoundsException()); } return (double)arr[doc]; } int32_t ByteDocValues::intVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) { boost::throw_exception(IndexOutOfBoundsException()); } return (int32_t)arr[doc]; } String ByteDocValues::toString(int32_t doc) { return ByteFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(intVal(doc)); } CollectionValue ByteDocValues::getInnerArray() { return arr; } } LucenePlusPlus-rel_3.0.9/src/core/search/function/CustomScoreProvider.cpp000066400000000000000000000047451456444476200266400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CustomScoreProvider.h" #include "Explanation.h" namespace Lucene { CustomScoreProvider::CustomScoreProvider(const IndexReaderPtr& reader) { this->reader = reader; } CustomScoreProvider::~CustomScoreProvider() { } double CustomScoreProvider::customScore(int32_t doc, double subQueryScore, Collection valSrcScores) { if (valSrcScores.size() == 1) { return customScore(doc, subQueryScore, valSrcScores[0]); } if (valSrcScores.empty()) { return customScore(doc, subQueryScore, 1); } double score = subQueryScore; for (Collection::iterator srcScore = valSrcScores.begin(); srcScore != valSrcScores.end(); ++srcScore) { score *= *srcScore; } return score; } double CustomScoreProvider::customScore(int32_t doc, double subQueryScore, double valSrcScore) { return subQueryScore * valSrcScore; } ExplanationPtr CustomScoreProvider::customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, Collection valSrcExpls) { if (valSrcExpls.size() == 1) { return customExplain(doc, subQueryExpl, valSrcExpls[0]); } if (valSrcExpls.empty()) { return subQueryExpl; } double valSrcScore = 1; for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) { valSrcScore *= (*srcExpl)->getValue(); } ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); exp->addDetail(subQueryExpl); for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) { exp->addDetail(*srcExpl); } return exp; } ExplanationPtr CustomScoreProvider::customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, const ExplanationPtr& valSrcExpl) { double valSrcScore = 1; if (valSrcExpl) { valSrcScore *= valSrcExpl->getValue(); } ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); exp->addDetail(subQueryExpl); exp->addDetail(valSrcExpl); return exp; } } 
LucenePlusPlus-rel_3.0.9/src/core/search/function/CustomScoreQuery.cpp000066400000000000000000000316731456444476200261530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CustomScoreQuery.h" #include "_CustomScoreQuery.h" #include "ValueSourceQuery.h" #include "ComplexExplanation.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { CustomScoreQuery::CustomScoreQuery(const QueryPtr& subQuery) { ConstructQuery(subQuery, Collection::newInstance()); } CustomScoreQuery::CustomScoreQuery(const QueryPtr& subQuery, const ValueSourceQueryPtr& valSrcQuery) { Collection valSrcQueries(Collection::newInstance()); if (valSrcQuery) { valSrcQueries.add(valSrcQuery); } ConstructQuery(subQuery, valSrcQueries); } CustomScoreQuery::CustomScoreQuery(const QueryPtr& subQuery, Collection valSrcQueries) { ConstructQuery(subQuery, valSrcQueries); } CustomScoreQuery::~CustomScoreQuery() { } void CustomScoreQuery::ConstructQuery(const QueryPtr& subQuery, Collection valSrcQueries) { this->strict = false; this->subQuery = subQuery; this->valSrcQueries = valSrcQueries ? 
valSrcQueries : Collection::newInstance(); if (!subQuery) { boost::throw_exception(IllegalArgumentException(L" must not be null!")); } } QueryPtr CustomScoreQuery::rewrite(const IndexReaderPtr& reader) { CustomScoreQueryPtr cloneQuery; QueryPtr sq = subQuery->rewrite(reader); if (sq != subQuery) { cloneQuery = boost::static_pointer_cast(clone()); cloneQuery->subQuery = sq; } for (int32_t i = 0; i < valSrcQueries.size(); ++i) { ValueSourceQueryPtr v = boost::dynamic_pointer_cast(valSrcQueries[i]->rewrite(reader)); if (v != valSrcQueries[i]) { if (!cloneQuery) { cloneQuery = boost::static_pointer_cast(clone()); } cloneQuery->valSrcQueries[i] = v; } } return cloneQuery ? cloneQuery : shared_from_this(); } void CustomScoreQuery::extractTerms(SetTerm terms) { subQuery->extractTerms(terms); for (Collection::iterator srcQuery = valSrcQueries.begin(); srcQuery != valSrcQueries.end(); ++srcQuery) { (*srcQuery)->extractTerms(terms); } } LuceneObjectPtr CustomScoreQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = Query::clone(other ? other : newLucene(subQuery)); CustomScoreQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->strict = strict; cloneQuery->subQuery = boost::dynamic_pointer_cast(subQuery->clone()); cloneQuery->valSrcQueries = Collection::newInstance(valSrcQueries.size()); for (int32_t i = 0; i < valSrcQueries.size(); ++i) { cloneQuery->valSrcQueries[i] = boost::dynamic_pointer_cast(valSrcQueries[i]->clone()); } return cloneQuery; } String CustomScoreQuery::toString(const String& field) { StringStream buffer; buffer << name() << L"(" << subQuery->toString(field); for (Collection::iterator srcQuery = valSrcQueries.begin(); srcQuery != valSrcQueries.end(); ++srcQuery) { buffer << L", " << (*srcQuery)->toString(field); } buffer << L")" << (strict ? 
L" STRICT" : L"") << boostString(); return buffer.str(); } bool CustomScoreQuery::equals(const LuceneObjectPtr& other) { CustomScoreQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) { return false; } if (getBoost() != otherQuery->getBoost() || !subQuery->equals(otherQuery->subQuery) || strict != otherQuery->strict) { return false; } return valSrcQueries.equals(otherQuery->valSrcQueries, luceneEquals()); } int32_t CustomScoreQuery::hashCode() { return (StringUtils::hashCode(CustomScoreQuery::_getClassName()) + StringUtils::hashCode(Query::_getClassName()) + MiscUtils::hashCode(valSrcQueries.begin(), valSrcQueries.end(), MiscUtils::hashLucene)) ^ MiscUtils::doubleToIntBits(getBoost()) ^ (strict ? 1234 : 4321); } CustomScoreProviderPtr CustomScoreQuery::getCustomScoreProvider(const IndexReaderPtr& reader) { // when deprecated methods are removed, do not extend class here, just return new default CustomScoreProvider return newLucene(shared_from_this(), reader); } double CustomScoreQuery::customScore(int32_t doc, double subQueryScore, Collection valSrcScores) { if (valSrcScores.size() == 1) { return customScore(doc, subQueryScore, valSrcScores[0]); } if (valSrcScores.empty()) { return customScore(doc, subQueryScore, 1); } double score = subQueryScore; for (Collection::iterator srcScore = valSrcScores.begin(); srcScore != valSrcScores.end(); ++srcScore) { score *= *srcScore; } return score; } double CustomScoreQuery::customScore(int32_t doc, double subQueryScore, double valSrcScore) { return subQueryScore * valSrcScore; } ExplanationPtr CustomScoreQuery::customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, Collection valSrcExpls) { if (valSrcExpls.size() == 1) { return customExplain(doc, subQueryExpl, valSrcExpls[0]); } if (valSrcExpls.empty()) { return subQueryExpl; } double valSrcScore = 1; for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) { valSrcScore *= (*srcExpl)->getValue(); } 
ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); exp->addDetail(subQueryExpl); for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) { exp->addDetail(*srcExpl); } return exp; } ExplanationPtr CustomScoreQuery::customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, const ExplanationPtr& valSrcExpl) { double valSrcScore = 1; if (valSrcExpl) { valSrcScore *= valSrcExpl->getValue(); } ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); exp->addDetail(subQueryExpl); exp->addDetail(valSrcExpl); return exp; } WeightPtr CustomScoreQuery::createWeight(const SearcherPtr& searcher) { return newLucene(shared_from_this(), searcher); } bool CustomScoreQuery::isStrict() { return strict; } void CustomScoreQuery::setStrict(bool strict) { this->strict = strict; } String CustomScoreQuery::name() { return L"custom"; } DefaultCustomScoreProvider::DefaultCustomScoreProvider(const CustomScoreQueryPtr& customQuery, const IndexReaderPtr& reader) : CustomScoreProvider(reader) { _customQuery = customQuery; } DefaultCustomScoreProvider::~DefaultCustomScoreProvider() { } double DefaultCustomScoreProvider::customScore(int32_t doc, double subQueryScore, Collection valSrcScores) { return CustomScoreQueryPtr(_customQuery)->customScore(doc, subQueryScore, valSrcScores); } double DefaultCustomScoreProvider::customScore(int32_t doc, double subQueryScore, double valSrcScore) { return CustomScoreQueryPtr(_customQuery)->customScore(doc, subQueryScore, valSrcScore); } ExplanationPtr DefaultCustomScoreProvider::customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, Collection valSrcExpls) { return CustomScoreQueryPtr(_customQuery)->customExplain(doc, subQueryExpl, valSrcExpls); } ExplanationPtr DefaultCustomScoreProvider::customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, const ExplanationPtr& valSrcExpl) { return 
CustomScoreQueryPtr(_customQuery)->customExplain(doc, subQueryExpl, valSrcExpl); } CustomWeight::CustomWeight(const CustomScoreQueryPtr& query, const SearcherPtr& searcher) { this->query = query; this->similarity = query->getSimilarity(searcher); this->subQueryWeight = query->subQuery->weight(searcher); this->valSrcWeights = Collection::newInstance(query->valSrcQueries.size()); for (int32_t i = 0; i < query->valSrcQueries.size(); ++i) { this->valSrcWeights[i] = query->valSrcQueries[i]->createWeight(searcher); } this->qStrict = query->strict; } CustomWeight::~CustomWeight() { } QueryPtr CustomWeight::getQuery() { return query; } double CustomWeight::getValue() { return query->getBoost(); } double CustomWeight::sumOfSquaredWeights() { double sum = subQueryWeight->sumOfSquaredWeights(); for (int32_t i = 0; i < valSrcWeights.size(); ++i) { if (qStrict) { valSrcWeights[i]->sumOfSquaredWeights(); // do not include ValueSource part in the query normalization } else { sum += valSrcWeights[i]->sumOfSquaredWeights(); } } sum *= query->getBoost() * query->getBoost(); // boost each sub-weight return sum; } void CustomWeight::normalize(double norm) { norm *= query->getBoost(); // incorporate boost subQueryWeight->normalize(norm); for (int32_t i = 0; i < valSrcWeights.size(); ++i) { if (qStrict) { valSrcWeights[i]->normalize(1.0); // do not normalize the ValueSource part } else { valSrcWeights[i]->normalize(norm); } } } ScorerPtr CustomWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { // Pass true for "scoresDocsInOrder", because we require in-order scoring, even if caller does not, // since we call advance on the valSrcScorers. 
Pass false for "topScorer" because we will not invoke // score(Collector) on these scorers ScorerPtr subQueryScorer(subQueryWeight->scorer(reader, true, false)); if (!subQueryScorer) { return ScorerPtr(); } Collection valSrcScorers(Collection::newInstance(valSrcWeights.size())); for (int32_t i = 0; i < valSrcScorers.size(); ++i) { valSrcScorers[i] = valSrcWeights[i]->scorer(reader, true, topScorer); } return newLucene(similarity, reader, shared_from_this(), subQueryScorer, valSrcScorers); } ExplanationPtr CustomWeight::explain(const IndexReaderPtr& reader, int32_t doc) { ExplanationPtr explain(doExplain(reader, doc)); return explain ? explain : newLucene(0.0, L"no matching docs"); } ExplanationPtr CustomWeight::doExplain(const IndexReaderPtr& reader, int32_t doc) { ExplanationPtr subQueryExpl(subQueryWeight->explain(reader, doc)); if (!subQueryExpl->isMatch()) { return subQueryExpl; } // match Collection valSrcExpls(Collection::newInstance(valSrcWeights.size())); for (int32_t i = 0; i < valSrcWeights.size(); ++i) { valSrcExpls[i] = valSrcWeights[i]->explain(reader, doc); } ExplanationPtr customExp(query->getCustomScoreProvider(reader)->customExplain(doc, subQueryExpl, valSrcExpls)); double sc = getValue() * customExp->getValue(); ExplanationPtr res(newLucene(true, sc, query->toString() + L", product of:")); res->addDetail(customExp); res->addDetail(newLucene(getValue(), L"queryBoost")); // actually using the q boost as q weight (== weight value) return res; } bool CustomWeight::scoresDocsOutOfOrder() { return false; } CustomScorer::CustomScorer(const SimilarityPtr& similarity, const IndexReaderPtr& reader, const CustomWeightPtr& weight, const ScorerPtr& subQueryScorer, Collection valSrcScorers) : Scorer(similarity) { this->qWeight = weight->getValue(); this->subQueryScorer = subQueryScorer; this->valSrcScorers = valSrcScorers; this->reader = reader; this->vScores = Collection::newInstance(valSrcScorers.size()); this->provider = 
weight->query->getCustomScoreProvider(reader); } CustomScorer::~CustomScorer() { } int32_t CustomScorer::nextDoc() { int32_t doc = subQueryScorer->nextDoc(); if (doc != NO_MORE_DOCS) { for (int32_t i = 0; i < valSrcScorers.size(); ++i) { valSrcScorers[i]->advance(doc); } } return doc; } int32_t CustomScorer::docID() { return subQueryScorer->docID(); } double CustomScorer::score() { for (int32_t i = 0; i < valSrcScorers.size(); ++i) { vScores[i] = valSrcScorers[i]->score(); } return qWeight * provider->customScore(subQueryScorer->docID(), subQueryScorer->score(), vScores); } int32_t CustomScorer::advance(int32_t target) { int32_t doc = subQueryScorer->advance(target); if (doc != NO_MORE_DOCS) { for (int32_t i = 0; i < valSrcScorers.size(); ++i) { valSrcScorers[i]->advance(doc); } } return doc; } } LucenePlusPlus-rel_3.0.9/src/core/search/function/DocValues.cpp000066400000000000000000000040551456444476200245360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocValues.h" #include "Explanation.h" #include "MiscUtils.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { DocValues::DocValues() { minVal = std::numeric_limits::quiet_NaN(); maxVal = std::numeric_limits::quiet_NaN(); avgVal = std::numeric_limits::quiet_NaN(); computed = false; } DocValues::~DocValues() { } int32_t DocValues::intVal(int32_t doc) { return (int32_t)doubleVal(doc); } int64_t DocValues::longVal(int32_t doc) { return (int64_t)doubleVal(doc); } String DocValues::strVal(int32_t doc) { return StringUtils::toString(doubleVal(doc)); } ExplanationPtr DocValues::explain(int32_t doc) { return newLucene(doubleVal(doc), toString(doc)); } CollectionValue DocValues::getInnerArray() { boost::throw_exception(UnsupportedOperationException(L"This optional method is for test purposes only")); return VariantUtils::null(); } void DocValues::compute() { if (computed) { return; } double sum = 0; int32_t n = 0; while (true) { double val; try { val = doubleVal(n); } catch (IndexOutOfBoundsException&) { break; } sum += val; minVal = MiscUtils::isNaN(minVal) ? val : std::min(minVal, val); maxVal = MiscUtils::isNaN(maxVal) ? val : std::max(maxVal, val); ++n; } avgVal = n == 0 ? std::numeric_limits::quiet_NaN() : sum / (double)n; computed = true; } double DocValues::getMinValue() { compute(); return minVal; } double DocValues::getMaxValue() { compute(); return maxVal; } double DocValues::getAverageValue() { compute(); return avgVal; } } LucenePlusPlus-rel_3.0.9/src/core/search/function/DoubleFieldSource.cpp000066400000000000000000000043001456444476200262010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DoubleFieldSource.h" #include "FieldCache.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { DoubleFieldSource::DoubleFieldSource(const String& field, const DoubleParserPtr& parser) : FieldCacheSource(field) { this->parser = parser; } DoubleFieldSource::~DoubleFieldSource() { } String DoubleFieldSource::description() { return L"double(" + FieldCacheSource::description() + L")"; } DocValuesPtr DoubleFieldSource::getCachedFieldValues(const FieldCachePtr& cache, const String& field, const IndexReaderPtr& reader) { Collection arr(cache->getDoubles(reader, field, parser)); return newLucene(shared_from_this(), arr); } bool DoubleFieldSource::cachedFieldSourceEquals(const FieldCacheSourcePtr& other) { if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } DoubleFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); if (!otherSource) { return false; } return parser ? MiscUtils::equalTypes(parser, otherSource->parser) : !otherSource->parser; } int32_t DoubleFieldSource::cachedFieldSourceHashCode() { return StringUtils::hashCode(parser ? 
DoubleParser::_getClassName() : DoubleFieldSource::_getClassName()); } DoubleDocValues::DoubleDocValues(const DoubleFieldSourcePtr& source, Collection arr) { this->_source = source; this->arr = arr; } DoubleDocValues::~DoubleDocValues() { } double DoubleDocValues::doubleVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) { boost::throw_exception(IndexOutOfBoundsException()); } return arr[doc]; } String DoubleDocValues::toString(int32_t doc) { return DoubleFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(doubleVal(doc)); } CollectionValue DoubleDocValues::getInnerArray() { return arr; } } LucenePlusPlus-rel_3.0.9/src/core/search/function/FieldCacheSource.cpp000066400000000000000000000022631456444476200260000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldCacheSource.h" #include "FieldCache.h" #include "StringUtils.h" namespace Lucene { FieldCacheSource::FieldCacheSource(const String& field) { this->field = field; } FieldCacheSource::~FieldCacheSource() { } DocValuesPtr FieldCacheSource::getValues(const IndexReaderPtr& reader) { return getCachedFieldValues(FieldCache::DEFAULT(), field, reader); } String FieldCacheSource::description() { return field; } bool FieldCacheSource::equals(const LuceneObjectPtr& other) { FieldCacheSourcePtr otherSource(boost::dynamic_pointer_cast(other)); if (!otherSource) { return false; } return field == otherSource->field && cachedFieldSourceEquals(otherSource); } int32_t FieldCacheSource::hashCode() { return StringUtils::hashCode(field) + cachedFieldSourceHashCode(); } } LucenePlusPlus-rel_3.0.9/src/core/search/function/FieldScoreQuery.cpp000066400000000000000000000021261456444476200257130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldScoreQuery.h" #include "ByteFieldSource.h" #include "IntFieldSource.h" #include "DoubleFieldSource.h" namespace Lucene { FieldScoreQuery::FieldScoreQuery(const String& field, Type type) : ValueSourceQuery(getValueSource(field,type)) { } FieldScoreQuery::~FieldScoreQuery() { } ValueSourcePtr FieldScoreQuery::getValueSource(const String& field, Type type) { switch (type) { case BYTE: return newLucene(field); case INT: return newLucene(field); case DOUBLE: return newLucene(field); default: boost::throw_exception(IllegalArgumentException(L"not a known Field Score Query Type")); return ValueSourcePtr(); } } } LucenePlusPlus-rel_3.0.9/src/core/search/function/IntFieldSource.cpp000066400000000000000000000045051456444476200255300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IntFieldSource.h" #include "_IntFieldSource.h" #include "FieldCache.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { IntFieldSource::IntFieldSource(const String& field, const IntParserPtr& parser) : FieldCacheSource(field) { this->parser = parser; } IntFieldSource::~IntFieldSource() { } String IntFieldSource::description() { return L"int(" + FieldCacheSource::description() + L")"; } DocValuesPtr IntFieldSource::getCachedFieldValues(const FieldCachePtr& cache, const String& field, const IndexReaderPtr& reader) { Collection arr(cache->getInts(reader, field, parser)); return newLucene(shared_from_this(), arr); } bool IntFieldSource::cachedFieldSourceEquals(const FieldCacheSourcePtr& other) { if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } IntFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); if (!otherSource) { return false; } return parser ? MiscUtils::equalTypes(parser, otherSource->parser) : !otherSource->parser; } int32_t IntFieldSource::cachedFieldSourceHashCode() { return StringUtils::hashCode(parser ? 
IntParser::_getClassName() : IntFieldSource::_getClassName()); } IntDocValues::IntDocValues(const IntFieldSourcePtr& source, Collection arr) { this->_source = source; this->arr = arr; } IntDocValues::~IntDocValues() { } double IntDocValues::doubleVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) { boost::throw_exception(IndexOutOfBoundsException()); } return (double)arr[doc]; } int32_t IntDocValues::intVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) { boost::throw_exception(IndexOutOfBoundsException()); } return arr[doc]; } String IntDocValues::toString(int32_t doc) { return IntFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(intVal(doc)); } CollectionValue IntDocValues::getInnerArray() { return arr; } } LucenePlusPlus-rel_3.0.9/src/core/search/function/OrdFieldSource.cpp000066400000000000000000000042751456444476200255260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "OrdFieldSource.h" #include "_OrdFieldSource.h" #include "FieldCache.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { OrdFieldSource::OrdFieldSource(const String& field) { this->field = field; } OrdFieldSource::~OrdFieldSource() { } String OrdFieldSource::description() { return L"ord(" + field + L")"; } DocValuesPtr OrdFieldSource::getValues(const IndexReaderPtr& reader) { Collection arr(FieldCache::DEFAULT()->getStringIndex(reader, field)->order); return newLucene(shared_from_this(), arr); } bool OrdFieldSource::equals(const LuceneObjectPtr& other) { if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } OrdFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); if (!otherSource) { return false; } return field == otherSource->field; } int32_t OrdFieldSource::hashCode() { return StringUtils::hashCode(OrdFieldSource::_getClassName()) + StringUtils::hashCode(field); } OrdDocValues::OrdDocValues(const OrdFieldSourcePtr& source, Collection arr) { this->_source = source; this->arr = arr; } OrdDocValues::~OrdDocValues() { } double OrdDocValues::doubleVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) { boost::throw_exception(IndexOutOfBoundsException()); } return (double)arr[doc]; } String OrdDocValues::strVal(int32_t doc) { // the string value of the ordinal, not the string itself if (doc < 0 || doc >= arr.size()) { boost::throw_exception(IndexOutOfBoundsException()); } return StringUtils::toString(arr[doc]); } String OrdDocValues::toString(int32_t doc) { return OrdFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(intVal(doc)); } CollectionValue OrdDocValues::getInnerArray() { return arr; } } 
LucenePlusPlus-rel_3.0.9/src/core/search/function/ReverseOrdFieldSource.cpp000066400000000000000000000050401456444476200270510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ReverseOrdFieldSource.h" #include "_ReverseOrdFieldSource.h" #include "FieldCache.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { ReverseOrdFieldSource::ReverseOrdFieldSource(const String& field) { this->field = field; } ReverseOrdFieldSource::~ReverseOrdFieldSource() { } String ReverseOrdFieldSource::description() { return L"rord(" + field + L")"; } DocValuesPtr ReverseOrdFieldSource::getValues(const IndexReaderPtr& reader) { StringIndexPtr sindex(FieldCache::DEFAULT()->getStringIndex(reader, field)); Collection arr(sindex->order); int32_t end = sindex->lookup.size(); return newLucene(shared_from_this(), arr, end); } bool ReverseOrdFieldSource::equals(const LuceneObjectPtr& other) { if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } ReverseOrdFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); if (!otherSource) { return false; } return field == otherSource->field; } int32_t ReverseOrdFieldSource::hashCode() { return StringUtils::hashCode(ReverseOrdFieldSource::_getClassName()) + StringUtils::hashCode(field); } ReverseOrdDocValues::ReverseOrdDocValues(const ReverseOrdFieldSourcePtr& source, Collection arr, int32_t end) { this->_source = source; this->arr = arr; this->end = end; } ReverseOrdDocValues::~ReverseOrdDocValues() { } double ReverseOrdDocValues::doubleVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) { boost::throw_exception(IndexOutOfBoundsException()); } return (double)(end - 
arr[doc]); } int32_t ReverseOrdDocValues::intVal(int32_t doc) { if (doc < 0 || doc >= arr.size()) { boost::throw_exception(IndexOutOfBoundsException()); } return (end - arr[doc]); } String ReverseOrdDocValues::strVal(int32_t doc) { // the string value of the ordinal, not the string itself return StringUtils::toString(intVal(doc)); } String ReverseOrdDocValues::toString(int32_t doc) { return ReverseOrdFieldSourcePtr(_source)->description() + L"=" + strVal(doc); } CollectionValue ReverseOrdDocValues::getInnerArray() { return arr; } } LucenePlusPlus-rel_3.0.9/src/core/search/function/ValueSource.cpp000066400000000000000000000007711456444476200251070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ValueSource.h" namespace Lucene { ValueSource::~ValueSource() { } String ValueSource::toString() { return description(); } } LucenePlusPlus-rel_3.0.9/src/core/search/function/ValueSourceQuery.cpp000066400000000000000000000100561456444476200261320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ValueSourceQuery.h" #include "_ValueSourceQuery.h" #include "ValueSource.h" #include "DocValues.h" #include "ComplexExplanation.h" #include "IndexReader.h" #include "TermDocs.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { ValueSourceQuery::ValueSourceQuery(const ValueSourcePtr& valSrc) { this->valSrc = valSrc; } ValueSourceQuery::~ValueSourceQuery() { } QueryPtr ValueSourceQuery::rewrite(const IndexReaderPtr& reader) { return shared_from_this(); } void ValueSourceQuery::extractTerms(SetTerm terms) { // no terms involved here } WeightPtr ValueSourceQuery::createWeight(const SearcherPtr& searcher) { return newLucene(shared_from_this(), searcher); } String ValueSourceQuery::toString(const String& field) { return valSrc->toString() + boostString(); } bool ValueSourceQuery::equals(const LuceneObjectPtr& other) { ValueSourceQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) { return false; } return (getBoost() == otherQuery->getBoost() && valSrc->equals(otherQuery->valSrc)); } int32_t ValueSourceQuery::hashCode() { return (StringUtils::hashCode(ValueSourceQuery::_getClassName()) + valSrc->hashCode()) ^ MiscUtils::doubleToIntBits(getBoost()); } LuceneObjectPtr ValueSourceQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? 
other : newLucene(valSrc); ValueSourceQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); cloneQuery->valSrc = valSrc; return cloneQuery; } ValueSourceWeight::ValueSourceWeight(const ValueSourceQueryPtr& query, const SearcherPtr& searcher) { this->query = query; this->similarity = query->getSimilarity(searcher); } ValueSourceWeight::~ValueSourceWeight() { } QueryPtr ValueSourceWeight::getQuery() { return query; } double ValueSourceWeight::getValue() { return queryWeight; } double ValueSourceWeight::sumOfSquaredWeights() { queryWeight = query->getBoost(); return queryWeight * queryWeight; } void ValueSourceWeight::normalize(double norm) { queryNorm = norm; queryWeight *= queryNorm; } ScorerPtr ValueSourceWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { return newLucene(similarity, reader, shared_from_this()); } ExplanationPtr ValueSourceWeight::explain(const IndexReaderPtr& reader, int32_t doc) { DocValuesPtr vals(query->valSrc->getValues(reader)); double sc = queryWeight * vals->doubleVal(doc); ExplanationPtr result(newLucene(true, sc, query->toString() + L", product of:")); result->addDetail(vals->explain(doc)); result->addDetail(newLucene(query->getBoost(), L"boost")); result->addDetail(newLucene(queryNorm, L"queryNorm")); return result; } ValueSourceScorer::ValueSourceScorer(const SimilarityPtr& similarity, const IndexReaderPtr& reader, const ValueSourceWeightPtr& weight) : Scorer(similarity) { this->weight = weight; this->qWeight = weight->getValue(); this->doc = -1; // this is when/where the values are first created. vals = weight->query->valSrc->getValues(reader); termDocs = reader->termDocs(TermPtr()); } ValueSourceScorer::~ValueSourceScorer() { } int32_t ValueSourceScorer::nextDoc() { doc = termDocs->next() ? termDocs->doc() : NO_MORE_DOCS; return doc; } int32_t ValueSourceScorer::docID() { return doc; } int32_t ValueSourceScorer::advance(int32_t target) { doc = termDocs->skipTo(target) ? 
termDocs->doc() : NO_MORE_DOCS; return doc; } double ValueSourceScorer::score() { return qWeight * vals->doubleVal(termDocs->doc()); } } LucenePlusPlus-rel_3.0.9/src/core/search/payloads/000077500000000000000000000000001456444476200221305ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/search/payloads/AveragePayloadFunction.cpp000066400000000000000000000026471456444476200272370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "AveragePayloadFunction.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { AveragePayloadFunction::~AveragePayloadFunction() { } double AveragePayloadFunction::currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, double currentScore, double currentPayloadScore) { return currentPayloadScore + currentScore; } double AveragePayloadFunction::docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) { return numPayloadsSeen > 0 ? 
(payloadScore / (double)numPayloadsSeen) : 1.0; } int32_t AveragePayloadFunction::hashCode() { int32_t prime = 31; int32_t result = 1; result = prime * result + StringUtils::hashCode(getClassName()); return result; } bool AveragePayloadFunction::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } if (!other) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } return true; } } LucenePlusPlus-rel_3.0.9/src/core/search/payloads/MaxPayloadFunction.cpp000066400000000000000000000027631456444476200264110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MaxPayloadFunction.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { MaxPayloadFunction::~MaxPayloadFunction() { } double MaxPayloadFunction::currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, double currentScore, double currentPayloadScore) { if (numPayloadsSeen == 0) { return currentPayloadScore; } else { return std::max(currentPayloadScore, currentScore); } } double MaxPayloadFunction::docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) { return numPayloadsSeen > 0 ? 
payloadScore : 1.0; } int32_t MaxPayloadFunction::hashCode() { int32_t prime = 31; int32_t result = 1; result = prime * result + StringUtils::hashCode(getClassName()); return result; } bool MaxPayloadFunction::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } if (!other) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } return true; } } LucenePlusPlus-rel_3.0.9/src/core/search/payloads/MinPayloadFunction.cpp000066400000000000000000000027631456444476200264070ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MinPayloadFunction.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { MinPayloadFunction::~MinPayloadFunction() { } double MinPayloadFunction::currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, double currentScore, double currentPayloadScore) { if (numPayloadsSeen == 0) { return currentPayloadScore; } else { return std::min(currentPayloadScore, currentScore); } } double MinPayloadFunction::docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) { return numPayloadsSeen > 0 ? 
payloadScore : 1.0; } int32_t MinPayloadFunction::hashCode() { int32_t prime = 31; int32_t result = 1; result = prime * result + StringUtils::hashCode(getClassName()); return result; } bool MinPayloadFunction::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } if (!other) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } return true; } } LucenePlusPlus-rel_3.0.9/src/core/search/payloads/PayloadFunction.cpp000066400000000000000000000007571456444476200257440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PayloadFunction.h" namespace Lucene { PayloadFunction::PayloadFunction() { } PayloadFunction::~PayloadFunction() { } } LucenePlusPlus-rel_3.0.9/src/core/search/payloads/PayloadNearQuery.cpp000066400000000000000000000156611456444476200260720ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PayloadNearQuery.h" #include "AveragePayloadFunction.h" #include "IndexReader.h" #include "NearSpansOrdered.h" #include "NearSpansUnordered.h" #include "Similarity.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { PayloadNearQuery::PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder) : SpanNearQuery(clauses, slop, inOrder) { fieldName = clauses[0]->getField(); // all clauses must have same field this->function = newLucene(); } PayloadNearQuery::PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder, const PayloadFunctionPtr& function) : SpanNearQuery(clauses, slop, inOrder) { fieldName = clauses[0]->getField(); // all clauses must have same field this->function = function; } PayloadNearQuery::~PayloadNearQuery() { } WeightPtr PayloadNearQuery::createWeight(const SearcherPtr& searcher) { return newLucene(shared_from_this(), searcher); } LuceneObjectPtr PayloadNearQuery::clone(const LuceneObjectPtr& other) { int32_t sz = clauses.size(); Collection newClauses(Collection::newInstance(sz)); for (int32_t i = 0; i < sz; ++i) { newClauses[i] = boost::dynamic_pointer_cast(clauses[i]->clone()); } PayloadNearQueryPtr payloadNearQuery(newLucene(newClauses, slop, inOrder)); payloadNearQuery->setBoost(getBoost()); return payloadNearQuery; } String PayloadNearQuery::toString(const String& field) { StringStream buffer; buffer << L"payloadNear(["; for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { if (clause != clauses.begin()) { buffer << L", "; } buffer << (*clause)->toString(field); } buffer << L"], " << slop << L", " << inOrder << L")" << boostString(); return buffer.str(); } bool PayloadNearQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } if (!SpanNearQuery::equals(other)) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) 
{ return false; } PayloadNearQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) { return false; } if (fieldName != otherQuery->fieldName) { return false; } if (!function) { if (otherQuery->function) { return false; } } else if (!function->equals(otherQuery->function)) { return false; } return true; } int32_t PayloadNearQuery::hashCode() { int32_t prime = 31; int32_t result = SpanNearQuery::hashCode(); result = prime * result + (fieldName.empty() ? 0 : StringUtils::hashCode(fieldName)); result = prime * result + (!function ? 0 : function->hashCode()); return result; } PayloadNearSpanWeight::PayloadNearSpanWeight(const SpanQueryPtr& query, const SearcherPtr& searcher) : SpanWeight(query, searcher) { } PayloadNearSpanWeight::~PayloadNearSpanWeight() { } ScorerPtr PayloadNearSpanWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { return newLucene(query->getSpans(reader), shared_from_this(), similarity, reader->norms(query->getField())); } PayloadNearSpanScorer::PayloadNearSpanScorer(const SpansPtr& spans, const WeightPtr& weight, const SimilarityPtr& similarity, ByteArray norms) : SpanScorer(spans, weight, similarity, norms) { this->spans = spans; this->payloadScore = 0.0; this->payloadsSeen = 0; this->similarity = getSimilarity(); } PayloadNearSpanScorer::~PayloadNearSpanScorer() { } void PayloadNearSpanScorer::getPayloads(Collection subSpans) { for (Collection::iterator span = subSpans.begin(); span != subSpans.end(); ++span) { if (MiscUtils::typeOf(*span)) { NearSpansOrderedPtr ordered(boost::static_pointer_cast(*span)); if (ordered->isPayloadAvailable()) { processPayloads(ordered->getPayload(), ordered->start(), ordered->end()); } getPayloads(ordered->getSubSpans()); } else if (MiscUtils::typeOf(*span)) { NearSpansUnorderedPtr unordered(boost::static_pointer_cast(*span)); if (unordered->isPayloadAvailable()) { processPayloads(unordered->getPayload(), unordered->start(), unordered->end()); } 
getPayloads(unordered->getSubSpans()); } } } void PayloadNearSpanScorer::processPayloads(Collection payLoads, int32_t start, int32_t end) { PayloadNearSpanWeightPtr spanWeight(boost::static_pointer_cast(weight)); PayloadNearQueryPtr nearQuery(boost::static_pointer_cast(spanWeight->query)); for (Collection::iterator payload = payLoads.begin(); payload != payLoads.end(); ++payload) { payloadScore = nearQuery->function->currentScore(doc, nearQuery->fieldName, start, end, payloadsSeen, payloadScore, similarity->scorePayload(doc, nearQuery->fieldName, spans->start(), spans->end(), *payload, 0, payload->size())); ++payloadsSeen; } } bool PayloadNearSpanScorer::setFreqCurrentDoc() { if (!more) { return false; } Collection spansArr(newCollection(spans)); payloadScore = 0.0; payloadsSeen = 0; getPayloads(spansArr); return SpanScorer::setFreqCurrentDoc(); } double PayloadNearSpanScorer::score() { PayloadNearSpanWeightPtr spanWeight(boost::static_pointer_cast(weight)); PayloadNearQueryPtr nearQuery(boost::static_pointer_cast(spanWeight->query)); return SpanScorer::score() * nearQuery->function->docScore(doc, nearQuery->fieldName, payloadsSeen, payloadScore); } ExplanationPtr PayloadNearSpanScorer::explain(int32_t doc) { ExplanationPtr result(newLucene()); ExplanationPtr nonPayloadExpl(SpanScorer::explain(doc)); result->addDetail(nonPayloadExpl); ExplanationPtr payloadBoost(newLucene()); result->addDetail(payloadBoost); double avgPayloadScore = (payloadsSeen > 0 ? 
(payloadScore / (double)payloadsSeen) : 1.0); payloadBoost->setValue(avgPayloadScore); payloadBoost->setDescription(L"scorePayload(...)"); result->setValue(nonPayloadExpl->getValue() * avgPayloadScore); result->setDescription(L"bnq, product of:"); return result; } } LucenePlusPlus-rel_3.0.9/src/core/search/payloads/PayloadSpanUtil.cpp000066400000000000000000000144711456444476200257140ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PayloadSpanUtil.h" #include "BooleanQuery.h" #include "BooleanClause.h" #include "PhraseQuery.h" #include "SpanTermQuery.h" #include "SpanNearQuery.h" #include "SpanOrQuery.h" #include "TermQuery.h" #include "FilteredQuery.h" #include "DisjunctionMaxQuery.h" #include "MultiPhraseQuery.h" #include "Term.h" #include "Spans.h" #include "MiscUtils.h" namespace Lucene { PayloadSpanUtil::PayloadSpanUtil(const IndexReaderPtr& reader) { this->reader = reader; } PayloadSpanUtil::~PayloadSpanUtil() { } Collection PayloadSpanUtil::getPayloadsForQuery(const QueryPtr& query) { Collection payloads(Collection::newInstance()); queryToSpanQuery(query, payloads); return payloads; } void PayloadSpanUtil::queryToSpanQuery(const QueryPtr& query, Collection payloads) { if (MiscUtils::typeOf(query)) { BooleanQueryPtr booleanQuery(boost::dynamic_pointer_cast(query)); Collection queryClauses(booleanQuery->getClauses()); for (Collection::iterator clause = queryClauses.begin(); clause != queryClauses.end(); ++clause) { if (!(*clause)->isProhibited()) { queryToSpanQuery((*clause)->getQuery(), payloads); } } } else if (MiscUtils::typeOf(query)) { PhraseQueryPtr phraseQuery(boost::dynamic_pointer_cast(query)); Collection 
phraseQueryTerms(phraseQuery->getTerms()); Collection clauses(Collection::newInstance(phraseQueryTerms.size())); for (int32_t i = 0; i < phraseQueryTerms.size(); ++i) { clauses[i] = newLucene(phraseQueryTerms[i]); } int32_t slop = phraseQuery->getSlop(); bool inorder = false; if (slop == 0) { inorder = true; } SpanNearQueryPtr sp(newLucene(clauses, slop, inorder)); sp->setBoost(query->getBoost()); getPayloads(payloads, sp); } else if (MiscUtils::typeOf(query)) { TermQueryPtr termQuery(boost::dynamic_pointer_cast(query)); SpanTermQueryPtr stq(newLucene(termQuery->getTerm())); stq->setBoost(query->getBoost()); getPayloads(payloads, stq); } else if (MiscUtils::typeOf(query)) { SpanQueryPtr spanQuery(boost::dynamic_pointer_cast(query)); getPayloads(payloads, spanQuery); } else if (MiscUtils::typeOf(query)) { FilteredQueryPtr filteredQuery(boost::dynamic_pointer_cast(query)); queryToSpanQuery(filteredQuery->getQuery(), payloads); } else if (MiscUtils::typeOf(query)) { DisjunctionMaxQueryPtr maxQuery(boost::dynamic_pointer_cast(query)); for (Collection::iterator disjunct = maxQuery->begin(); disjunct != maxQuery->end(); ++disjunct) { queryToSpanQuery(*disjunct, payloads); } } else if (MiscUtils::typeOf(query)) { MultiPhraseQueryPtr multiphraseQuery(boost::dynamic_pointer_cast(query)); Collection< Collection > termArrays(multiphraseQuery->getTermArrays()); Collection positions(multiphraseQuery->getPositions()); if (!positions.empty()) { int32_t maxPosition = positions[positions.size() - 1]; for (int32_t i = 0; i < positions.size() - 1; ++i) { if (positions[i] > maxPosition) { maxPosition = positions[i]; } } Collection< Collection > disjunctLists(Collection< Collection >::newInstance(maxPosition + 1)); int32_t distinctPositions = 0; for (int32_t i = 0; i < termArrays.size(); ++i) { Collection termArray(termArrays[i]); Collection disjuncts(disjunctLists[positions[i]]); if (!disjuncts) { disjuncts = Collection::newInstance(); disjunctLists[positions[i]] = disjuncts; 
++distinctPositions; } for (Collection::iterator term = termArray.begin(); term != termArray.end(); ++term) { disjuncts.add(newLucene(*term)); } } int32_t positionGaps = 0; int32_t position = 0; Collection clauses(Collection::newInstance(distinctPositions)); for (int32_t i = 0; i < disjunctLists.size(); ++i) { Collection disjuncts(disjunctLists[i]); if (disjuncts) { Collection spanDisjuncts(Collection::newInstance(disjuncts.size())); for (int32_t j = 0; j < disjuncts.size(); ++j) { spanDisjuncts[j] = boost::dynamic_pointer_cast(disjuncts[j]); } clauses[position++] = newLucene(spanDisjuncts); } else { ++positionGaps; } } int32_t slop = multiphraseQuery->getSlop(); bool inorder = (slop == 0); SpanNearQueryPtr sp(newLucene(clauses, slop + positionGaps, inorder)); sp->setBoost(query->getBoost()); getPayloads(payloads, sp); } } } void PayloadSpanUtil::getPayloads(Collection payloads, const SpanQueryPtr& query) { SpansPtr spans(query->getSpans(reader)); while (spans->next()) { if (spans->isPayloadAvailable()) { Collection payload(spans->getPayload()); for (Collection::iterator bytes = payload.begin(); bytes != payload.end(); ++bytes) { payloads.add(*bytes); } } } } } LucenePlusPlus-rel_3.0.9/src/core/search/payloads/PayloadTermQuery.cpp000066400000000000000000000137511456444476200261120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "PayloadTermQuery.h" #include "_PayloadTermQuery.h" #include "Term.h" #include "TermSpans.h" #include "TermPositions.h" #include "ComplexExplanation.h" #include "IndexReader.h" #include "Similarity.h" #include "PayloadFunction.h" #include "MiscUtils.h" namespace Lucene { PayloadTermQuery::PayloadTermQuery(const TermPtr& term, const PayloadFunctionPtr& function, bool includeSpanScore) : SpanTermQuery(term) { this->function = function; this->includeSpanScore = includeSpanScore; } PayloadTermQuery::~PayloadTermQuery() { } WeightPtr PayloadTermQuery::createWeight(const SearcherPtr& searcher) { return newLucene(shared_from_this(), searcher); } LuceneObjectPtr PayloadTermQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = SpanQuery::clone(other ? other : newLucene(term, function, includeSpanScore)); PayloadTermQueryPtr termQuery(boost::dynamic_pointer_cast(clone)); termQuery->function = function; termQuery->includeSpanScore = includeSpanScore; return termQuery; } bool PayloadTermQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } if (!SpanTermQuery::equals(other)) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } PayloadTermQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) { return false; } if (!function) { if (otherQuery->function) { return false; } } else if (!function->equals(otherQuery->function)) { return false; } if (includeSpanScore != otherQuery->includeSpanScore) { return false; } return true; } int32_t PayloadTermQuery::hashCode() { int32_t prime = 31; int32_t result = SpanTermQuery::hashCode(); result = prime * result + (function ? function->hashCode() : 0); result = prime * result + (includeSpanScore ? 
1231 : 1237); return result; } PayloadTermWeight::PayloadTermWeight(const PayloadTermQueryPtr& query, const SearcherPtr& searcher) : SpanWeight(query, searcher) { } PayloadTermWeight::~PayloadTermWeight() { } ScorerPtr PayloadTermWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { return newLucene(boost::dynamic_pointer_cast(query->getSpans(reader)), shared_from_this(), similarity, reader->norms(query->getField())); } PayloadTermSpanScorer::PayloadTermSpanScorer(const TermSpansPtr& spans, const WeightPtr& weight, const SimilarityPtr& similarity, ByteArray norms) : SpanScorer(spans, weight, similarity, norms) { positions = spans->getPositions(); payload = ByteArray::newInstance(256); payloadScore = 0.0; payloadsSeen = 0; } PayloadTermSpanScorer::~PayloadTermSpanScorer() { } bool PayloadTermSpanScorer::setFreqCurrentDoc() { if (!more) { return false; } doc = spans->doc(); freq = 0.0; payloadScore = 0.0; payloadsSeen = 0; SimilarityPtr similarity1(getSimilarity()); while (more && doc == spans->doc()) { int32_t matchLength = spans->end() - spans->start(); freq += similarity1->sloppyFreq(matchLength); processPayload(similarity1); more = spans->next(); // this moves positions to the next match in this document } return more || (freq != 0); } void PayloadTermSpanScorer::processPayload(const SimilarityPtr& similarity) { if (positions->isPayloadAvailable()) { PayloadTermWeightPtr payloadWeight(boost::static_pointer_cast(weight)); PayloadTermQueryPtr payloadQuery(boost::static_pointer_cast(payloadWeight->query)); payload = positions->getPayload(payload, 0); payloadScore = payloadQuery->function->currentScore(doc, payloadQuery->term->field(), spans->start(), spans->end(), payloadsSeen, payloadScore, similarity->scorePayload(doc, payloadQuery->term->field(), spans->start(), spans->end(), payload, 0, positions->getPayloadLength())); ++payloadsSeen; } else { // zero out the payload? 
} } double PayloadTermSpanScorer::score() { PayloadTermWeightPtr payloadWeight(boost::static_pointer_cast(weight)); PayloadTermQueryPtr payloadQuery(boost::static_pointer_cast(payloadWeight->query)); return payloadQuery->includeSpanScore ? getSpanScore() * getPayloadScore() : getPayloadScore(); } double PayloadTermSpanScorer::getSpanScore() { return SpanScorer::score(); } double PayloadTermSpanScorer::getPayloadScore() { PayloadTermWeightPtr payloadWeight(boost::static_pointer_cast(weight)); PayloadTermQueryPtr payloadQuery(boost::static_pointer_cast(payloadWeight->query)); return payloadQuery->function->docScore(doc, payloadQuery->term->field(), payloadsSeen, payloadScore); } ExplanationPtr PayloadTermSpanScorer::explain(int32_t doc) { ComplexExplanationPtr result(newLucene()); ExplanationPtr nonPayloadExpl(SpanScorer::explain(doc)); result->addDetail(nonPayloadExpl); ExplanationPtr payloadBoost(newLucene()); result->addDetail(payloadBoost); double payloadScore = getPayloadScore(); payloadBoost->setValue(payloadScore); payloadBoost->setDescription(L"scorePayload(...)"); result->setValue(nonPayloadExpl->getValue() * payloadScore); result->setDescription(L"btq, product of:"); result->setMatch(nonPayloadExpl->getValue() != 0.0); return result; } } LucenePlusPlus-rel_3.0.9/src/core/search/spans/000077500000000000000000000000001456444476200214405ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/search/spans/FieldMaskingSpanQuery.cpp000066400000000000000000000061531456444476200263560ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldMaskingSpanQuery.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { FieldMaskingSpanQuery::FieldMaskingSpanQuery(const SpanQueryPtr& maskedQuery, const String& maskedField) { this->maskedQuery = maskedQuery; this->field = maskedField; } FieldMaskingSpanQuery::~FieldMaskingSpanQuery() { } String FieldMaskingSpanQuery::getField() { return field; } SpanQueryPtr FieldMaskingSpanQuery::getMaskedQuery() { return maskedQuery; } // :NOTE: getBoost and setBoost are not proxied to the maskedQuery // ...this is done to be more consistent with things like SpanFirstQuery SpansPtr FieldMaskingSpanQuery::getSpans(const IndexReaderPtr& reader) { return maskedQuery->getSpans(reader); } void FieldMaskingSpanQuery::extractTerms(SetTerm terms) { maskedQuery->extractTerms(terms); } WeightPtr FieldMaskingSpanQuery::createWeight(const SearcherPtr& searcher) { return maskedQuery->createWeight(searcher); } SimilarityPtr FieldMaskingSpanQuery::getSimilarity(const SearcherPtr& searcher) { return maskedQuery->getSimilarity(searcher); } QueryPtr FieldMaskingSpanQuery::rewrite(const IndexReaderPtr& reader) { FieldMaskingSpanQueryPtr clone; SpanQueryPtr rewritten(boost::dynamic_pointer_cast(maskedQuery->rewrite(reader))); if (rewritten != maskedQuery) { clone = boost::dynamic_pointer_cast(this->clone()); clone->maskedQuery = rewritten; } if (clone) { return clone; } else { return shared_from_this(); } } String FieldMaskingSpanQuery::toString(const String& field) { StringStream buffer; buffer << L"mask(" << maskedQuery->toString(field) << L")"; buffer << boostString() << L" as " << this->field; return buffer.str(); } bool FieldMaskingSpanQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } FieldMaskingSpanQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) { return false; } return (getField() == 
otherQuery->getField() && getBoost() == otherQuery->getBoost() && getMaskedQuery()->equals(otherQuery->getMaskedQuery())); } int32_t FieldMaskingSpanQuery::hashCode() { return getMaskedQuery()->hashCode() ^ StringUtils::hashCode(getField()) ^ MiscUtils::doubleToRawIntBits(getBoost()); } LuceneObjectPtr FieldMaskingSpanQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = SpanQuery::clone(other ? other : newLucene(maskedQuery, field)); FieldMaskingSpanQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); cloneQuery->maskedQuery = maskedQuery; cloneQuery->field = field; return cloneQuery; } } LucenePlusPlus-rel_3.0.9/src/core/search/spans/NearSpansOrdered.cpp000066400000000000000000000205511456444476200253460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NearSpansOrdered.h" #include "SpanNearQuery.h" namespace Lucene { NearSpansOrdered::NearSpansOrdered(const SpanNearQueryPtr& spanNearQuery, const IndexReaderPtr& reader, bool collectPayloads) { if (spanNearQuery->getClauses().size() < 2) { boost::throw_exception(IllegalArgumentException(L"Less than 2 clauses: " + spanNearQuery->toString())); } this->firstTime = true; this->more = false; this->inSameDoc = false; this->matchDoc = -1; this->matchStart = -1; this->matchEnd = -1; this->collectPayloads = collectPayloads; this->allowedSlop = spanNearQuery->getSlop(); Collection clauses(spanNearQuery->getClauses()); this->subSpans = Collection::newInstance(clauses.size()); this->matchPayload = Collection::newInstance(); this->subSpansByDoc = Collection::newInstance(clauses.size()); for (int32_t i = 0; i < clauses.size(); ++i) { subSpans[i] = clauses[i]->getSpans(reader); subSpansByDoc[i] = subSpans[i]; // used in toSameDoc() } this->query = spanNearQuery; // kept for toString() only. 
} NearSpansOrdered::~NearSpansOrdered() { } int32_t NearSpansOrdered::doc() { return matchDoc; } int32_t NearSpansOrdered::start() { return matchStart; } int32_t NearSpansOrdered::end() { return matchEnd; } Collection NearSpansOrdered::getSubSpans() { return subSpans; } Collection NearSpansOrdered::getPayload() { return matchPayload; } bool NearSpansOrdered::isPayloadAvailable() { return !matchPayload.empty(); } bool NearSpansOrdered::next() { if (firstTime) { firstTime = false; for (int32_t i = 0; i < subSpans.size(); ++i) { if (!subSpans[i]->next()) { more = false; return false; } } more = true; } if (collectPayloads) { matchPayload.clear(); } return advanceAfterOrdered(); } bool NearSpansOrdered::skipTo(int32_t target) { if (firstTime) { firstTime = false; for (int32_t i = 0; i < subSpans.size(); ++i) { if (!subSpans[i]->skipTo(target)) { more = false; return false; } } more = true; } else if (more && (subSpans[0]->doc() < target)) { if (subSpans[0]->skipTo(target)) { inSameDoc = false; } else { more = false; return false; } } if (collectPayloads) { matchPayload.clear(); } return advanceAfterOrdered(); } bool NearSpansOrdered::advanceAfterOrdered() { while (more && (inSameDoc || toSameDoc())) { if (stretchToOrder() && shrinkToAfterShortestMatch()) { return true; } } return false; // no more matches } struct lessSpanDoc { inline bool operator()(const SpansPtr& first, const SpansPtr& second) const { return ((first->doc() - second->doc()) < 0); } }; bool NearSpansOrdered::toSameDoc() { std::sort(subSpansByDoc.begin(), subSpansByDoc.end(), lessSpanDoc()); int32_t firstIndex = 0; int32_t maxDoc = subSpansByDoc[subSpansByDoc.size() - 1]->doc(); while (subSpansByDoc[firstIndex]->doc() != maxDoc) { if (!subSpansByDoc[firstIndex]->skipTo(maxDoc)) { more = false; inSameDoc = false; return false; } maxDoc = subSpansByDoc[firstIndex]->doc(); if (++firstIndex == subSpansByDoc.size()) { firstIndex = 0; } } for (int32_t i = 0; i < subSpansByDoc.size(); ++i) { 
BOOST_ASSERT(subSpansByDoc[i]->doc() == maxDoc); } inSameDoc = true; return true; } bool NearSpansOrdered::docSpansOrdered(const SpansPtr& spans1, const SpansPtr& spans2) { BOOST_ASSERT(spans1->doc() == spans2->doc()); int32_t start1 = spans1->start(); int32_t start2 = spans2->start(); // Do not call docSpansOrdered(int,int,int,int) to avoid invoking .end() return start1 == start2 ? (spans1->end() < spans2->end()) : (start1 < start2); } bool NearSpansOrdered::docSpansOrdered(int32_t start1, int32_t end1, int32_t start2, int32_t end2) { return start1 == start2 ? (end1 < end2) : (start1 < start2); } bool NearSpansOrdered::stretchToOrder() { matchDoc = subSpans[0]->doc(); for (int32_t i = 1; inSameDoc && (i < subSpans.size()); ++i) { while (!docSpansOrdered(subSpans[i - 1], subSpans[i])) { if (!subSpans[i]->next()) { inSameDoc = false; more = false; break; } else if (matchDoc != subSpans[i]->doc()) { inSameDoc = false; break; } } } return inSameDoc; } bool NearSpansOrdered::shrinkToAfterShortestMatch() { SpansPtr subSpan(subSpans[subSpans.size() - 1]); matchStart = subSpan->start(); matchEnd = subSpan->end(); SetByteArray possibleMatchPayloads(SetByteArray::newInstance()); if (subSpan->isPayloadAvailable()) { Collection payload(subSpan->getPayload()); possibleMatchPayloads.addAll(payload.begin(), payload.end()); } Collection possiblePayload; int32_t matchSlop = 0; int32_t lastStart = matchStart; int32_t lastEnd = matchEnd; for (int32_t i = subSpans.size() - 2; i >= 0; --i) { SpansPtr prevSpans(subSpans[i]); if (collectPayloads && prevSpans->isPayloadAvailable()) { Collection payload(prevSpans->getPayload()); possiblePayload = Collection::newInstance(payload.begin(), payload.end()); } int32_t prevStart = prevSpans->start(); int32_t prevEnd = prevSpans->end(); while (true) { // Advance prevSpans until after (lastStart, lastEnd) if (!prevSpans->next()) { inSameDoc = false; more = false; break; // Check remaining subSpans for final match. 
} else if (matchDoc != prevSpans->doc()) { inSameDoc = false; // The last subSpans is not advanced here. break; // Check remaining subSpans for last match in this document. } else { int32_t ppStart = prevSpans->start(); int32_t ppEnd = prevSpans->end(); // Cannot avoid invoking .end() if (!docSpansOrdered(ppStart, ppEnd, lastStart, lastEnd)) { break; // Check remaining subSpans. } else { prevStart = ppStart; prevEnd = ppEnd; if (collectPayloads && prevSpans->isPayloadAvailable()) { Collection payload(prevSpans->getPayload()); possiblePayload = Collection::newInstance(payload.begin(), payload.end()); } } } } if (collectPayloads && possiblePayload) { possibleMatchPayloads.addAll(possiblePayload.begin(), possiblePayload.end()); } BOOST_ASSERT(prevStart <= matchStart); if (matchStart > prevEnd) { // Only non overlapping spans add to slop. matchSlop += (matchStart - prevEnd); } // Do not break on (matchSlop > allowedSlop) here to make sure that subSpans[0] is // advanced after the match, if any. matchStart = prevStart; lastStart = prevStart; lastEnd = prevEnd; } bool match = (matchSlop <= allowedSlop); if (collectPayloads && match && !possibleMatchPayloads.empty()) { matchPayload.addAll(possibleMatchPayloads.begin(), possibleMatchPayloads.end()); } return match; // ordered and allowed slop } String NearSpansOrdered::toString() { StringStream buffer; buffer << getClassName() << L"(" << query->toString() << L")@"; if (firstTime) { buffer << L"START"; } else { if (more) { buffer << doc() << L":" << start() << L"-" << end(); } else { buffer << L"END"; } } return buffer.str(); } } LucenePlusPlus-rel_3.0.9/src/core/search/spans/NearSpansUnordered.cpp000066400000000000000000000170011456444476200257050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NearSpansUnordered.h" #include "_NearSpansUnordered.h" #include "NearSpansOrdered.h" #include "SpanNearQuery.h" #include "StringUtils.h" namespace Lucene { NearSpansUnordered::NearSpansUnordered(const SpanNearQueryPtr& query, const IndexReaderPtr& reader) { this->query = query; this->reader = reader; } NearSpansUnordered::~NearSpansUnordered() { } void NearSpansUnordered::initialize() { this->slop = query->getSlop(); this->totalLength = 0; this->more = true; this->firstTime = true; Collection clauses(query->getClauses()); queue = newLucene(clauses.size()); subSpans = Collection::newInstance(clauses.size()); ordered = Collection::newInstance(); for (int32_t i = 0; i < clauses.size(); ++i) { SpansCellPtr cell(newLucene(shared_from_this(), clauses[i]->getSpans(reader), i)); ordered.add(cell); subSpans[i] = cell->spans; } } Collection NearSpansUnordered::getSubSpans() { return subSpans; } bool NearSpansUnordered::next() { if (firstTime) { initList(true); listToQueue(); // initialize queue firstTime = false; } else if (more) { if (min()->next()) { // trigger further scanning queue->updateTop(); // maintain queue } else { more = false; } } while (more) { bool queueStale = false; if (min()->doc() != max->doc()) { // maintain list queueToList(); queueStale = true; } // skip to doc with all clauses while (more && first->doc() < last->doc()) { more = first->skipTo(last->doc()); // skip first upto last firstToLast(); // and move it to the end queueStale = true; } if (!more) { return false; } // found doc with all clauses if (queueStale) { // maintain the queue listToQueue(); queueStale = false; } if (atMatch()) { return true; } more = min()->next(); if (more) { queue->updateTop(); // maintain queue } } return false; // no more matches } bool 
NearSpansUnordered::skipTo(int32_t target) { if (firstTime) { // initialize initList(false); for (SpansCellPtr cell(first); more && cell; cell = cell->_next) { more = cell->skipTo(target); // skip all } if (more) { listToQueue(); } firstTime = false; } else { // normal case while (more && min()->doc() < target) { // skip as needed if (min()->skipTo(target)) { queue->updateTop(); } else { more = false; } } } return (more && (atMatch() || next())); } SpansCellPtr NearSpansUnordered::min() { return queue->top(); } int32_t NearSpansUnordered::doc() { return min()->doc(); } int32_t NearSpansUnordered::start() { return min()->start(); } int32_t NearSpansUnordered::end() { return max->end(); } Collection NearSpansUnordered::getPayload() { SetByteArray matchPayload(SetByteArray::newInstance()); for (SpansCellPtr cell(first); cell; cell = cell->_next) { if (cell->isPayloadAvailable()) { Collection payload(cell->getPayload()); matchPayload.addAll(payload.begin(), payload.end()); } } return Collection::newInstance(matchPayload.begin(), matchPayload.end()); } bool NearSpansUnordered::isPayloadAvailable() { SpansCellPtr pointer(min()); while (pointer) { if (pointer->isPayloadAvailable()) { return true; } pointer = pointer->_next; } return false; } String NearSpansUnordered::toString() { StringStream buffer; buffer << getClassName() << L"(" << query->toString() << L")@"; if (firstTime) { buffer << L"START"; } else { if (more) { buffer << doc() << L":" << start() << L"-" << end(); } else { buffer << L"END"; } } return buffer.str(); } void NearSpansUnordered::initList(bool next) { for (Collection::iterator cell = ordered.begin(); more && cell != ordered.end(); ++cell) { if (next) { more = (*cell)->next(); // move to first entry } if (more) { addToList(*cell); // add to list } } } void NearSpansUnordered::addToList(const SpansCellPtr& cell) { if (last) { // add next to end of list last->_next = cell; } else { first = cell; } last = cell; cell->_next.reset(); } void 
NearSpansUnordered::firstToLast() { last->_next = first; // move first to end of list last = first; first = first->_next; last->_next.reset(); } void NearSpansUnordered::queueToList() { first.reset(); last.reset(); while (queue->top()) { addToList(queue->pop()); } } void NearSpansUnordered::listToQueue() { queue->clear(); // rebuild queue for (SpansCellPtr cell(first); cell; cell = cell->_next) { queue->add(cell); // add to queue from list } } bool NearSpansUnordered::atMatch() { return ((min()->doc() == max->doc()) && ((max->end() - min()->start() - totalLength) <= slop)); } SpansCell::SpansCell(const NearSpansUnorderedPtr& unordered, const SpansPtr& spans, int32_t index) { this->_unordered = unordered; this->spans = spans; this->index = index; this->length = -1; } SpansCell::~SpansCell() { } bool SpansCell::next() { return adjust(spans->next()); } bool SpansCell::skipTo(int32_t target) { return adjust(spans->skipTo(target)); } bool SpansCell::adjust(bool condition) { NearSpansUnorderedPtr unordered(_unordered); if (length != -1) { unordered->totalLength -= length; // subtract old length } if (condition) { length = end() - start(); unordered->totalLength += length; // add new length if (!unordered->max || doc() > unordered->max->doc() || ((doc() == unordered->max->doc()) && (end() > unordered->max->end()))) { unordered->max = shared_from_this(); } } unordered->more = condition; return condition; } int32_t SpansCell::doc() { return spans->doc(); } int32_t SpansCell::start() { return spans->start(); } int32_t SpansCell::end() { return spans->end(); } Collection SpansCell::getPayload() { Collection payload(spans->getPayload()); return Collection::newInstance(payload.begin(), payload.end()); } bool SpansCell::isPayloadAvailable() { return spans->isPayloadAvailable(); } String SpansCell::toString() { return spans->toString() + L"#" + StringUtils::toString(index); } CellQueue::CellQueue(int32_t size) : PriorityQueue(size) { } CellQueue::~CellQueue() { } bool 
CellQueue::lessThan(const SpansCellPtr& first, const SpansCellPtr& second) { if (first->doc() == second->doc()) { return NearSpansOrdered::docSpansOrdered(first, second); } else { return (first->doc() < second->doc()); } } } LucenePlusPlus-rel_3.0.9/src/core/search/spans/SpanFirstQuery.cpp000066400000000000000000000074401456444476200251100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanFirstQuery.h" #include "_SpanFirstQuery.h" #include "SpanQuery.h" #include "MiscUtils.h" namespace Lucene { SpanFirstQuery::SpanFirstQuery(const SpanQueryPtr& match, int32_t end) { this->match = match; this->end = end; } SpanFirstQuery::~SpanFirstQuery() { } SpanQueryPtr SpanFirstQuery::getMatch() { return match; } int32_t SpanFirstQuery::getEnd() { return end; } String SpanFirstQuery::getField() { return match->getField(); } String SpanFirstQuery::toString(const String& field) { StringStream buffer; buffer << L"spanFirst(" << match->toString(field) << L", " << end << L")" << boostString(); return buffer.str(); } LuceneObjectPtr SpanFirstQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = SpanQuery::clone(other ? 
other : newLucene(boost::dynamic_pointer_cast(match->clone()), end)); SpanFirstQueryPtr spanFirstQuery(boost::dynamic_pointer_cast(clone)); spanFirstQuery->match = match; spanFirstQuery->end = end; spanFirstQuery->setBoost(getBoost()); return spanFirstQuery; } void SpanFirstQuery::extractTerms(SetTerm terms) { match->extractTerms(terms); } SpansPtr SpanFirstQuery::getSpans(const IndexReaderPtr& reader) { return newLucene(shared_from_this(), match->getSpans(reader)); } QueryPtr SpanFirstQuery::rewrite(const IndexReaderPtr& reader) { SpanFirstQueryPtr clone; SpanQueryPtr rewritten(boost::dynamic_pointer_cast(match->rewrite(reader))); if (rewritten != match) { clone = boost::dynamic_pointer_cast(this->clone()); clone->match = rewritten; } if (clone) { return clone; // some clauses rewrote } else { return shared_from_this(); // no clauses rewrote } } bool SpanFirstQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } SpanFirstQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) { return false; } return (end == otherQuery->end && match->equals(otherQuery->match) && getBoost() == otherQuery->getBoost()); } int32_t SpanFirstQuery::hashCode() { int32_t result = match->hashCode(); result ^= (result << 8) | MiscUtils::unsignedShift(result, 25); // reversible result ^= MiscUtils::doubleToRawIntBits(getBoost()) ^ end; return result; } FirstSpans::FirstSpans(const SpanFirstQueryPtr& query, const SpansPtr& spans) { this->query = query; this->spans = spans; } FirstSpans::~FirstSpans() { } bool FirstSpans::next() { while (spans->next()) { // scan to next match if (end() <= query->end) { return true; } } return false; } bool FirstSpans::skipTo(int32_t target) { if (!spans->skipTo(target)) { return false; } return (spans->end() <= query->end || next()); } int32_t FirstSpans::doc() { return spans->doc(); } int32_t FirstSpans::start() { return spans->start(); } int32_t FirstSpans::end() { return spans->end(); } Collection 
FirstSpans::getPayload() { Collection result; if (spans->isPayloadAvailable()) { Collection payload(spans->getPayload()); result = Collection::newInstance(payload.begin(), payload.end()); } return result; } bool FirstSpans::isPayloadAvailable() { return spans->isPayloadAvailable(); } } LucenePlusPlus-rel_3.0.9/src/core/search/spans/SpanNearQuery.cpp000066400000000000000000000115231456444476200247030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanNearQuery.h" #include "SpanQuery.h" #include "SpanOrQuery.h" #include "NearSpansOrdered.h" #include "NearSpansUnordered.h" #include "MiscUtils.h" namespace Lucene { SpanNearQuery::SpanNearQuery(Collection clauses, int32_t slop, bool inOrder, bool collectPayloads) { this->clauses = Collection::newInstance(); for (int32_t i = 0; i < clauses.size(); ++i) { SpanQueryPtr clause(clauses[i]); if (i == 0) { // check field field = clause->getField(); } else if (clause->getField() != field) { boost::throw_exception(IllegalArgumentException(L"Clauses must have same field.")); } this->clauses.add(clause); } this->collectPayloads = collectPayloads; this->slop = slop; this->inOrder = inOrder; } SpanNearQuery::~SpanNearQuery() { } Collection SpanNearQuery::getClauses() { return clauses; } int32_t SpanNearQuery::getSlop() { return slop; } bool SpanNearQuery::isInOrder() { return inOrder; } String SpanNearQuery::getField() { return field; } void SpanNearQuery::extractTerms(SetTerm terms) { for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { (*clause)->extractTerms(terms); } } String SpanNearQuery::toString(const String& field) { StringStream buffer; 
buffer << L"spanNear(["; for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { if (clause != clauses.begin()) { buffer << L", "; } buffer << (*clause)->toString(field); } buffer << L"], " << slop << L", " << inOrder << L")" << boostString(); return buffer.str(); } SpansPtr SpanNearQuery::getSpans(const IndexReaderPtr& reader) { if (clauses.empty()) { // optimize 0-clause case return newLucene(getClauses())->getSpans(reader); } if (clauses.size() == 1) { // optimize 1-clause case return clauses[0]->getSpans(reader); } return inOrder ? boost::static_pointer_cast(newLucene(shared_from_this(), reader, collectPayloads)) : boost::static_pointer_cast(newLucene(shared_from_this(), reader)); } QueryPtr SpanNearQuery::rewrite(const IndexReaderPtr& reader) { SpanNearQueryPtr clone; for (int32_t i = 0; i < clauses.size(); ++i) { SpanQueryPtr clause(clauses[i]); SpanQueryPtr query(boost::dynamic_pointer_cast(clause->rewrite(reader))); if (query != clause) { // clause rewrote: must clone if (!clone) { clone = boost::dynamic_pointer_cast(this->clone()); } clone->clauses[i] = query; } } if (clone) { return clone; // some clauses rewrote } else { return shared_from_this(); // no clauses rewrote } } LuceneObjectPtr SpanNearQuery::clone(const LuceneObjectPtr& other) { int32_t sz = clauses.size(); Collection newClauses(Collection::newInstance(sz)); for (int32_t i = 0; i < sz; ++i) { newClauses[i] = boost::dynamic_pointer_cast(clauses[i]->clone()); } SpanNearQueryPtr spanNearQuery(newLucene(newClauses, slop, inOrder)); spanNearQuery->setBoost(getBoost()); return spanNearQuery; } bool SpanNearQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } SpanNearQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) { return false; } if (inOrder != otherQuery->inOrder) { return false; } if (slop != otherQuery->slop) { return false; } if (!clauses.equals(otherQuery->clauses, luceneEquals())) { return 
false; } return (getBoost() == otherQuery->getBoost()); } int32_t SpanNearQuery::hashCode() { int32_t result = MiscUtils::hashCode(clauses.begin(), clauses.end(), MiscUtils::hashLucene); // Mix bits before folding in things like boost, since it could cancel the last element of clauses. // This particular mix also serves to differentiate SpanNearQuery hashcodes from others. result ^= (result << 14) | MiscUtils::unsignedShift(result, 19); // reversible result += MiscUtils::doubleToRawIntBits(getBoost()); result += slop; result ^= (inOrder ? 0x99afd3bd : 0); return result; } } LucenePlusPlus-rel_3.0.9/src/core/search/spans/SpanNotQuery.cpp000066400000000000000000000136461456444476200245660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanNotQuery.h" #include "_SpanNotQuery.h" #include "MiscUtils.h" namespace Lucene { SpanNotQuery::SpanNotQuery(const SpanQueryPtr& include, const SpanQueryPtr& exclude) { this->include = include; this->exclude = exclude; if (include->getField() != exclude->getField()) { boost::throw_exception(IllegalArgumentException(L"Clauses must have same field.")); } } SpanNotQuery::~SpanNotQuery() { } SpanQueryPtr SpanNotQuery::getInclude() { return include; } SpanQueryPtr SpanNotQuery::getExclude() { return exclude; } String SpanNotQuery::getField() { return include->getField(); } void SpanNotQuery::extractTerms(SetTerm terms) { include->extractTerms(terms); } String SpanNotQuery::toString(const String& field) { StringStream buffer; buffer << L"spanNot(" << include->toString(field) << L", " << exclude->toString(field) << L")"; buffer << boostString(); return buffer.str(); } LuceneObjectPtr 
SpanNotQuery::clone(const LuceneObjectPtr& other) { SpanNotQueryPtr spanNotQuery(newLucene(boost::dynamic_pointer_cast(include->clone()), boost::dynamic_pointer_cast(exclude->clone()))); spanNotQuery->setBoost(getBoost()); return spanNotQuery; } SpansPtr SpanNotQuery::getSpans(const IndexReaderPtr& reader) { return newLucene(shared_from_this(), include->getSpans(reader), exclude->getSpans(reader)); } QueryPtr SpanNotQuery::rewrite(const IndexReaderPtr& reader) { SpanNotQueryPtr clone; SpanQueryPtr rewrittenInclude(boost::dynamic_pointer_cast(include->rewrite(reader))); if (rewrittenInclude != include) { clone = boost::dynamic_pointer_cast(this->clone()); clone->include = rewrittenInclude; } SpanQueryPtr rewrittenExclude(boost::dynamic_pointer_cast(exclude->rewrite(reader))); if (rewrittenExclude != exclude) { if (!clone) { clone = boost::dynamic_pointer_cast(this->clone()); } clone->exclude = rewrittenExclude; } if (clone) { return clone; // some clauses rewrote } else { return shared_from_this(); // no clauses rewrote } } bool SpanNotQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } SpanNotQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) { return false; } return (include->equals(otherQuery->include) && exclude->equals(otherQuery->exclude) && getBoost() == otherQuery->getBoost()); } int32_t SpanNotQuery::hashCode() { int32_t result = include->hashCode(); result = (result << 1) | MiscUtils::unsignedShift(result, 31); // rotate left result ^= exclude->hashCode(); result = (result << 1) | MiscUtils::unsignedShift(result, 31); // rotate left result ^= MiscUtils::doubleToRawIntBits(getBoost()); return result; } NotSpans::NotSpans(const SpanNotQueryPtr& query, const SpansPtr& includeSpans, const SpansPtr& excludeSpans) { this->query = query; this->includeSpans = includeSpans; this->moreInclude = true; this->excludeSpans = excludeSpans; this->moreExclude = excludeSpans->next(); } NotSpans::~NotSpans() 
{ } bool NotSpans::next() { if (moreInclude) { // move to next include moreInclude = includeSpans->next(); } while (moreInclude && moreExclude) { if (includeSpans->doc() > excludeSpans->doc()) { // skip exclude moreExclude = excludeSpans->skipTo(includeSpans->doc()); } // while exclude is before while (moreExclude && includeSpans->doc() == excludeSpans->doc() && excludeSpans->end() <= includeSpans->start()) { moreExclude = excludeSpans->next(); // increment exclude } // if no intersection if (!moreExclude || includeSpans->doc() != excludeSpans->doc() || includeSpans->end() <= excludeSpans->start()) { break; // we found a match } moreInclude = includeSpans->next(); // intersected: keep scanning } return moreInclude; } bool NotSpans::skipTo(int32_t target) { if (moreInclude) { // skip include moreInclude = includeSpans->skipTo(target); } if (!moreInclude) { return false; } // skip exclude if (moreExclude && includeSpans->doc() > excludeSpans->doc()) { moreExclude = excludeSpans->skipTo(includeSpans->doc()); } // while exclude is before while (moreExclude && includeSpans->doc() == excludeSpans->doc() && excludeSpans->end() <= includeSpans->start()) { moreExclude = excludeSpans->next(); // increment exclude } // if no intersection if (!moreExclude || includeSpans->doc() != excludeSpans->doc() || includeSpans->end() <= excludeSpans->start()) { return true; // we found a match } return next(); // scan to next match } int32_t NotSpans::doc() { return includeSpans->doc(); } int32_t NotSpans::start() { return includeSpans->start(); } int32_t NotSpans::end() { return includeSpans->end(); } Collection NotSpans::getPayload() { Collection result; if (includeSpans->isPayloadAvailable()) { Collection payload(includeSpans->getPayload()); result = Collection::newInstance(payload.begin(), payload.end()); } return result; } bool NotSpans::isPayloadAvailable() { return includeSpans->isPayloadAvailable(); } String NotSpans::toString() { return L"spans(" + query->toString() + L")"; } } 
LucenePlusPlus-rel_3.0.9/src/core/search/spans/SpanOrQuery.cpp000066400000000000000000000151271456444476200244020ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanOrQuery.h" #include "_SpanOrQuery.h" #include "MiscUtils.h" namespace Lucene { SpanOrQuery::SpanOrQuery(Collection clauses) { // copy clauses array into an ArrayList this->clauses = Collection::newInstance(); for (int32_t i = 0; i < clauses.size(); ++i) { SpanQueryPtr clause(clauses[i]); if (i == 0) { // check field field = clause->getField(); } else if (clause->getField() != field) { boost::throw_exception(IllegalArgumentException(L"Clauses must have same field.")); } this->clauses.add(clause); } } SpanOrQuery::~SpanOrQuery() { } Collection SpanOrQuery::getClauses() { return clauses; } String SpanOrQuery::getField() { return field; } void SpanOrQuery::extractTerms(SetTerm terms) { for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { (*clause)->extractTerms(terms); } } LuceneObjectPtr SpanOrQuery::clone(const LuceneObjectPtr& other) { int32_t sz = clauses.size(); Collection newClauses(Collection::newInstance(sz)); for (int32_t i = 0; i < sz; ++i) { newClauses[i] = boost::dynamic_pointer_cast(clauses[i]->clone()); } SpanOrQueryPtr spanOrQuery(newLucene(newClauses)); spanOrQuery->setBoost(getBoost()); return spanOrQuery; } QueryPtr SpanOrQuery::rewrite(const IndexReaderPtr& reader) { SpanOrQueryPtr clone; for (int32_t i = 0; i < clauses.size(); ++i) { SpanQueryPtr clause(clauses[i]); SpanQueryPtr query(boost::dynamic_pointer_cast(clause->rewrite(reader))); if (query != clause) { // clause rewrote: must clone if (!clone) { 
clone = boost::dynamic_pointer_cast(this->clone()); } clone->clauses[i] = query; } } if (clone) { return clone; // some clauses rewrote } else { return shared_from_this(); // no clauses rewrote } } String SpanOrQuery::toString(const String& field) { StringStream buffer; buffer << L"SpanOr(["; for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { if (clause != clauses.begin()) { buffer << L", "; } buffer << (*clause)->toString(field); } buffer << L"])" << boostString(); return buffer.str(); } bool SpanOrQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } SpanOrQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) { return false; } if (!clauses.equals(otherQuery->clauses, luceneEquals())) { return false; } if (!clauses.empty() && field != otherQuery->field) { return false; } return (getBoost() == otherQuery->getBoost()); } int32_t SpanOrQuery::hashCode() { int32_t result = MiscUtils::hashCode(clauses.begin(), clauses.end(), MiscUtils::hashLucene); result ^= (result << 10) | MiscUtils::unsignedShift(result, 23); result ^= MiscUtils::doubleToRawIntBits(getBoost()); return result; } SpansPtr SpanOrQuery::getSpans(const IndexReaderPtr& reader) { if (clauses.size() == 1) { // optimize 1-clause case return clauses[0]->getSpans(reader); } return newLucene(shared_from_this(), reader); } SpanQueue::SpanQueue(int32_t size) : PriorityQueue(size) { } SpanQueue::~SpanQueue() { } bool SpanQueue::lessThan(const SpansPtr& first, const SpansPtr& second) { if (first->doc() == second->doc()) { if (first->start() == second->start()) { return (first->end() < second->end()); } else { return (first->start() < second->start()); } } else { return (first->doc() < second->doc()); } } OrSpans::OrSpans(const SpanOrQueryPtr& query, const IndexReaderPtr& reader) { this->query = query; this->reader = reader; } OrSpans::~OrSpans() { } bool OrSpans::initSpanQueue(int32_t target) { queue = 
newLucene(query->clauses.size()); for (Collection::iterator clause = query->clauses.begin(); clause != query->clauses.end(); ++clause) { SpansPtr spans((*clause)->getSpans(reader)); if ((target == -1 && spans->next()) || (target != -1 && spans->skipTo(target))) { queue->add(spans); } } return !queue->empty(); } bool OrSpans::next() { if (!queue) { return initSpanQueue(-1); } if (queue->empty()) { // all done return false; } if (top()->next()) { // move to next queue->updateTop(); return true; } queue->pop(); // exhausted a clause return !queue->empty(); } SpansPtr OrSpans::top() { return queue->top(); } bool OrSpans::skipTo(int32_t target) { if (!queue) { return initSpanQueue(target); } bool skipCalled = false; while (!queue->empty() && top()->doc() < target) { if (top()->skipTo(target)) { queue->updateTop(); } else { queue->pop(); } skipCalled = true; } if (skipCalled) { return !queue->empty(); } return next(); } int32_t OrSpans::doc() { return top()->doc(); } int32_t OrSpans::start() { return top()->start(); } int32_t OrSpans::end() { return top()->end(); } Collection OrSpans::getPayload() { Collection result; SpansPtr theTop(top()); if (theTop && theTop->isPayloadAvailable()) { Collection payload(theTop->getPayload()); result = Collection::newInstance(payload.begin(), payload.end()); } return result; } bool OrSpans::isPayloadAvailable() { SpansPtr theTop(top()); return (theTop && theTop->isPayloadAvailable()); } String OrSpans::toString() { StringStream buffer; buffer << L"spans(" << query->toString() << L")@"; if (!queue) { buffer << L"START"; } else { if (!queue->empty()) { buffer << doc() << L":" << start() << L"-" << end(); } else { buffer << L"END"; } } return buffer.str(); } } LucenePlusPlus-rel_3.0.9/src/core/search/spans/SpanQuery.cpp000066400000000000000000000011211456444476200240660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanQuery.h" #include "SpanWeight.h" namespace Lucene { SpanQuery::~SpanQuery() { } WeightPtr SpanQuery::createWeight(const SearcherPtr& searcher) { return newLucene(shared_from_this(), searcher); } } LucenePlusPlus-rel_3.0.9/src/core/search/spans/SpanScorer.cpp000066400000000000000000000045161456444476200242310ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanScorer.h" #include "Explanation.h" #include "Weight.h" #include "Similarity.h" #include "Spans.h" #include "StringUtils.h" namespace Lucene { SpanScorer::SpanScorer(const SpansPtr& spans, const WeightPtr& weight, const SimilarityPtr& similarity, ByteArray norms) : Scorer(similarity) { this->spans = spans; this->norms = norms; this->weight = weight; this->value = weight->getValue(); this->freq = 0.0; if (this->spans->next()) { doc = -1; more = true; } else { doc = NO_MORE_DOCS; more = false; } } SpanScorer::~SpanScorer() { } int32_t SpanScorer::nextDoc() { if (!setFreqCurrentDoc()) { doc = NO_MORE_DOCS; } return doc; } int32_t SpanScorer::advance(int32_t target) { if (!more) { doc = NO_MORE_DOCS; return doc; } if (spans->doc() < target) { // setFreqCurrentDoc() leaves spans->doc() ahead more = spans->skipTo(target); } if (!setFreqCurrentDoc()) { doc = NO_MORE_DOCS; } return doc; } bool SpanScorer::setFreqCurrentDoc() { if (!more) { return false; } doc = spans->doc(); freq = 0.0; do { int32_t matchLength = spans->end() - spans->start(); 
freq += getSimilarity()->sloppyFreq(matchLength); more = spans->next(); } while (more && (doc == spans->doc())); return true; } int32_t SpanScorer::docID() { return doc; } double SpanScorer::score() { double raw = getSimilarity()->tf(freq) * value; // raw score return norms ? raw * Similarity::decodeNorm(norms[doc]) : raw; // normalize } ExplanationPtr SpanScorer::explain(int32_t doc) { ExplanationPtr tfExplanation(newLucene()); int32_t expDoc = advance(doc); double phraseFreq = expDoc == doc ? freq : 0.0; tfExplanation->setValue(getSimilarity()->tf(phraseFreq)); tfExplanation->setDescription(L"tf(phraseFreq=" + StringUtils::toString(phraseFreq) + L")"); return tfExplanation; } } LucenePlusPlus-rel_3.0.9/src/core/search/spans/SpanTermQuery.cpp000066400000000000000000000043741456444476200247330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanTermQuery.h" #include "Term.h" #include "TermSpans.h" #include "IndexReader.h" #include "MiscUtils.h" namespace Lucene { SpanTermQuery::SpanTermQuery(const TermPtr& term) { this->term = term; } SpanTermQuery::~SpanTermQuery() { } TermPtr SpanTermQuery::getTerm() { return term; } String SpanTermQuery::getField() { return term->field(); } void SpanTermQuery::extractTerms(SetTerm terms) { terms.add(term); } String SpanTermQuery::toString(const String& field) { StringStream buffer; if (term->field() == field) { buffer << term->text(); } else { buffer << term->toString(); } buffer << boostString(); return buffer.str(); } int32_t SpanTermQuery::hashCode() { int32_t prime = 31; int32_t result = SpanQuery::hashCode(); result = prime * result + (term ? 
term->hashCode() : 0); return result; } bool SpanTermQuery::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } if (!SpanQuery::equals(other)) { return false; } if (!MiscUtils::equalTypes(shared_from_this(), other)) { return false; } SpanTermQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); if (!otherQuery) { return false; } if (!term) { if (otherQuery->term) { return false; } } else if (!term->equals(otherQuery->term)) { return false; } return true; } LuceneObjectPtr SpanTermQuery::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = SpanQuery::clone(other ? other : newLucene(term)); SpanTermQueryPtr spanFirstQuery(boost::dynamic_pointer_cast(clone)); spanFirstQuery->term = term; return spanFirstQuery; } SpansPtr SpanTermQuery::getSpans(const IndexReaderPtr& reader) { return newLucene(reader->termPositions(term), term); } } LucenePlusPlus-rel_3.0.9/src/core/search/spans/SpanWeight.cpp000066400000000000000000000076021456444476200242220ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SpanWeight.h" #include "SpanScorer.h" #include "SpanQuery.h" #include "IndexReader.h" #include "ComplexExplanation.h" #include "Similarity.h" #include "StringUtils.h" namespace Lucene { SpanWeight::SpanWeight(const SpanQueryPtr& query, const SearcherPtr& searcher) { this->similarity = query->getSimilarity(searcher); this->query = query; terms = SetTerm::newInstance(); query->extractTerms(terms); idfExp = similarity->idfExplain(Collection::newInstance(terms.begin(), terms.end()), searcher); idf = idfExp->getIdf(); value = 0.0; queryNorm = 0.0; queryWeight = 0.0; } SpanWeight::~SpanWeight() { } QueryPtr SpanWeight::getQuery() { return query; } double SpanWeight::getValue() { return value; } double SpanWeight::sumOfSquaredWeights() { queryWeight = idf * getQuery()->getBoost(); // compute query weight return queryWeight * queryWeight; // square it } void SpanWeight::normalize(double norm) { queryNorm = norm; queryWeight *= queryNorm; // normalize query weight value = queryWeight * idf; // idf for document } ScorerPtr SpanWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { return newLucene(query->getSpans(reader), shared_from_this(), similarity, reader->norms(query->getField())); } ExplanationPtr SpanWeight::explain(const IndexReaderPtr& reader, int32_t doc) { ComplexExplanationPtr result(newLucene()); result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); String field(query->getField()); ExplanationPtr idfExpl(newLucene(idf, L"idf(" + field + L":" + idfExp->explain() + L")")); // explain query weight ExplanationPtr queryExpl(newLucene()); queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); if (query->getBoost() != 1.0) { queryExpl->addDetail(boostExpl); } 
queryExpl->addDetail(idfExpl); ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); queryExpl->addDetail(queryNormExpl); queryExpl->setValue(boostExpl->getValue() * idfExpl->getValue() * queryNormExpl->getValue()); result->addDetail(queryExpl); // explain field weight ComplexExplanationPtr fieldExpl(newLucene()); fieldExpl->setDescription(L"fieldWeight(" + field + L":" + query->toString(field) + L" in " + StringUtils::toString(doc) + L"), product of:"); ExplanationPtr tfExpl(boost::dynamic_pointer_cast(scorer(reader, true, false))->explain(doc)); fieldExpl->addDetail(tfExpl); fieldExpl->addDetail(idfExpl); ExplanationPtr fieldNormExpl(newLucene()); ByteArray fieldNorms(reader->norms(field)); double fieldNorm = fieldNorms ? Similarity::decodeNorm(fieldNorms[doc]) : 1.0; fieldNormExpl->setValue(fieldNorm); fieldNormExpl->setDescription(L"fieldNorm(field=" + field + L", doc=" + StringUtils::toString(doc) + L")"); fieldExpl->addDetail(fieldNormExpl); fieldExpl->setMatch(tfExpl->isMatch()); fieldExpl->setValue(tfExpl->getValue() * idfExpl->getValue() * fieldNormExpl->getValue()); result->addDetail(fieldExpl); result->setMatch(fieldExpl->getMatch()); // combine them result->setValue(queryExpl->getValue() * fieldExpl->getValue()); if (queryExpl->getValue() == 1.0) { return fieldExpl; } return result; } } LucenePlusPlus-rel_3.0.9/src/core/search/spans/Spans.cpp000066400000000000000000000006511456444476200232320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Spans.h" namespace Lucene { Spans::~Spans() { } } LucenePlusPlus-rel_3.0.9/src/core/search/spans/TermSpans.cpp000066400000000000000000000041701456444476200240620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TermSpans.h" #include "TermPositions.h" #include "Term.h" namespace Lucene { TermSpans::TermSpans(const TermPositionsPtr& positions, const TermPtr& term) { this->positions = positions; this->term = term; this->_doc = -1; this->freq = 0; this->count = 0; this->position = 0; } TermSpans::~TermSpans() { } bool TermSpans::next() { if (count == freq) { if (!positions->next()) { _doc = INT_MAX; return false; } _doc = positions->doc(); freq = positions->freq(); count = 0; } position = positions->nextPosition(); ++count; return true; } bool TermSpans::skipTo(int32_t target) { if (!positions->skipTo(target)) { _doc = INT_MAX; return false; } _doc = positions->doc(); freq = positions->freq(); count = 0; position = positions->nextPosition(); ++count; return true; } int32_t TermSpans::doc() { return _doc; } int32_t TermSpans::start() { return position; } int32_t TermSpans::end() { return position + 1; } Collection TermSpans::getPayload() { Collection payload(newCollection(ByteArray::newInstance(positions->getPayloadLength()))); payload[0] = positions->getPayload(payload[0], 0); return payload; } bool TermSpans::isPayloadAvailable() { return positions->isPayloadAvailable(); } String TermSpans::toString() { StringStream buffer; buffer << L"spans(" << term->toString() << L")@"; if (_doc == -1) { buffer << L"START"; } 
else if (_doc == INT_MAX) { buffer << L"END"; } else { buffer << _doc << L"-" << position; } return buffer.str(); } TermPositionsPtr TermSpans::getPositions() { return positions; } } LucenePlusPlus-rel_3.0.9/src/core/store/000077500000000000000000000000001456444476200202035ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/store/BufferedIndexInput.cpp000066400000000000000000000152371456444476200244510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BufferedIndexInput.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { /// Default buffer size. const int32_t BufferedIndexInput::BUFFER_SIZE = 1024 * 2; BufferedIndexInput::BufferedIndexInput(int32_t bufferSize) { this->bufferSize = bufferSize; bufferStart = 0; bufferLength = 0; bufferPosition = 0; } BufferedIndexInput::~BufferedIndexInput() { } uint8_t BufferedIndexInput::readByte() { if (bufferPosition < bufferLength) { return __buffer[bufferPosition++]; } refill(); return __buffer[bufferPosition++]; } static const int MAX_VARINT32_LENGHT = 5; int32_t BufferedIndexInput::readVInt() { if (bufferPosition + MAX_VARINT32_LENGHT < bufferLength) { uint8_t b = __buffer[bufferPosition++]; int32_t i = (b & 0x7f); for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) { b = __buffer[bufferPosition++]; i |= (b & 0x7f) << shift; } return i; } else { uint8_t b = readByte(); int32_t i = (b & 0x7f); for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) { b = readByte(); i |= (b & 0x7f) << shift; } return i; } } void BufferedIndexInput::setBufferSize(int32_t newSize) { if (newSize != bufferSize) { bufferSize = newSize; if (buffer) { // Resize the existing buffer 
and carefully save as many bytes as possible starting from the current bufferPosition ByteArray _newBuffer(ByteArray::newInstance(newSize)); int32_t leftInBuffer = bufferLength - bufferPosition; int32_t numToCopy = leftInBuffer > newSize ? newSize : leftInBuffer; MiscUtils::arrayCopy(buffer.get(), bufferPosition, _newBuffer.get(), 0, numToCopy); bufferStart += bufferPosition; bufferPosition = 0; bufferLength = numToCopy; newBuffer(_newBuffer); } } } void BufferedIndexInput::newBuffer(ByteArray newBuffer) { // Subclasses can do something here buffer = newBuffer; __buffer = newBuffer.get(); } int32_t BufferedIndexInput::getBufferSize() { return bufferSize; } void BufferedIndexInput::checkBufferSize(int32_t bufferSize) { if (bufferSize <= 0) { boost::throw_exception(IllegalArgumentException(L"bufferSize must be greater than 0 (got " + StringUtils::toString(bufferSize) + L")")); } } void BufferedIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length) { readBytes(b, offset, length, true); } void BufferedIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer) { if (length <= (bufferLength - bufferPosition)) { // the buffer contains enough data to satisfy this request if (length > 0) { // to allow b to be null if length is 0 MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, length); } bufferPosition += length; } else { // the buffer does not have enough data, first serve all we've got int32_t available = bufferLength - bufferPosition; if (available > 0) { MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, available); offset += available; length -= available; bufferPosition += available; } // and now, read the remaining 'length' bytes if (useBuffer && length < bufferSize) { // If the amount left to read is small enough, and we are allowed to use our buffer, // do it in the usual buffered way: fill the buffer and copy from it refill(); if (bufferLength < length) { // throw an exception when refill() could not read 
length bytes MiscUtils::arrayCopy(buffer.get(), 0, b, offset, bufferLength); boost::throw_exception(IOException(L"Read past EOF")); } else { MiscUtils::arrayCopy(buffer.get(), 0, b, offset, length); bufferPosition = length; } } else { // The amount left to read is larger than the buffer or we've been asked to not use // our buffer - there's no performance reason not to read it all at once. // Note that unlike the previous code of this function, there is no need to do a seek // here, because there's no need to reread what we had in the buffer. int64_t after = bufferStart + bufferPosition + length; if (after > this->length()) { boost::throw_exception(IOException(L"Read past EOF")); } readInternal(b, offset, length); bufferStart = after; bufferPosition = 0; bufferLength = 0; // trigger refill() on read } } } void BufferedIndexInput::refill() { int64_t start = bufferStart + bufferPosition; int64_t end = start + bufferSize; if (end > length()) { // don't read past EOF end = length(); } int32_t newLength = (int32_t)(end - start); if (newLength <= 0) { boost::throw_exception(IOException(L"Read past EOF")); } if (!buffer) { newBuffer(ByteArray::newInstance(bufferSize)); // allocate buffer lazily seekInternal(bufferStart); } readInternal(__buffer, 0, newLength); bufferLength = newLength; bufferStart = start; bufferPosition = 0; } void BufferedIndexInput::close() { bufferStart = 0; bufferLength = 0; bufferPosition = 0; } int64_t BufferedIndexInput::getFilePointer() { return bufferStart + bufferPosition; } void BufferedIndexInput::seek(int64_t pos) { if (pos >= bufferStart && pos < (bufferStart + bufferLength)) { bufferPosition = (int32_t)(pos - bufferStart); // seek within buffer } else { bufferStart = pos; bufferPosition = 0; bufferLength = 0; // trigger refill() on read() seekInternal(pos); } } LuceneObjectPtr BufferedIndexInput::clone(const LuceneObjectPtr& other) { BufferedIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(IndexInput::clone(other))); 
cloneIndexInput->bufferSize = bufferSize; cloneIndexInput->buffer.reset(); cloneIndexInput->bufferLength = 0; cloneIndexInput->bufferPosition = 0; cloneIndexInput->bufferStart = getFilePointer(); return cloneIndexInput; } } LucenePlusPlus-rel_3.0.9/src/core/store/BufferedIndexOutput.cpp000066400000000000000000000053721456444476200246510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BufferedIndexOutput.h" #include "MiscUtils.h" namespace Lucene { const int32_t BufferedIndexOutput::BUFFER_SIZE = 16384; BufferedIndexOutput::BufferedIndexOutput() { bufferStart = 0; bufferPosition = 0; buffer = ByteArray::newInstance(BUFFER_SIZE); } BufferedIndexOutput::~BufferedIndexOutput() { } void BufferedIndexOutput::writeByte(uint8_t b) { if (bufferPosition >= BUFFER_SIZE) { flush(); } buffer[bufferPosition++] = b; } void BufferedIndexOutput::writeBytes(const uint8_t* b, int32_t offset, int32_t length) { int32_t bytesLeft = BUFFER_SIZE - bufferPosition; if (bytesLeft >= length) { // we add the data to the end of the buffer MiscUtils::arrayCopy(b, offset, buffer.get(), bufferPosition, length); bufferPosition += length; // if the buffer is full, flush it if (BUFFER_SIZE - bufferPosition == 0) { flush(); } } else if (length > BUFFER_SIZE) { // we flush the buffer if (bufferPosition > 0) { flush(); } // and write data at once flushBuffer(b, offset, length); bufferStart += length; } else { // we fill/flush the buffer (until the input is written) int32_t pos = 0; // position in the input data int32_t pieceLength; while (pos < length) { pieceLength = (length - pos < bytesLeft) ? 
length - pos : bytesLeft; MiscUtils::arrayCopy(b, pos + offset, buffer.get(), bufferPosition, pieceLength); pos += pieceLength; bufferPosition += pieceLength; // if the buffer is full, flush it bytesLeft = BUFFER_SIZE - bufferPosition; if (bytesLeft == 0) { flush(); bytesLeft = BUFFER_SIZE; } } } } void BufferedIndexOutput::flush() { flushBuffer(buffer.get(), bufferPosition); bufferStart += bufferPosition; bufferPosition = 0; } void BufferedIndexOutput::flushBuffer(const uint8_t* b, int32_t length) { flushBuffer(b, 0, length); } void BufferedIndexOutput::flushBuffer(const uint8_t* b, int32_t offset, int32_t length) { // override } void BufferedIndexOutput::close() { flush(); } int64_t BufferedIndexOutput::getFilePointer() { return bufferStart + bufferPosition; } void BufferedIndexOutput::seek(int64_t pos) { flush(); bufferStart = pos; } } LucenePlusPlus-rel_3.0.9/src/core/store/ChecksumIndexInput.cpp000066400000000000000000000031451456444476200244640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ChecksumIndexInput.h" namespace Lucene { ChecksumIndexInput::ChecksumIndexInput(const IndexInputPtr& main) { this->main = main; } ChecksumIndexInput::~ChecksumIndexInput() { } uint8_t ChecksumIndexInput::readByte() { uint8_t b = main->readByte(); checksum.process_byte(b); return b; } void ChecksumIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length) { main->readBytes(b, offset, length); checksum.process_bytes(b + offset, length); } int64_t ChecksumIndexInput::getChecksum() { return checksum.checksum(); } void ChecksumIndexInput::close() { main->close(); } int64_t ChecksumIndexInput::getFilePointer() { return main->getFilePointer(); } void ChecksumIndexInput::seek(int64_t pos) { boost::throw_exception(RuntimeException(L"Seek not allowed")); } int64_t ChecksumIndexInput::length() { return main->length(); } LuceneObjectPtr ChecksumIndexInput::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = IndexInput::clone(other ? other : newLucene(main)); ChecksumIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(clone)); cloneIndexInput->main = main; cloneIndexInput->checksum = checksum; return cloneIndexInput; } } LucenePlusPlus-rel_3.0.9/src/core/store/ChecksumIndexOutput.cpp000066400000000000000000000035131456444476200246640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ChecksumIndexOutput.h" namespace Lucene { ChecksumIndexOutput::ChecksumIndexOutput(const IndexOutputPtr& main) { this->main = main; } ChecksumIndexOutput::~ChecksumIndexOutput() { } void ChecksumIndexOutput::writeByte(uint8_t b) { checksum.process_byte(b); main->writeByte(b); } void ChecksumIndexOutput::writeBytes(const uint8_t* b, int32_t offset, int32_t length) { checksum.process_bytes(b + offset, length); main->writeBytes(b, offset, length); } int64_t ChecksumIndexOutput::getChecksum() { return checksum.checksum(); } void ChecksumIndexOutput::flush() { main->flush(); } void ChecksumIndexOutput::close() { main->close(); } int64_t ChecksumIndexOutput::getFilePointer() { return main->getFilePointer(); } void ChecksumIndexOutput::seek(int64_t pos) { boost::throw_exception(RuntimeException(L"Seek not allowed")); } void ChecksumIndexOutput::prepareCommit() { int64_t checksum = getChecksum(); // Intentionally write a mismatched checksum. This is because we want to 1) test, as best we can, that we // are able to write a long to the file, but 2) not actually "commit" the file yet. This (prepare commit) // is phase 1 of a two-phase commit. int64_t pos = main->getFilePointer(); main->writeLong(checksum - 1); main->flush(); main->seek(pos); } void ChecksumIndexOutput::finishCommit() { main->writeLong(getChecksum()); } int64_t ChecksumIndexOutput::length() { return main->length(); } } LucenePlusPlus-rel_3.0.9/src/core/store/Directory.cpp000066400000000000000000000061221456444476200226540ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Directory.h" #include "LockFactory.h" #include "BufferedIndexOutput.h" #include "IndexFileNameFilter.h" #include "IndexInput.h" #include "IndexOutput.h" namespace Lucene { Directory::Directory() { isOpen = true; } Directory::~Directory() { } void Directory::close() { // override } void Directory::sync(const String& name) { } IndexInputPtr Directory::openInput(const String& name, int32_t bufferSize) { return openInput(name); } LockPtr Directory::makeLock(const String& name) { return lockFactory->makeLock(name); } void Directory::clearLock(const String& name) { if (lockFactory) { lockFactory->clearLock(name); } } void Directory::setLockFactory(const LockFactoryPtr& lockFactory) { BOOST_ASSERT(lockFactory); this->lockFactory = lockFactory; this->lockFactory->setLockPrefix(getLockID()); } LockFactoryPtr Directory::getLockFactory() { return lockFactory; } String Directory::getLockID() { return toString(); } String Directory::toString() { return LuceneObject::toString() + L" lockFactory=" + getLockFactory()->toString(); } void Directory::copy(const DirectoryPtr& src, const DirectoryPtr& dest, bool closeDirSrc) { HashSet files(src->listAll()); ByteArray buf(ByteArray::newInstance(BufferedIndexOutput::BUFFER_SIZE)); for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { if (!IndexFileNameFilter::accept(L"", *file)) { continue; } IndexOutputPtr os; IndexInputPtr is; LuceneException finally; try { // create file in dest directory os = dest->createOutput(*file); // read current file is = src->openInput(*file); // and copy to dest directory int64_t len = is->length(); int64_t readCount = 0; while (readCount < len) { int32_t toRead = readCount + BufferedIndexOutput::BUFFER_SIZE > len ? 
(int32_t)(len - readCount) : BufferedIndexOutput::BUFFER_SIZE; is->readBytes(buf.get(), 0, toRead); os->writeBytes(buf.get(), toRead); readCount += toRead; } } catch (LuceneException& e) { finally = e; } // graceful cleanup try { if (os) { os->close(); } } catch (...) { } try { if (is) { is->close(); } } catch (...) { } finally.throwException(); } if (closeDirSrc) { src->close(); } } void Directory::ensureOpen() { if (!isOpen) { boost::throw_exception(AlreadyClosedException(L"This directory is closed")); } } } LucenePlusPlus-rel_3.0.9/src/core/store/FSDirectory.cpp000066400000000000000000000154631456444476200231150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FSDirectory.h" #include "NativeFSLockFactory.h" #include "SimpleFSDirectory.h" #include "BufferedIndexInput.h" #include "LuceneThread.h" #include "FileUtils.h" #include "StringUtils.h" #if defined(_WIN32) #include #elif defined(__APPLE__) #include #else #include #endif #include extern "C" { #include "../util/md5/md5.h" } namespace Lucene { /// Default read chunk size. This is a conditional default based on operating system. 
#ifdef LPP_BUILD_64 const int32_t FSDirectory::DEFAULT_READ_CHUNK_SIZE = INT_MAX; #else const int32_t FSDirectory::DEFAULT_READ_CHUNK_SIZE = 100 * 1024 * 1024; // 100mb #endif FSDirectory::FSDirectory(const String& path, const LockFactoryPtr& lockFactory) { checked = false; chunkSize = DEFAULT_READ_CHUNK_SIZE; LockFactoryPtr _lockFactory(lockFactory); // new ctors use always NativeFSLockFactory as default if (!_lockFactory) { _lockFactory = newLucene(); } directory = path; if (FileUtils::fileExists(directory) && !FileUtils::isDirectory(directory)) { boost::throw_exception(NoSuchDirectoryException(L"File '" + directory + L"' exists but is not a directory")); } setLockFactory(_lockFactory); // for filesystem based LockFactory, delete the lockPrefix if the locks are placed // in index dir. if no index dir is given, set ourselves FSLockFactoryPtr lf(boost::dynamic_pointer_cast(_lockFactory)); if (lf) { if (lf->getLockDir().empty()) { lf->setLockDir(directory); lf->setLockPrefix(L""); } else if (lf->getLockDir() == directory) { lf->setLockPrefix(L""); } } } FSDirectory::~FSDirectory() { } FSDirectoryPtr FSDirectory::open(const String& path) { return open(path, LockFactoryPtr()); } FSDirectoryPtr FSDirectory::open(const String& path, const LockFactoryPtr& lockFactory) { return newLucene(path, lockFactory); } void FSDirectory::createDir() { if (!checked) { if (!FileUtils::fileExists(directory) && !FileUtils::createDirectory(directory)) { boost::throw_exception(IOException(L"Cannot create directory: " + directory)); } checked = true; } } void FSDirectory::initOutput(const String& name) { ensureOpen(); createDir(); String path(FileUtils::joinPath(directory, name)); if (FileUtils::fileExists(path) && !FileUtils::removeFile(path)) { // delete existing, if any boost::throw_exception(IOException(L"Cannot overwrite: " + name)); } } HashSet FSDirectory::listAll(const String& dir) { if (!FileUtils::fileExists(dir)) { boost::throw_exception(NoSuchDirectoryException(L"Directory '" + 
dir + L"' does not exist")); } else if (!FileUtils::isDirectory(dir)) { boost::throw_exception(NoSuchDirectoryException(L"File '" + dir + L"' exists but is not a directory")); } HashSet result(HashSet::newInstance()); // Exclude subdirs if (!FileUtils::listDirectory(dir, true, result)) { boost::throw_exception(IOException(L"Directory '" + dir + L"' exists and is a directory, but cannot be listed")); } return result; } HashSet FSDirectory::listAll() { ensureOpen(); return listAll(directory); } bool FSDirectory::fileExists(const String& name) { ensureOpen(); return FileUtils::fileExists(FileUtils::joinPath(directory, name)); } uint64_t FSDirectory::fileModified(const String& name) { ensureOpen(); return FileUtils::fileModified(FileUtils::joinPath(directory, name)); } uint64_t FSDirectory::fileModified(const String& directory, const String& name) { return FileUtils::fileModified(FileUtils::joinPath(directory, name)); } void FSDirectory::touchFile(const String& name) { ensureOpen(); FileUtils::touchFile(FileUtils::joinPath(directory, name)); } void FSDirectory::deleteFile(const String& name) { ensureOpen(); if (!FileUtils::removeFile(FileUtils::joinPath(directory, name))) { boost::throw_exception(IOException(L"Cannot delete: " + name)); } } int64_t FSDirectory::fileLength(const String& name) { ensureOpen(); return FileUtils::fileLength(FileUtils::joinPath(directory, name)); } void FSDirectory::sync(const String& name) { ensureOpen(); String path(FileUtils::joinPath(directory, name)); bool success = false; for (int32_t retryCount = 0; retryCount < 5; ++retryCount) { boost::iostreams::file_descriptor syncFile; try { syncFile.open(boost::filesystem::path(path)); } catch (...) 
{ } if (syncFile.is_open()) { boost::iostreams::file_descriptor::handle_type fd = syncFile.handle(); #if defined(_WIN32) bool ok = ::FlushFileBuffers(fd) != 0; #elif defined(__APPLE__) bool ok = fcntl(fd, F_FULLFSYNC) == 0; #else bool ok = fsync(fd) == 0; #endif syncFile.close(); if (ok) success = true; break; } LuceneThread::threadSleep(5); // pause 5 msec } if (!success) { boost::throw_exception(IOException(L"Sync failure: " + path)); } } IndexInputPtr FSDirectory::openInput(const String& name) { ensureOpen(); return openInput(name, BufferedIndexInput::BUFFER_SIZE); } IndexInputPtr FSDirectory::openInput(const String& name, int32_t bufferSize) { return Directory::openInput(name, bufferSize); } String FSDirectory::getLockID() { ensureOpen(); md5_state_t state; md5_byte_t digest[16]; md5_init(&state); md5_append(&state, (const md5_byte_t*)StringUtils::toUTF8(directory).c_str(), directory.size()); md5_finish(&state, digest); static const wchar_t* hexDigits = L"0123456789abcdef"; String lockID(L"lucene-"); for (int32_t i = 0; i < 16; ++i) { lockID += hexDigits[(digest[i] >> 4) & 0x0f]; lockID += hexDigits[digest[i] & 0x0f]; } return lockID; } void FSDirectory::close() { SyncLock syncLock(this); isOpen = false; } String FSDirectory::toString() { return getClassName() + L"@" + directory + L" lockFactory=" + getLockFactory()->toString(); } String FSDirectory::getFile() { ensureOpen(); return directory; } void FSDirectory::setReadChunkSize(int32_t chunkSize) { #ifndef LPP_BUILD_64 this->chunkSize = chunkSize; #endif } int32_t FSDirectory::getReadChunkSize() { return chunkSize; } } LucenePlusPlus-rel_3.0.9/src/core/store/FSLockFactory.cpp000066400000000000000000000014271456444476200233640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FSLockFactory.h" namespace Lucene { FSLockFactory::FSLockFactory() { } FSLockFactory::~FSLockFactory() { } void FSLockFactory::setLockDir(const String& lockDir) { if (!this->lockDir.empty()) { boost::throw_exception(IllegalStateException(L"You can set the lock directory for this factory only once.")); } this->lockDir = lockDir; } String FSLockFactory::getLockDir() { return lockDir; } } LucenePlusPlus-rel_3.0.9/src/core/store/FileSwitchDirectory.cpp000066400000000000000000000054671456444476200246510ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FileSwitchDirectory.h" namespace Lucene { FileSwitchDirectory::FileSwitchDirectory(HashSet primaryExtensions, const DirectoryPtr& primaryDir, const DirectoryPtr& secondaryDir, bool doClose) { this->primaryExtensions = primaryExtensions; this->primaryDir = primaryDir; this->secondaryDir = secondaryDir; this->doClose = doClose; this->lockFactory = primaryDir->getLockFactory(); } FileSwitchDirectory::~FileSwitchDirectory() { } DirectoryPtr FileSwitchDirectory::getPrimaryDir() { return primaryDir; } DirectoryPtr FileSwitchDirectory::getSecondaryDir() { return secondaryDir; } void FileSwitchDirectory::close() { if (doClose) { LuceneException finally; try { secondaryDir->close(); } catch (LuceneException& e) { finally = e; } doClose = false; primaryDir->close(); finally.throwException(); } } HashSet FileSwitchDirectory::listAll() { HashSet primaryFiles(primaryDir->listAll()); HashSet secondaryFiles(secondaryDir->listAll()); HashSet 
files(HashSet::newInstance(primaryFiles.begin(), primaryFiles.end())); files.addAll(secondaryFiles.begin(), secondaryFiles.end()); return files; } String FileSwitchDirectory::getExtension(const String& name) { String::size_type i = name.find_last_of(L'.'); return i == String::npos ? L"" : name.substr(i + 1); } DirectoryPtr FileSwitchDirectory::getDirectory(const String& name) { return primaryExtensions.contains(getExtension(name)) ? primaryDir : secondaryDir; } bool FileSwitchDirectory::fileExists(const String& name) { return getDirectory(name)->fileExists(name); } uint64_t FileSwitchDirectory::fileModified(const String& name) { return getDirectory(name)->fileModified(name); } void FileSwitchDirectory::touchFile(const String& name) { getDirectory(name)->touchFile(name); } void FileSwitchDirectory::deleteFile(const String& name) { getDirectory(name)->deleteFile(name); } int64_t FileSwitchDirectory::fileLength(const String& name) { return getDirectory(name)->fileLength(name); } IndexOutputPtr FileSwitchDirectory::createOutput(const String& name) { return getDirectory(name)->createOutput(name); } void FileSwitchDirectory::sync(const String& name) { getDirectory(name)->sync(name); } IndexInputPtr FileSwitchDirectory::openInput(const String& name) { return getDirectory(name)->openInput(name); } } LucenePlusPlus-rel_3.0.9/src/core/store/IndexInput.cpp000066400000000000000000000075271456444476200230110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IndexInput.h" #include "UTF8Stream.h" #include "Reader.h" #include "StringUtils.h" namespace Lucene { IndexInput::IndexInput() { preUTF8Strings = false; } IndexInput::~IndexInput() { } void IndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer) { // default to ignoring useBuffer entirely readBytes(b, offset, length); } int32_t IndexInput::readInt() { int32_t i = (readByte() & 0xff) << 24; i |= (readByte() & 0xff) << 16; i |= (readByte() & 0xff) << 8; i |= (readByte() & 0xff); return i; } int32_t IndexInput::readVInt() { uint8_t b = readByte(); int32_t i = (b & 0x7f); for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) { b = readByte(); i |= (b & 0x7f) << shift; } return i; } int64_t IndexInput::readLong() { int64_t i = (int64_t)readInt() << 32; i |= (readInt() & 0xffffffffLL); return i; } int64_t IndexInput::readVLong() { uint8_t b = readByte(); int64_t i = (b & 0x7f); for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) { b = readByte(); i |= (int64_t)(b & 0x7f) << shift; } return i; } void IndexInput::setModifiedUTF8StringsMode() { preUTF8Strings = true; } String IndexInput::readString() { if (preUTF8Strings) { return readModifiedUTF8String(); } int32_t length = readVInt(); ByteArray bytes(ByteArray::newInstance(length)); readBytes(bytes.get(), 0, length); return StringUtils::toUnicode(bytes.get(), length); } String IndexInput::readModifiedUTF8String() { int32_t length = readVInt(); CharArray chars(CharArray::newInstance(length)); return String(chars.get(), readChars(chars.get(), 0, length)); } int32_t IndexInput::readChars(wchar_t* buffer, int32_t start, int32_t length) { Array chars(Array::newInstance(length)); for (int32_t i = 0; i < length; ++i) { uint8_t b = readByte(); if ((b & 0x80) == 0) { chars[i] = (uint16_t)(b & 0x7f); } else if ((b & 0xe0) != 0xe0) { chars[i] = (uint16_t)(((b & 0x1f) << 6) | (readByte() & 0x3f)); } 
else { uint32_t ch = ((b & 0x0f) << 12); ch |= (readByte() & 0x3f) << 6; ch |= (readByte() & 0x3f); chars[i] = (uint16_t)ch; } } UTF16DecoderPtr utf16Decoder(newLucene(chars.get(), chars.get() + length)); int32_t decodeLength = utf16Decoder->decode(buffer + start, length); return decodeLength == Reader::READER_EOF ? 0 : decodeLength; } void IndexInput::skipChars(int32_t length) { for (int32_t i = 0; i < length; ++i) { uint8_t b = readByte(); if ((b & 0x80) == 0) { // do nothing, we only need one byte } else if ((b & 0xe0) != 0xe0) { readByte(); // read an additional byte } else { // read two additional bytes readByte(); readByte(); } } } MapStringString IndexInput::readStringStringMap() { MapStringString map(MapStringString::newInstance()); int32_t count = readInt(); for (int32_t i = 0; i < count; ++i) { String key(readString()); String val(readString()); map.put(key, val); } return map; } LuceneObjectPtr IndexInput::clone(const LuceneObjectPtr& other) { IndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(LuceneObject::clone(other))); cloneIndexInput->preUTF8Strings = preUTF8Strings; return cloneIndexInput; } } LucenePlusPlus-rel_3.0.9/src/core/store/IndexOutput.cpp000066400000000000000000000061701456444476200232030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "IndexOutput.h" #include "IndexInput.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t IndexOutput::COPY_BUFFER_SIZE = 16384; IndexOutput::~IndexOutput() { } void IndexOutput::writeBytes(const uint8_t* b, int32_t length) { writeBytes(b, 0, length); } void IndexOutput::writeInt(int32_t i) { writeByte((uint8_t)(i >> 24)); writeByte((uint8_t)(i >> 16)); writeByte((uint8_t)(i >> 8)); writeByte((uint8_t)i); } void IndexOutput::writeVInt(int32_t i) { while ((i & ~0x7f) != 0) { writeByte((uint8_t)((i & 0x7f) | 0x80)); i = MiscUtils::unsignedShift(i, 7); } writeByte((uint8_t)i); } void IndexOutput::writeLong(int64_t i) { writeInt((int32_t)(i >> 32)); writeInt((int32_t)i); } void IndexOutput::writeVLong(int64_t i) { while ((i & ~0x7f) != 0) { writeByte((uint8_t)((i & 0x7f) | 0x80)); i = MiscUtils::unsignedShift(i, (int64_t)7); } writeByte((uint8_t)i); } void IndexOutput::writeString(const String& s) { UTF8ResultPtr utf8Result(newLucene()); StringUtils::toUTF8(s.c_str(), s.length(), utf8Result); writeVInt(utf8Result->length); writeBytes(utf8Result->result.get(), utf8Result->length); } void IndexOutput::writeChars(const String& s, int32_t start, int32_t length) { int32_t end = start + length; for (int32_t i = start; i < end; ++i) { int32_t code = (int32_t)s[i]; if (code >= 0x01 && code <= 0x7f) { writeByte((uint8_t)code); } else if (((code >= 0x80) && (code <= 0x7ff)) || code == 0) { writeByte((uint8_t)(0xc0 | (code >> 6))); writeByte((uint8_t)(0x80 | (code & 0x3f))); } else { writeByte((uint8_t)(0xe0 | MiscUtils::unsignedShift(code, 12))); writeByte((uint8_t)(0x80 | ((code >> 6) & 0x3f))); writeByte((uint8_t)(0x80 | (code & 0x3f))); } } } void IndexOutput::copyBytes(const IndexInputPtr& input, int64_t numBytes) { BOOST_ASSERT(numBytes >= 0); int64_t left = numBytes; if (!copyBuffer) { copyBuffer = 
ByteArray::newInstance(COPY_BUFFER_SIZE); } while (left > 0) { int32_t toCopy = left > COPY_BUFFER_SIZE ? COPY_BUFFER_SIZE : (int32_t)left; input->readBytes(copyBuffer.get(), 0, toCopy); writeBytes(copyBuffer.get(), 0, toCopy); left -= toCopy; } } void IndexOutput::setLength(int64_t length) { } void IndexOutput::writeStringStringMap(MapStringString map) { if (!map) { writeInt(0); } else { writeInt(map.size()); for (MapStringString::iterator entry = map.begin(); entry != map.end(); ++entry) { writeString(entry->first); writeString(entry->second); } } } } LucenePlusPlus-rel_3.0.9/src/core/store/Lock.cpp000066400000000000000000000023101456444476200215730ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Lock.h" #include "LuceneThread.h" namespace Lucene { /// How long {@link #obtain(int64_t)} waits, in milliseconds, in between attempts to acquire the lock. const int32_t Lock::LOCK_OBTAIN_WAIT_FOREVER = -1; /// Pass this value to {@link #obtain(int64_t)} to try forever to obtain the lock. 
const int32_t Lock::LOCK_POLL_INTERVAL = 1000; Lock::~Lock() { } bool Lock::obtain(int32_t lockWaitTimeout) { bool locked = obtain(); int32_t maxSleepCount = lockWaitTimeout / LOCK_POLL_INTERVAL; int32_t sleepCount = 0; while (!locked) { if (lockWaitTimeout != LOCK_OBTAIN_WAIT_FOREVER && sleepCount++ >= maxSleepCount) { boost::throw_exception(LockObtainFailedException(L"Lock obtain timed out")); } LuceneThread::threadSleep(LOCK_POLL_INTERVAL); locked = obtain(); } return locked; } } LucenePlusPlus-rel_3.0.9/src/core/store/LockFactory.cpp000066400000000000000000000011351456444476200231270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LockFactory.h" namespace Lucene { LockFactory::~LockFactory() { } void LockFactory::setLockPrefix(const String& lockPrefix) { this->lockPrefix = lockPrefix; } String LockFactory::getLockPrefix() { return lockPrefix; } } LucenePlusPlus-rel_3.0.9/src/core/store/MMapDirectory.cpp000066400000000000000000000055731456444476200234400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MMapDirectory.h" #include "_MMapDirectory.h" #include "SimpleFSDirectory.h" #include "_SimpleFSDirectory.h" #include "MiscUtils.h" #include "FileUtils.h" #include "StringUtils.h" namespace Lucene { MMapDirectory::MMapDirectory(const String& path, const LockFactoryPtr& lockFactory) : FSDirectory(path, lockFactory) { } MMapDirectory::~MMapDirectory() { } IndexInputPtr MMapDirectory::openInput(const String& name, int32_t bufferSize) { ensureOpen(); return newLucene(FileUtils::joinPath(directory, name)); } IndexOutputPtr MMapDirectory::createOutput(const String& name) { initOutput(name); return newLucene(FileUtils::joinPath(directory, name)); } MMapIndexInput::MMapIndexInput(const String& path) { _length = path.empty() ? 0 : (int32_t)FileUtils::fileLength(path); bufferPosition = 0; if (!path.empty()) { try { file.open(boost::filesystem::wpath(path), _length); } catch (...) { boost::throw_exception(FileNotFoundException(path)); } } isClone = false; } MMapIndexInput::~MMapIndexInput() { } uint8_t MMapIndexInput::readByte() { try { return file.data()[bufferPosition++]; } catch (...) { boost::throw_exception(IOException(L"Read past EOF")); return 0; } } void MMapIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length) { try { MiscUtils::arrayCopy(file.data(), bufferPosition, b, offset, length); bufferPosition += length; } catch (...) 
{ boost::throw_exception(IOException(L"Read past EOF")); } } int64_t MMapIndexInput::getFilePointer() { return bufferPosition; } void MMapIndexInput::seek(int64_t pos) { bufferPosition = (int32_t)pos; } int64_t MMapIndexInput::length() { return (int64_t)_length; } void MMapIndexInput::close() { if (isClone || !file.is_open()) { return; } _length = 0; bufferPosition = 0; file.close(); } LuceneObjectPtr MMapIndexInput::clone(const LuceneObjectPtr& other) { if (!file.is_open()) { boost::throw_exception(AlreadyClosedException(L"MMapIndexInput already closed")); } LuceneObjectPtr clone = IndexInput::clone(other ? other : newLucene()); MMapIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(clone)); cloneIndexInput->_length = _length; cloneIndexInput->file = file; cloneIndexInput->bufferPosition = bufferPosition; cloneIndexInput->isClone = true; return cloneIndexInput; } } LucenePlusPlus-rel_3.0.9/src/core/store/NativeFSLockFactory.cpp000066400000000000000000000151101456444476200245250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include #include "NativeFSLockFactory.h" #include "_NativeFSLockFactory.h" #include "Random.h" #include "FileUtils.h" #include "StringUtils.h" namespace Lucene { NativeFSLockFactory::NativeFSLockFactory(const String& lockDirName) { setLockDir(lockDirName); } NativeFSLockFactory::~NativeFSLockFactory() { } LockPtr NativeFSLockFactory::makeLock(const String& lockName) { SyncLock syncLock(this); return newLucene(lockDir, lockPrefix.empty() ? 
lockName : lockPrefix + L"-" + lockName); } void NativeFSLockFactory::clearLock(const String& lockName) { // note that this isn't strictly required anymore because the existence of these files does not mean // they are locked, but still do this in case people really want to see the files go away if (FileUtils::isDirectory(lockDir)) { String lockPath(FileUtils::joinPath(lockDir, lockPrefix.empty() ? lockName : lockPrefix + L"-" + lockName)); if (FileUtils::fileExists(lockPath) && !FileUtils::removeFile(lockPath)) { boost::throw_exception(IOException(L"Failed to delete: " + lockPath)); } } } NativeFSLock::NativeFSLock(const String& lockDir, const String& lockFileName) { this->lockDir = lockDir; path = FileUtils::joinPath(lockDir, lockFileName); } NativeFSLock::~NativeFSLock() { try { release(); } catch (...) { } } SynchronizePtr NativeFSLock::LOCK_HELD_LOCK() { static SynchronizePtr _LOCK_HELD_LOCK; LUCENE_RUN_ONCE( _LOCK_HELD_LOCK = newInstance(); ); return _LOCK_HELD_LOCK; } HashSet NativeFSLock::LOCK_HELD() { static HashSet _LOCK_HELD; LUCENE_RUN_ONCE( _LOCK_HELD = HashSet::newInstance(); ); return _LOCK_HELD; } bool NativeFSLock::lockExists() { SyncLock syncLock(this); return lock.get() != NULL; } bool NativeFSLock::obtain() { SyncLock syncLock(this); if (lockExists()) // our instance is already locked { return false; } // ensure that lockdir exists and is a directory if (!FileUtils::fileExists(lockDir)) { if (!FileUtils::createDirectory(lockDir)) { boost::throw_exception(IOException(L"Cannot create directory: " + lockDir)); } } else if (!FileUtils::isDirectory(lockDir)) { boost::throw_exception(IOException(L"Found regular file where directory expected: " + lockDir)); } bool markedHeld = false; // make sure nobody else in-process has this lock held already and mark it held if not { SyncLock heldLock(LOCK_HELD_LOCK()); if (LOCK_HELD().contains(path)) // someone else already has the lock { return false; } else { // this "reserves" the fact that we are the one 
thread trying to obtain this lock, so we own the // only instance of a channel against this file LOCK_HELD().add(path); markedHeld = true; } } try { // we can get intermittent "access denied" here, so we treat this as failure to acquire the lock boost::filesystem::ofstream f(path, std::ios::binary | std::ios::out); if (f.is_open()) { std::string lockpath; // file_lock only accepts char* filenames and we cannot losslessly convert Unicode paths to // char*. The usual way to work around this is to use 8.3 short names. #if defined(_WIN32) || defined(_WIN64) wchar_t pathOut[MAX_PATH+1]; if (::GetShortPathNameW(path.c_str(), pathOut, MAX_PATH+1) != 0) { lockpath = boost::filesystem::path(pathOut).string(); } else #endif // Windows { lockpath = boost::filesystem::path(path).string(); } lock = newInstance(lockpath.c_str()); lock->lock(); } } catch (...) { lock.reset(); } if (markedHeld && !lockExists()) { SyncLock heldLock(LOCK_HELD_LOCK()); LOCK_HELD().remove(path); } return lockExists(); } void NativeFSLock::release() { SyncLock syncLock(this); if (lockExists()) { try { lock->unlock(); lock.reset(); } catch (...) { } { SyncLock heldLock(LOCK_HELD_LOCK()); LOCK_HELD().remove(path); } // we don't care anymore if the file cannot be deleted because it's held up by another process // (eg. AntiVirus). NativeFSLock does not depend on the existence/absence of the lock file FileUtils::removeFile(path); } else { // if we don't hold the lock, and somebody still called release(), for example as a result of // calling IndexWriter.unlock(), we should attempt to obtain the lock and release it. If the // obtain fails, it means the lock cannot be released, and we should throw a proper exception // rather than silently failing/not doing anything. 
bool obtained = false; LuceneException finally; try { obtained = obtain(); if (!obtained) { boost::throw_exception(LockReleaseFailedException(L"Cannot forcefully unlock a NativeFSLock which is held by another indexer component: " + path)); } } catch (LuceneException& e) { finally = e; } if (obtained) { release(); } finally.throwException(); } } bool NativeFSLock::isLocked() { SyncLock syncLock(this); // the test for is islocked is not directly possible with native file locks // first a shortcut, if a lock reference in this instance is available if (lockExists()) { return true; } // look if lock file is present; if not, there can definitely be no lock! if (!FileUtils::fileExists(path)) { return false; } // try to obtain and release (if was locked) the lock try { bool obtained = obtain(); if (obtained) { release(); } return !obtained; } catch (LuceneException&) { return false; } } String NativeFSLock::toString() { return getClassName() + L"@" + path; } } LucenePlusPlus-rel_3.0.9/src/core/store/NoLockFactory.cpp000066400000000000000000000025121456444476200234240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NoLockFactory.h" #include "_NoLockFactory.h" namespace Lucene { NoLockFactory::~NoLockFactory() { } NoLockFactoryPtr NoLockFactory::getNoLockFactory() { static NoLockFactoryPtr singleton; LUCENE_RUN_ONCE( singleton = newLucene(); CycleCheck::addStatic(singleton); ); return singleton; } NoLockPtr NoLockFactory::getSingletonLock() { // Single instance returned whenever makeLock is called. 
static NoLockPtr singletonLock; LUCENE_RUN_ONCE( singletonLock = newLucene(); CycleCheck::addStatic(singletonLock); ); return singletonLock; } LockPtr NoLockFactory::makeLock(const String& lockName) { return getSingletonLock(); } void NoLockFactory::clearLock(const String& lockName) { } NoLock::~NoLock() { } bool NoLock::obtain() { return true; } void NoLock::release() { } bool NoLock::isLocked() { return false; } String NoLock::toString() { return getClassName(); } } LucenePlusPlus-rel_3.0.9/src/core/store/RAMDirectory.cpp000066400000000000000000000110671456444476200232200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "RAMDirectory.h" #include "RAMFile.h" #include "RAMInputStream.h" #include "RAMOutputStream.h" #include "SingleInstanceLockFactory.h" #include "LuceneThread.h" #include "MiscUtils.h" namespace Lucene { RAMDirectory::RAMDirectory() { this->fileMap = MapStringRAMFile::newInstance(); this->_sizeInBytes = 0; this->copyDirectory = false; this->closeDir = false; setLockFactory(newLucene()); } RAMDirectory::RAMDirectory(const DirectoryPtr& dir) { this->fileMap = MapStringRAMFile::newInstance(); this->_sizeInBytes = 0; this->copyDirectory = true; this->_dirSource = dir; this->closeDir = false; setLockFactory(newLucene()); } RAMDirectory::RAMDirectory(const DirectoryPtr& dir, bool closeDir) { this->fileMap = MapStringRAMFile::newInstance(); this->_sizeInBytes = 0; this->copyDirectory = true; this->_dirSource = dir; this->closeDir = closeDir; setLockFactory(newLucene()); } RAMDirectory::~RAMDirectory() { } void RAMDirectory::initialize() { if (copyDirectory) { Directory::copy(DirectoryPtr(_dirSource), 
shared_from_this(), closeDir); } } HashSet RAMDirectory::listAll() { SyncLock syncLock(this); ensureOpen(); HashSet result(HashSet::newInstance()); for (MapStringRAMFile::iterator fileName = fileMap.begin(); fileName != fileMap.end(); ++fileName) { result.add(fileName->first); } return result; } bool RAMDirectory::fileExists(const String& name) { ensureOpen(); SyncLock syncLock(this); return fileMap.contains(name); } uint64_t RAMDirectory::fileModified(const String& name) { ensureOpen(); SyncLock syncLock(this); MapStringRAMFile::iterator ramFile = fileMap.find(name); if (ramFile == fileMap.end()) { boost::throw_exception(FileNotFoundException(name)); } return ramFile->second->getLastModified(); } void RAMDirectory::touchFile(const String& name) { ensureOpen(); RAMFilePtr file; { SyncLock syncLock(this); MapStringRAMFile::iterator ramFile = fileMap.find(name); if (ramFile == fileMap.end()) { boost::throw_exception(FileNotFoundException(name)); } file = ramFile->second; } int64_t ts1 = MiscUtils::currentTimeMillis(); while (ts1 == MiscUtils::currentTimeMillis()) { LuceneThread::threadSleep(1); } file->setLastModified(MiscUtils::currentTimeMillis()); } int64_t RAMDirectory::fileLength(const String& name) { ensureOpen(); SyncLock syncLock(this); MapStringRAMFile::iterator ramFile = fileMap.find(name); if (ramFile == fileMap.end()) { boost::throw_exception(FileNotFoundException(name)); } return ramFile->second->getLength(); } int64_t RAMDirectory::sizeInBytes() { SyncLock syncLock(this); ensureOpen(); return _sizeInBytes; } void RAMDirectory::deleteFile(const String& name) { SyncLock syncLock(this); ensureOpen(); MapStringRAMFile::iterator ramFile = fileMap.find(name); if (ramFile == fileMap.end()) { boost::throw_exception(FileNotFoundException(name)); } _sizeInBytes -= ramFile->second->getSizeInBytes(); fileMap.remove(name); } IndexOutputPtr RAMDirectory::createOutput(const String& name) { ensureOpen(); RAMFilePtr file(newLucene(shared_from_this())); { SyncLock 
syncLock(this); MapStringRAMFile::iterator existing = fileMap.find(name); if (existing != fileMap.end()) { _sizeInBytes -= existing->second->getSizeInBytes(); existing->second->_directory.reset(); } fileMap.put(name, file); } return newLucene(file); } IndexInputPtr RAMDirectory::openInput(const String& name) { ensureOpen(); RAMFilePtr file; { SyncLock syncLock(this); MapStringRAMFile::iterator ramFile = fileMap.find(name); if (ramFile == fileMap.end()) { boost::throw_exception(FileNotFoundException(name)); } file = ramFile->second; } return newLucene(file); } void RAMDirectory::close() { isOpen = false; fileMap.reset(); } } LucenePlusPlus-rel_3.0.9/src/core/store/RAMFile.cpp000066400000000000000000000040221456444476200221240ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "RAMFile.h" #include "RAMDirectory.h" #include "MiscUtils.h" namespace Lucene { RAMFile::RAMFile() { this->buffers = Collection::newInstance(); this->length = 0; this->sizeInBytes = 0; this->lastModified = MiscUtils::currentTimeMillis(); } RAMFile::RAMFile(const RAMDirectoryPtr& directory) { this->buffers = Collection::newInstance(); this->length = 0; this->sizeInBytes = 0; this->_directory = directory; this->lastModified = MiscUtils::currentTimeMillis(); } RAMFile::~RAMFile() { } int64_t RAMFile::getLength() { SyncLock syncLock(this); return length; } void RAMFile::setLength(int64_t length) { SyncLock syncLock(this); this->length = length; } int64_t RAMFile::getLastModified() { SyncLock syncLock(this); return lastModified; } void RAMFile::setLastModified(int64_t lastModified) { SyncLock syncLock(this); this->lastModified = lastModified; } ByteArray RAMFile::addBuffer(int32_t size) { ByteArray buffer(newBuffer(size)); { SyncLock syncLock(this); buffers.add(buffer); sizeInBytes += size; } RAMDirectoryPtr directory(_directory.lock()); if (directory) { SyncLock dirLock(directory); directory->_sizeInBytes += size; } return buffer; } ByteArray RAMFile::getBuffer(int32_t index) { SyncLock syncLock(this); return buffers[index]; } int32_t RAMFile::numBuffers() { SyncLock syncLock(this); return buffers.size(); } ByteArray RAMFile::newBuffer(int32_t size) { return ByteArray::newInstance(size); } int64_t RAMFile::getSizeInBytes() { SyncLock syncLock(this); return sizeInBytes; } } LucenePlusPlus-rel_3.0.9/src/core/store/RAMInputStream.cpp000066400000000000000000000075331456444476200235320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "RAMInputStream.h" #include "RAMFile.h" #include "RAMOutputStream.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t RAMInputStream::BUFFER_SIZE = RAMOutputStream::BUFFER_SIZE; RAMInputStream::RAMInputStream() { _length = 0; // make sure that we switch to the first needed buffer lazily currentBufferIndex = -1; bufferPosition = 0; bufferStart = 0; bufferLength = 0; } RAMInputStream::RAMInputStream(const RAMFilePtr& f) { file = f; _length = file->length; if (_length / BUFFER_SIZE >= INT_MAX) { boost::throw_exception(IOException(L"Too large RAMFile: " + StringUtils::toString(_length))); } // make sure that we switch to the first needed buffer lazily currentBufferIndex = -1; bufferPosition = 0; bufferStart = 0; bufferLength = 0; } RAMInputStream::~RAMInputStream() { } void RAMInputStream::close() { // nothing to do here } int64_t RAMInputStream::length() { return _length; } uint8_t RAMInputStream::readByte() { if (bufferPosition >= bufferLength) { ++currentBufferIndex; switchCurrentBuffer(true); } return currentBuffer[bufferPosition++]; } void RAMInputStream::readBytes(uint8_t* b, int32_t offset, int32_t length) { while (length > 0) { if (bufferPosition >= bufferLength) { ++currentBufferIndex; switchCurrentBuffer(true); } int32_t remainInBuffer = bufferLength - bufferPosition; int32_t bytesToCopy = length < remainInBuffer ? 
length : remainInBuffer; MiscUtils::arrayCopy(currentBuffer.get(), bufferPosition, b, offset, bytesToCopy); offset += bytesToCopy; length -= bytesToCopy; bufferPosition += bytesToCopy; } } void RAMInputStream::switchCurrentBuffer(bool enforceEOF) { if (currentBufferIndex >= file->numBuffers()) { // end of file reached, no more buffers left if (enforceEOF) { boost::throw_exception(IOException(L"Read past EOF")); } else { // force eof if a read takes place at this position --currentBufferIndex; bufferPosition = BUFFER_SIZE; } } else { currentBuffer = file->getBuffer(currentBufferIndex); bufferPosition = 0; bufferStart = (int64_t)BUFFER_SIZE * (int64_t)currentBufferIndex; int64_t buflen = _length - bufferStart; bufferLength = buflen > BUFFER_SIZE ? BUFFER_SIZE : (int32_t)buflen; } } int64_t RAMInputStream::getFilePointer() { return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition; } void RAMInputStream::seek(int64_t pos) { if (!currentBuffer || (int32_t)pos < bufferStart || (int32_t)pos >= bufferStart + BUFFER_SIZE) { currentBufferIndex = (int32_t)(pos / BUFFER_SIZE); switchCurrentBuffer(false); } bufferPosition = (int32_t)(pos % BUFFER_SIZE); } LuceneObjectPtr RAMInputStream::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = IndexInput::clone(other ? other : newLucene()); RAMInputStreamPtr cloneInputStream(boost::dynamic_pointer_cast(clone)); cloneInputStream->file = file; cloneInputStream->_length = _length; cloneInputStream->currentBuffer = currentBuffer; cloneInputStream->currentBufferIndex = currentBufferIndex; cloneInputStream->bufferPosition = bufferPosition; cloneInputStream->bufferStart = bufferStart; cloneInputStream->bufferLength = bufferLength; return cloneInputStream; } } LucenePlusPlus-rel_3.0.9/src/core/store/RAMOutputStream.cpp000066400000000000000000000074431456444476200237330ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "RAMOutputStream.h" #include "RAMFile.h" #include "RAMDirectory.h" #include "MiscUtils.h" namespace Lucene { const int32_t RAMOutputStream::BUFFER_SIZE = 1024; RAMOutputStream::RAMOutputStream() { file = newLucene(RAMDirectoryPtr()); // make sure that we switch to the first needed buffer lazily currentBufferIndex = -1; bufferPosition = 0; bufferStart = 0; bufferLength = 0; } RAMOutputStream::RAMOutputStream(const RAMFilePtr& f) { file = f; // make sure that we switch to the first needed buffer lazily currentBufferIndex = -1; bufferPosition = 0; bufferStart = 0; bufferLength = 0; } RAMOutputStream::~RAMOutputStream() { } void RAMOutputStream::writeTo(const IndexOutputPtr& out) { flush(); int64_t end = file->length; int64_t pos = 0; int32_t buffer = 0; while (pos < end) { int32_t length = BUFFER_SIZE; int64_t nextPos = pos + length; if (nextPos > end) { // at the last buffer length = (int32_t)(end - pos); } out->writeBytes(file->getBuffer(buffer++).get(), length); pos = nextPos; } } void RAMOutputStream::reset() { currentBuffer.reset(); currentBufferIndex = -1; bufferPosition = 0; bufferStart = 0; bufferLength = 0; file->setLength(0); } void RAMOutputStream::close() { flush(); } void RAMOutputStream::seek(int64_t pos) { // set the file length in case we seek back and flush() has not been called yet setFileLength(); if ((int64_t)pos < bufferStart || (int64_t)pos >= bufferStart + bufferLength) { currentBufferIndex = (int32_t)(pos / BUFFER_SIZE); switchCurrentBuffer(); } bufferPosition = (int32_t)(pos % BUFFER_SIZE); } int64_t RAMOutputStream::length() { return file->length; } void RAMOutputStream::writeByte(uint8_t b) { if (bufferPosition == bufferLength) { ++currentBufferIndex; switchCurrentBuffer(); } 
currentBuffer[bufferPosition++] = b; } void RAMOutputStream::writeBytes(const uint8_t* b, int32_t offset, int32_t length) { while (length > 0) { BOOST_ASSERT(b != NULL); if (bufferPosition == bufferLength) { ++currentBufferIndex; switchCurrentBuffer(); } int32_t remainInBuffer = currentBuffer.size() - bufferPosition; int32_t bytesToCopy = length < remainInBuffer ? length : remainInBuffer; MiscUtils::arrayCopy(b, offset, currentBuffer.get(), bufferPosition, bytesToCopy); offset += bytesToCopy; length -= bytesToCopy; bufferPosition += bytesToCopy; } } void RAMOutputStream::switchCurrentBuffer() { if (currentBufferIndex == file->numBuffers()) { currentBuffer = file->addBuffer(BUFFER_SIZE); } else { currentBuffer = file->getBuffer(currentBufferIndex); } bufferPosition = 0; bufferStart = (int64_t)BUFFER_SIZE * (int64_t)currentBufferIndex; bufferLength = currentBuffer.size(); } void RAMOutputStream::setFileLength() { int64_t pointer = bufferStart + bufferPosition; if (pointer > file->length) { file->setLength(pointer); } } void RAMOutputStream::flush() { file->setLastModified(MiscUtils::currentTimeMillis()); setFileLength(); } int64_t RAMOutputStream::getFilePointer() { return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition; } int64_t RAMOutputStream::sizeInBytes() { return file->numBuffers() * BUFFER_SIZE; } } LucenePlusPlus-rel_3.0.9/src/core/store/SimpleFSDirectory.cpp000066400000000000000000000136411456444476200242630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#include "LuceneInc.h"
#include <boost/filesystem/fstream.hpp>
#include "SimpleFSDirectory.h"
#include "_SimpleFSDirectory.h"
#include "IndexOutput.h"
#include "FileReader.h"
#include "FileUtils.h"
#include "StringUtils.h"

namespace Lucene {

/// Straightforward FSDirectory implementation backed by boost::filesystem streams.
SimpleFSDirectory::SimpleFSDirectory(const String& path, const LockFactoryPtr& lockFactory) : FSDirectory(path, lockFactory) {
}

SimpleFSDirectory::~SimpleFSDirectory() {
}

/// Create a new, empty file for writing and return a stream over it.
IndexOutputPtr SimpleFSDirectory::createOutput(const String& name) {
    initOutput(name); // FSDirectory bookkeeping: ensure open, create dir, delete stale file
    return newLucene<SimpleFSIndexOutput>(FileUtils::joinPath(directory, name));
}

IndexInputPtr SimpleFSDirectory::openInput(const String& name) {
    // Delegates to the base class, which calls the buffered overload below.
    return FSDirectory::openInput(name);
}

/// Open an existing file for reading with the given buffer size.
IndexInputPtr SimpleFSDirectory::openInput(const String& name, int32_t bufferSize) {
    ensureOpen();
    return newLucene<SimpleFSIndexInput>(FileUtils::joinPath(directory, name), bufferSize, getReadChunkSize());
}

const int32_t InputFile::FILE_EOF = FileReader::FILE_EOF;
const int32_t InputFile::FILE_ERROR = FileReader::FILE_ERROR;

/// Thin RAII wrapper around a binary ifstream plus a tracked read position.
InputFile::InputFile(const String& path) {
    file = newInstance<boost::filesystem::ifstream>(path, std::ios::binary | std::ios::in);
    if (!file->is_open()) {
        boost::throw_exception(FileNotFoundException(path));
    }
    position = 0;
    length = FileUtils::fileLength(path);
}

InputFile::~InputFile() {
}

/// Seek the underlying stream; throws IOException if the seek fails.
void InputFile::setPosition(int64_t position) {
    this->position = position;
    file->seekg((std::streamoff)position);
    if (!file->good()) {
        boost::throw_exception(IOException());
    }
}

int64_t InputFile::getPosition() {
    return position;
}

int64_t InputFile::getLength() {
    return length;
}

/// Read up to length bytes into b + offset.
/// Returns the byte count, FILE_EOF at end of file, or FILE_ERROR on failure.
int32_t InputFile::read(uint8_t* b, int32_t offset, int32_t length) {
    try {
        if (file->eof()) {
            return FILE_EOF;
        }
        file->read((char*)b + offset, length);
        int32_t readCount = file->gcount(); // may be a short read at end of file
        position += readCount;
        return readCount;
    } catch (...) {
        return FILE_ERROR;
    }
}

void InputFile::close() {
    if (file->is_open()) {
        file->close();
    }
}

bool InputFile::isValid() {
    return (file && file->is_open() && file->good());
}

SimpleFSIndexInput::SimpleFSIndexInput() {
    this->chunkSize = 0;
    this->isClone = false;
}

SimpleFSIndexInput::SimpleFSIndexInput(const String& path, int32_t bufferSize, int32_t chunkSize) : BufferedIndexInput(bufferSize) {
    this->file = newLucene<InputFile>(path);
    this->path = path;
    this->chunkSize = chunkSize;
    this->isClone = false;
}

SimpleFSIndexInput::~SimpleFSIndexInput() {
}

/// Fill b + offset with length bytes from the current file pointer.
/// Reads are chunked to at most chunkSize bytes, under a lock on the shared
/// InputFile (clones share the same InputFile instance).
void SimpleFSIndexInput::readInternal(uint8_t* b, int32_t offset, int32_t length) {
    SyncLock fileLock(file);
    int64_t position = getFilePointer();
    if (position != file->getPosition()) {
        file->setPosition(position);
    }
    int32_t total = 0;
    while (total < length) {
        int32_t readLength = total + chunkSize > length ? length - total : chunkSize;
        int32_t i = file->read(b, offset + total, readLength);
        if (i == InputFile::FILE_EOF) {
            boost::throw_exception(IOException(L"Read past EOF"));
        }
        // NOTE(review): InputFile::read can also return FILE_ERROR, which is
        // not checked here and would corrupt 'total' — confirm error handling.
        total += i;
    }
}

void SimpleFSIndexInput::seekInternal(int64_t pos) {
    // Intentionally empty: readInternal repositions the file lazily.
}

int64_t SimpleFSIndexInput::length() {
    return file->getLength();
}

void SimpleFSIndexInput::close() {
    // Clones share the InputFile with their parent; only the original closes it.
    if (!isClone) {
        file->close();
    }
}

bool SimpleFSIndexInput::isValid() {
    return file->isValid();
}

/// Clone shares the underlying InputFile; readInternal serializes access to it.
LuceneObjectPtr SimpleFSIndexInput::clone(const LuceneObjectPtr& other) {
    LuceneObjectPtr clone = BufferedIndexInput::clone(other ? other : newLucene<SimpleFSIndexInput>());
    SimpleFSIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast<SimpleFSIndexInput>(clone));
    cloneIndexInput->path = path;
    cloneIndexInput->file = file;
    cloneIndexInput->chunkSize = chunkSize;
    cloneIndexInput->isClone = true;
    return cloneIndexInput;
}

/// RAII wrapper around a binary ofstream; length operations go through FileUtils.
OutputFile::OutputFile(const String& path) {
    this->path = path;
    file = newInstance<boost::filesystem::ofstream>(path, std::ios::binary | std::ios::out);
}

OutputFile::~OutputFile() {
}

/// Write length bytes from b + offset; returns false on any failure.
bool OutputFile::write(const uint8_t* b, int32_t offset, int32_t length) {
    if (!file->is_open()) {
        return false;
    }
    try {
        file->write((char*)b + offset, length);
        return file->good();
    } catch (...) {
        return false;
    }
}

void OutputFile::close() {
    file.reset(); // destroying the stream flushes and closes it
}

void OutputFile::setPosition(int64_t position) {
    file->seekp((std::streamoff)position);
    if (!file->good()) {
        boost::throw_exception(IOException());
    }
}

int64_t OutputFile::getLength() {
    // Queried from the filesystem, not the (possibly unflushed) stream.
    return FileUtils::fileLength(path);
}

void OutputFile::setLength(int64_t length) {
    FileUtils::setFileLength(path, length);
}

void OutputFile::flush() {
    if (file->is_open()) {
        file->flush();
    }
}

bool OutputFile::isValid() {
    return (file && file->is_open() && file->good());
}

SimpleFSIndexOutput::SimpleFSIndexOutput(const String& path) {
    file = newLucene<OutputFile>(path);
    isOpen = true;
}

SimpleFSIndexOutput::~SimpleFSIndexOutput() {
}

/// BufferedIndexOutput hook: push a full buffer to disk and flush immediately.
void SimpleFSIndexOutput::flushBuffer(const uint8_t* b, int32_t offset, int32_t length) {
    file->write(b, offset, length);
    file->flush();
}

void SimpleFSIndexOutput::close() {
    if (isOpen) {
        BufferedIndexOutput::close(); // flushes any remaining buffered bytes first
        file.reset();
        isOpen = false;
    }
}

void SimpleFSIndexOutput::seek(int64_t pos) {
    BufferedIndexOutput::seek(pos);
    file->setPosition(pos);
}

int64_t SimpleFSIndexOutput::length() {
    return file->getLength();
}

void SimpleFSIndexOutput::setLength(int64_t length) {
    file->setLength(length);
}

}
/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#include "LuceneInc.h"
#include <boost/filesystem/fstream.hpp>
#include "SimpleFSLockFactory.h"
#include "_SimpleFSLockFactory.h"
#include "FileUtils.h"
#include "StringUtils.h"

namespace Lucene {

SimpleFSLockFactory::SimpleFSLockFactory() {
}

/// Lock factory that represents each lock as a plain file inside lockDir.
SimpleFSLockFactory::SimpleFSLockFactory(const String& lockDir) {
    setLockDir(lockDir);
}

SimpleFSLockFactory::~SimpleFSLockFactory() {
}

/// Create a lock named "<prefix>-<name>" (or just "<name>" with no prefix).
LockPtr SimpleFSLockFactory::makeLock(const String& lockName) {
    return newLucene<SimpleFSLock>(lockDir, lockPrefix.empty() ? lockName : lockPrefix + L"-" + lockName);
}

/// Forcibly remove a (possibly stale) lock file; throws IOException if the
/// file exists but cannot be deleted.
void SimpleFSLockFactory::clearLock(const String& lockName) {
    if (FileUtils::isDirectory(lockDir)) {
        String lockPath(FileUtils::joinPath(lockDir, lockPrefix.empty() ? lockName : lockPrefix + L"-" + lockName));
        if (FileUtils::fileExists(lockPath) && !FileUtils::removeFile(lockPath)) {
            boost::throw_exception(IOException(L"Cannot delete " + lockPath));
        }
    }
}

SimpleFSLock::SimpleFSLock(const String& lockDir, const String& lockFileName) {
    this->lockDir = lockDir;
    this->lockFile = lockFileName;
}

SimpleFSLock::~SimpleFSLock() {
}

/// Try to acquire the lock by creating the lock file.
/// NOTE(review): ofstream::open creates (or truncates) the file and succeeds
/// even if the file already exists — this is not an atomic create-if-absent,
/// unlike Java's File.createNewFile. Confirm this is the intended semantics.
bool SimpleFSLock::obtain() {
    // Ensure that lockDir exists and is a directory
    if (!FileUtils::fileExists(lockDir)) {
        if (!FileUtils::createDirectory(lockDir)) {
            boost::throw_exception(RuntimeException(L"Cannot create directory: " + lockDir));
        }
    } else if (!FileUtils::isDirectory(lockDir)) {
        boost::throw_exception(RuntimeException(L"Found regular file where directory expected: " + lockDir));
    }
    boost::filesystem::ofstream f;
    try {
        f.open(FileUtils::joinPath(lockDir, lockFile), std::ios::binary | std::ios::out);
    } catch (...) {
        // fall through: an unopened stream means the lock was not obtained
    }
    return f.is_open();
}

/// Release the lock by deleting the lock file; throws if deletion fails.
void SimpleFSLock::release() {
    String path(FileUtils::joinPath(lockDir, lockFile));
    if (FileUtils::fileExists(path) && !FileUtils::removeFile(path)) {
        boost::throw_exception(LockReleaseFailedException(L"failed to delete " + path));
    }
}

/// A lock is considered held whenever its lock file exists.
bool SimpleFSLock::isLocked() {
    return FileUtils::fileExists(FileUtils::joinPath(lockDir, lockFile));
}

String SimpleFSLock::toString() {
    return getClassName() + L"@" + FileUtils::joinPath(lockDir, lockFile);
}

}
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SingleInstanceLockFactory.h" #include "_SingleInstanceLockFactory.h" namespace Lucene { SingleInstanceLockFactory::SingleInstanceLockFactory() { locks = HashSet::newInstance(); } SingleInstanceLockFactory::~SingleInstanceLockFactory() { } LockPtr SingleInstanceLockFactory::makeLock(const String& lockName) { // We do not use the LockPrefix at all, because the private HashSet instance // effectively scopes the locking to this single Directory instance. return newLucene(locks, lockName); } void SingleInstanceLockFactory::clearLock(const String& lockName) { SyncLock syncLock(&locks); locks.remove(lockName); } SingleInstanceLock::SingleInstanceLock(HashSet locks, const String& lockName) { this->locks = locks; this->lockName = lockName; } SingleInstanceLock::~SingleInstanceLock() { } bool SingleInstanceLock::obtain() { SyncLock syncLock(&locks); return locks.add(lockName); } void SingleInstanceLock::release() { SyncLock syncLock(&locks); locks.remove(lockName); } bool SingleInstanceLock::isLocked() { SyncLock syncLock(&locks); return locks.contains(lockName); } String SingleInstanceLock::toString() { return lockName; } } LucenePlusPlus-rel_3.0.9/src/core/util/000077500000000000000000000000001456444476200200245ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/util/Attribute.cpp000066400000000000000000000013121456444476200224700ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#include "LuceneInc.h"
#include "Attribute.h"

namespace Lucene {

Attribute::~Attribute() {
}

// Base Attribute delegates identity operations to LuceneObject; concrete
// attributes override these with value-based implementations.
int32_t Attribute::hashCode() {
    return LuceneObject::hashCode();
}

bool Attribute::equals(const LuceneObjectPtr& other) {
    return LuceneObject::equals(other);
}

LuceneObjectPtr Attribute::clone(const LuceneObjectPtr& other) {
    return LuceneObject::clone(other);
}

}

/////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
// Distributable under the terms of either the Apache License (Version 2.0)
// or the GNU Lesser General Public License.
/////////////////////////////////////////////////////////////////////////////

#include "LuceneInc.h"
#include "AttributeSource.h"
#include "Attribute.h"

namespace Lucene {

AttributeFactory::AttributeFactory() {
}

AttributeFactory::~AttributeFactory() {
}

AttributePtr AttributeFactory::createAttributeInstance(const String& className) {
    return AttributePtr(); // override
}

/// Process-wide default factory, created once and registered with CycleCheck.
AttributeFactoryPtr AttributeFactory::DEFAULT_ATTRIBUTE_FACTORY() {
    static AttributeFactoryPtr _DEFAULT_ATTRIBUTE_FACTORY;
    LUCENE_RUN_ONCE(
        _DEFAULT_ATTRIBUTE_FACTORY = newLucene<DefaultAttributeFactory>();
        CycleCheck::addStatic(_DEFAULT_ATTRIBUTE_FACTORY);
    );
    return _DEFAULT_ATTRIBUTE_FACTORY;
}

AttributeSource::AttributeSource() {
    this->attributes = MapStringAttribute::newInstance();
    this->factory = AttributeFactory::DEFAULT_ATTRIBUTE_FACTORY();
}

/// Share the attribute map and factory of another source (a view, not a copy).
AttributeSource::AttributeSource(const AttributeSourcePtr& input) {
    if (!input) {
        boost::throw_exception(IllegalArgumentException(L"input AttributeSource must not be null"));
    }
    this->attributes = input->attributes;
    this->factory = input->factory;
}

AttributeSource::AttributeSource(const AttributeFactoryPtr& factory) {
    this->attributes = MapStringAttribute::newInstance();
    this->factory = factory;
}

AttributeSource::~AttributeSource() {
}

AttributeFactoryPtr AttributeSource::getAttributeFactory() {
    return this->factory;
}

/// Register an attribute implementation under its class name.
void AttributeSource::addAttribute(const String& className, const AttributePtr& attrImpl) {
    // invalidate state to force recomputation in captureState()
    currentState.reset();
    attributes.put(className, attrImpl);
}

bool AttributeSource::hasAttributes() {
    return !attributes.empty();
}

AttributePtr AttributeSource::getAttribute(const String& className) {
    return attributes.get(className);
}

bool AttributeSource::hasAttribute(const String& className) {
    return attributes.contains(className);
}

/// Rebuild currentState: a singly-linked list over all attribute impls, cached
/// so iteration in clearAttributes/captureState/equals avoids map traversal.
/// Callers must ensure the map is non-empty before calling (see hasAttributes).
void AttributeSource::computeCurrentState() {
    currentState = newLucene<AttributeSourceState>();
    AttributeSourceStatePtr c(currentState);
    MapStringAttribute::iterator attrImpl = attributes.begin();
    c->attribute = attrImpl->second;
    ++attrImpl;
    while (attrImpl != attributes.end()) {
        c->next = newLucene<AttributeSourceState>();
        c = c->next;
        c->attribute = attrImpl->second;
        ++attrImpl;
    }
}

/// Reset every registered attribute to its default state.
void AttributeSource::clearAttributes() {
    if (hasAttributes()) {
        if (!currentState) {
            computeCurrentState();
        }
        for (MapStringAttribute::iterator attrImpl = attributes.begin(); attrImpl != attributes.end(); ++attrImpl) {
            attrImpl->second->clear();
        }
    }
}

/// Snapshot all attribute values as a deep-cloned state list (null if empty).
AttributeSourceStatePtr AttributeSource::captureState() {
    if (!hasAttributes()) {
        return AttributeSourceStatePtr();
    }
    if (!currentState) {
        computeCurrentState();
    }
    return boost::dynamic_pointer_cast<AttributeSourceState>(currentState->clone());
}

/// Copy a previously captured state back into this source's attributes.
/// Throws if the state references an attribute class not registered here.
void AttributeSource::restoreState(const AttributeSourceStatePtr& state) {
    AttributeSourceStatePtr _state(state);
    if (!_state) {
        return;
    }
    do {
        MapStringAttribute::iterator attrImpl = attributes.find(_state->attribute->getClassName());
        if (attrImpl == attributes.end()) {
            boost::throw_exception(IllegalArgumentException(L"State contains an AttributeImpl that is not in this AttributeSource"));
        }
        _state->attribute->copyTo(attrImpl->second);
        _state = _state->next;
    } while (_state);
}

/// Order-dependent combination of all attribute hash codes.
int32_t AttributeSource::hashCode() {
    int32_t code = 0;
    for (MapStringAttribute::iterator attrImpl = attributes.begin(); attrImpl != attributes.end(); ++attrImpl) {
        code = code * 31 + attrImpl->second->hashCode();
    }
    return code;
}

/// Two sources are equal when their attribute impls match pairwise, in order.
bool AttributeSource::equals(const LuceneObjectPtr& other) {
    if (LuceneObject::equals(other)) {
        return true; // same object
    }
    AttributeSourcePtr otherAttributeSource = boost::dynamic_pointer_cast<AttributeSource>(other);
    if (otherAttributeSource) {
        if (hasAttributes()) {
            if (!otherAttributeSource->hasAttributes()) {
                return false;
            }
            if (attributes.size() != otherAttributeSource->attributes.size()) {
                return false;
            }
            // it is only equal if all attribute impls are the same in the same order
            if (!currentState) {
                computeCurrentState();
            }
            AttributeSourceStatePtr thisState(currentState);
            if (!otherAttributeSource->currentState) {
                otherAttributeSource->computeCurrentState();
            }
            AttributeSourceStatePtr otherState(otherAttributeSource->currentState);
            while (thisState && otherState) {
                if (otherState->attribute->getClassName() != thisState->attribute->getClassName() || !otherState->attribute->equals(thisState->attribute)) {
                    return false;
                }
                thisState = thisState->next;
                otherState = otherState->next;
            }
            return true;
        } else {
            return !otherAttributeSource->hasAttributes();
        }
    } else {
        return false;
    }
}

/// Render as "(attr1,attr2,...)" using each attribute's own toString.
String AttributeSource::toString() {
    StringStream buf;
    buf << L"(";
    if (hasAttributes()) {
        if (!currentState) {
            computeCurrentState();
        }
        for (AttributeSourceStatePtr state(currentState); state; state = state->next) {
            if (state != currentState) {
                buf << L",";
            }
            buf << state->attribute->toString();
        }
    }
    buf << ")";
    return buf.str();
}

/// Deep-copy every attribute impl into a fresh source sharing this factory.
AttributeSourcePtr AttributeSource::cloneAttributes() {
    AttributeSourcePtr clone(newLucene<AttributeSource>(this->factory));
    if (hasAttributes()) {
        if (!currentState) {
            computeCurrentState();
        }
        for (AttributeSourceStatePtr state(currentState); state; state = state->next) {
            clone->attributes.put(state->attribute->getClassName(), boost::dynamic_pointer_cast<Attribute>(state->attribute->clone()));
        }
    }
    return clone;
}

/// Collect the attribute impls (in state-list order) into a new collection.
Collection<AttributePtr> AttributeSource::getAttributes() {
    Collection<AttributePtr> attrImpls(Collection<AttributePtr>::newInstance());
    if (hasAttributes()) {
        if (!currentState) {
            computeCurrentState();
        }
        for (AttributeSourceStatePtr state(currentState); state; state = state->next) {
            attrImpls.add(state->attribute);
        }
    }
    return attrImpls;
}

DefaultAttributeFactory::~DefaultAttributeFactory() {
}

AttributePtr DefaultAttributeFactory::createAttributeInstance(const String& className) {
    return AttributePtr();
}

AttributeSourceState::~AttributeSourceState() {
}

/// Deep clone: copies this node's attribute and recursively clones the tail.
LuceneObjectPtr AttributeSourceState::clone(const LuceneObjectPtr& other) {
    AttributeSourceStatePtr clone(newLucene<AttributeSourceState>());
    clone->attribute = boost::dynamic_pointer_cast<Attribute>(attribute->clone());
    if (next) {
        clone->next = boost::dynamic_pointer_cast<AttributeSourceState>(next->clone());
    }
    return clone;
}

}
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Base64.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { const String Base64::BASE64_CHARS = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; Base64::~Base64() { } String Base64::encode(ByteArray bytes) { return encode(bytes.get(), bytes.size()); } String Base64::encode(const uint8_t* bytes, int32_t length) { String result; uint8_t byteArray3[3]; uint8_t byteArray4[4]; int32_t i = 0; while (length--) { byteArray3[i++] = *(bytes++); if (i == 3) { byteArray4[0] = (byteArray3[0] & 0xfc) >> 2; byteArray4[1] = ((byteArray3[0] & 0x03) << 4) + ((byteArray3[1] & 0xf0) >> 4); byteArray4[2] = ((byteArray3[1] & 0x0f) << 2) + ((byteArray3[2] & 0xc0) >> 6); byteArray4[3] = byteArray3[2] & 0x3f; for (i = 0; i < 4; ++i) { result += BASE64_CHARS[byteArray4[i]]; } i = 0; } } if (i != 0) { for (int32_t j = i; j < 3; ++j) { byteArray3[j] = 0; } byteArray4[0] = (byteArray3[0] & 0xfc) >> 2; byteArray4[1] = ((byteArray3[0] & 0x03) << 4) + ((byteArray3[1] & 0xf0) >> 4); byteArray4[2] = ((byteArray3[1] & 0x0f) << 2) + ((byteArray3[2] & 0xc0) >> 6); byteArray4[3] = byteArray3[2] & 0x3f; for (int32_t j = 0; j < i + 1; ++j) { result += BASE64_CHARS[byteArray4[j]]; } while (i++ < 3) { result += L'='; } } return result; } ByteArray Base64::decode(const String& str) { int32_t length = str.length(); uint8_t byteArray4[4]; uint8_t byteArray3[3]; int32_t i = 0; int32_t charIndex = 0; ByteArray result(ByteArray::newInstance(length / 2)); int32_t resultIndex = 0; while (length-- && str[charIndex] != L'=' && isBase64(str[charIndex])) { byteArray4[i++] = (uint8_t)str[charIndex++]; if (i == 4) { for (i = 0; i < 4; ++i) { byteArray4[i] = static_cast(BASE64_CHARS.find(byteArray4[i])); } byteArray3[0] = (byteArray4[0] << 2) + ((byteArray4[1] & 0x30) >> 4); byteArray3[1] = ((byteArray4[1] & 0xf) << 4) + ((byteArray4[2] & 0x3c) >> 2); byteArray3[2] = 
((byteArray4[2] & 0x3) << 6) + byteArray4[3]; for (i = 0; i < 3; ++i) { if (resultIndex >= result.size()) { result.resize((int32_t)((double)result.size() * 1.5)); } result[resultIndex++] = byteArray3[i]; } i = 0; } } if (i != 0) { for (int32_t j = i; j < 4; ++j) { byteArray4[j] = 0; } for (int32_t j = 0; j < 4; ++j) { byteArray4[j] = static_cast(BASE64_CHARS.find(byteArray4[j])); } byteArray3[0] = (byteArray4[0] << 2) + ((byteArray4[1] & 0x30) >> 4); byteArray3[1] = ((byteArray4[1] & 0xf) << 4) + ((byteArray4[2] & 0x3c) >> 2); byteArray3[2] = ((byteArray4[2] & 0x3) << 6) + byteArray4[3]; for (int32_t j = 0; j < i - 1; ++j) { if (resultIndex >= result.size()) { result.resize((int32_t)((double)result.size() * 1.5)); } result[resultIndex++] = byteArray3[j]; } } result.resize(resultIndex); return result; } bool Base64::isBase64(wchar_t ch) { return (UnicodeUtil::isAlnum(ch) || ch == L'+' || ch == L'/'); } } LucenePlusPlus-rel_3.0.9/src/core/util/BitSet.cpp000066400000000000000000000200411456444476200217170ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BitSet.h" #include "BitUtil.h" namespace Lucene { BitSet::BitSet(uint32_t size) : bitSet(size) { } BitSet::~BitSet() { } const uint64_t* BitSet::getBits() { return bitSet.empty() ? 
/// Raw pointer to the underlying 64-bit blocks (NULL when the set is empty).
const uint64_t* BitSet::getBits() {
    return bitSet.empty() ? NULL : static_cast<const uint64_t*>(&bitSet.m_bits[0]);
}

/// Remove all bits (the set becomes zero-length).
void BitSet::clear() {
    bitSet.clear();
}

/// Clear one bit; silently ignores an out-of-range index.
void BitSet::clear(uint32_t bitIndex) {
    if (bitIndex <= bitSet.size()) {
        bitSet.set(bitIndex, false);
    }
}

/// Clear one bit without any bounds check (caller guarantees validity).
void BitSet::fastClear(uint32_t bitIndex) {
    bitSet.set(bitIndex, false);
}

/// Clear [fromIndex, toIndex), clamping both ends to the current size.
void BitSet::clear(uint32_t fromIndex, uint32_t toIndex) {
    toIndex = std::min(toIndex, (uint32_t)bitSet.size());
    for (bitset_type::size_type i = std::min(fromIndex, (uint32_t)bitSet.size()); i < toIndex; ++i) {
        bitSet.set(i, false);
    }
}

/// Clear [fromIndex, toIndex) without bounds checks.
void BitSet::fastClear(uint32_t fromIndex, uint32_t toIndex) {
    for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) {
        bitSet.set(i, false);
    }
}

/// Set one bit, growing the set if the index is past the end.
void BitSet::set(uint32_t bitIndex) {
    if (bitIndex >= bitSet.size()) {
        resize(bitIndex + 1);
    }
    bitSet.set(bitIndex, true);
}

void BitSet::fastSet(uint32_t bitIndex) {
    bitSet.set(bitIndex, true);
}

/// Set one bit to the given value, growing the set if needed.
void BitSet::set(uint32_t bitIndex, bool value) {
    if (bitIndex >= bitSet.size()) {
        resize(bitIndex + 1);
    }
    bitSet.set(bitIndex, value);
}

void BitSet::fastSet(uint32_t bitIndex, bool value) {
    bitSet.set(bitIndex, value);
}

/// Set [fromIndex, toIndex) to true, growing the set if needed.
void BitSet::set(uint32_t fromIndex, uint32_t toIndex) {
    if (toIndex >= bitSet.size()) {
        resize(toIndex + 1);
    }
    for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) {
        bitSet.set(i, true);
    }
}

void BitSet::fastSet(uint32_t fromIndex, uint32_t toIndex) {
    for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) {
        bitSet.set(i, true);
    }
}

/// Set [fromIndex, toIndex) to the given value, growing the set if needed.
void BitSet::set(uint32_t fromIndex, uint32_t toIndex, bool value) {
    if (toIndex >= bitSet.size()) {
        resize(toIndex + 1);
    }
    for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) {
        bitSet.set(i, value);
    }
}

void BitSet::fastSet(uint32_t fromIndex, uint32_t toIndex, bool value) {
    for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) {
        bitSet.set(i, value);
    }
}

/// Toggle one bit, growing the set if needed.
void BitSet::flip(uint32_t bitIndex) {
    if (bitIndex >= bitSet.size()) {
        resize(bitIndex + 1);
    }
    bitSet.flip(bitIndex);
}

void BitSet::fastFlip(uint32_t bitIndex) {
    bitSet.flip(bitIndex);
}

/// Toggle [fromIndex, toIndex), growing the set if needed.
void BitSet::flip(uint32_t fromIndex, uint32_t toIndex) {
    if (toIndex >= bitSet.size()) {
        resize(toIndex + 1);
    }
    for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) {
        bitSet.flip(i);
    }
}

void BitSet::fastFlip(uint32_t fromIndex, uint32_t toIndex) {
    for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) {
        bitSet.flip(i);
    }
}

/// Capacity in bits, rounded up to a whole number of blocks.
uint32_t BitSet::size() const {
    return bitSet.num_blocks() * sizeof(bitset_type::block_type) * 8;
}

uint32_t BitSet::numBlocks() const {
    return bitSet.num_blocks();
}

/// True when no bit is set (regardless of capacity).
bool BitSet::isEmpty() const {
    return bitSet.none();
}

/// Bounds-checked read; out-of-range indexes read as false.
bool BitSet::get(uint32_t bitIndex) const {
    return bitIndex < bitSet.size() ? bitSet.test(bitIndex) : false;
}

bool BitSet::fastGet(uint32_t bitIndex) const {
    return bitSet.test(bitIndex);
}

/// Index of the first set bit at or after fromIndex, or -1 if none.
int32_t BitSet::nextSetBit(uint32_t fromIndex) const {
    bitset_type::size_type next = fromIndex == 0 ? bitSet.find_first() : bitSet.find_next(fromIndex - 1);
    return next == bitset_type::npos ? -1 : next;
}

/// In-place AND; blocks beyond the other set's length become zero.
void BitSet::_and(const BitSetPtr& set) {
    bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks());
    for (bitset_type::size_type i = 0; i < minBlocks; ++i) {
        bitSet.m_bits[i] &= set->bitSet.m_bits[i];
    }
    if (bitSet.num_blocks() > minBlocks) {
        std::fill(bitSet.m_bits.begin() + minBlocks, bitSet.m_bits.end(), bitset_type::block_type(0));
    }
}

/// In-place OR; grows this set to the other set's length first.
void BitSet::_or(const BitSetPtr& set) {
    bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks());
    if (set->bitSet.size() > bitSet.size()) {
        resize(set->bitSet.size());
    }
    for (bitset_type::size_type i = 0; i < minBlocks; ++i) {
        bitSet.m_bits[i] |= set->bitSet.m_bits[i];
    }
    if (bitSet.num_blocks() > minBlocks) {
        // OR against implicit zeros is a plain copy of the other set's tail.
        std::copy(set->bitSet.m_bits.begin() + minBlocks, set->bitSet.m_bits.end(), bitSet.m_bits.begin() + minBlocks);
    }
}

/// In-place XOR; grows this set to the other set's length first.
void BitSet::_xor(const BitSetPtr& set) {
    bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks());
    if (set->bitSet.size() > bitSet.size()) {
        resize(set->bitSet.size());
    }
    for (bitset_type::size_type i = 0; i < minBlocks; ++i) {
        bitSet.m_bits[i] ^= set->bitSet.m_bits[i];
    }
    if (bitSet.num_blocks() > minBlocks) {
        // XOR against implicit zeros is a plain copy of the other set's tail.
        std::copy(set->bitSet.m_bits.begin() + minBlocks, set->bitSet.m_bits.end(), bitSet.m_bits.begin() + minBlocks);
    }
}

/// In-place AND-NOT: clear every bit that is set in the other set.
void BitSet::andNot(const BitSetPtr& set) {
    bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks());
    for (bitset_type::size_type i = 0; i < minBlocks; ++i) {
        bitSet.m_bits[i] &= ~set->bitSet.m_bits[i];
    }
}

/// True if any bit is set in both sets.
bool BitSet::intersectsBitSet(const BitSetPtr& set) const {
    return bitSet.intersects(set->bitSet);
}

/// Count of set bits, via BitUtil's block-wise popcount.
uint32_t BitSet::cardinality() {
    return bitSet.num_blocks() == 0 ? 0 : (uint32_t)BitUtil::pop_array((int64_t*)getBits(), 0, bitSet.num_blocks());
}

/// Resize to 'size' bits, zeroing any new blocks and masking off the bits of
/// the last block that lie beyond the new logical size.
void BitSet::resize(uint32_t size) {
    bitset_type::size_type old_num_blocks = bitSet.num_blocks();
    bitset_type::size_type required_blocks = bitSet.calc_num_blocks(size);
    if (required_blocks != old_num_blocks) {
        bitSet.m_bits.resize(required_blocks, bitset_type::block_type(0));
    }
    bitSet.m_num_bits = size;
    uint64_t extra_bits = static_cast<uint64_t>(bitSet.size() % bitSet.bits_per_block);
    if (extra_bits != 0) {
        bitSet.m_bits.back() &= ~(~static_cast<uint64_t>(0) << extra_bits);
    }
}

/// Equal when the shared prefix of blocks matches and the longer set's extra
/// blocks are all zero (capacity alone does not break equality).
bool BitSet::equals(const LuceneObjectPtr& other) {
    if (LuceneObject::equals(other)) {
        return true;
    }
    BitSetPtr otherBitSet(boost::dynamic_pointer_cast<BitSet>(other));
    if (!otherBitSet) {
        return false;
    }
    // 'first' is always the set with more blocks.
    BitSetPtr first = bitSet.num_blocks() < otherBitSet->bitSet.num_blocks() ? otherBitSet : shared_from_this();
    BitSetPtr second = bitSet.num_blocks() < otherBitSet->bitSet.num_blocks() ? shared_from_this() : otherBitSet;
    bitset_type::size_type firstLength = first->bitSet.num_blocks();
    bitset_type::size_type secondLength = second->bitSet.num_blocks();
    for (bitset_type::size_type i = secondLength; i < firstLength; ++i) {
        if (first->bitSet.m_bits[i] != 0) {
            return false;
        }
    }
    for (bitset_type::size_type i = 0; i < secondLength; ++i) {
        if (first->bitSet.m_bits[i] != second->bitSet.m_bits[i]) {
            return false;
        }
    }
    return true;
}

int32_t BitSet::hashCode() {
    // Start with a zero hash and use a mix that results in zero if the input is zero.
    // This effectively truncates trailing zeros without an explicit check.
    int64_t hash = 0;
    uint32_t maxSize = bitSet.num_blocks();
    const uint64_t* bits = getBits();
    for (uint32_t bit = 0; bit < maxSize; ++bit) {
        hash ^= bits[bit];
        hash = (hash << 1) | (hash >> 63); // rotate left
    }
    // Fold leftmost bits into right and add a constant to prevent empty sets from
    // returning 0, which is too common.
    return (int32_t)((hash >> 32) ^ hash) + 0x98761234;
}

/// Deep copy of the underlying dynamic_bitset.
LuceneObjectPtr BitSet::clone(const LuceneObjectPtr& other) {
    LuceneObjectPtr clone = other ? other : newLucene<BitSet>();
    BitSetPtr cloneBitSet(boost::dynamic_pointer_cast<BitSet>(LuceneObject::clone(clone)));
    cloneBitSet->bitSet = bitSet;
    return cloneBitSet;
}

}
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BitUtil.h" #include "MiscUtils.h" namespace Lucene { const uint8_t BitUtil::ntzTable[] = { 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 }; BitUtil::~BitUtil() { } int32_t BitUtil::pop(int64_t x) { x = x - (MiscUtils::unsignedShift(x, (int64_t)1) & 0x5555555555555555LL); x = (x & 0x3333333333333333LL) + (MiscUtils::unsignedShift(x, (int64_t)2) & 0x3333333333333333LL); x = (x + MiscUtils::unsignedShift(x, (int64_t)4)) & 0x0f0f0f0f0f0f0f0fLL; x = x + MiscUtils::unsignedShift(x, (int64_t)8); x = x + MiscUtils::unsignedShift(x, (int64_t)16); x = x + MiscUtils::unsignedShift(x, (int64_t)32); return (int32_t)x & 0x7f; } int64_t BitUtil::pop_array(const int64_t* A, int32_t wordOffset, int32_t numWords) { int32_t n = wordOffset + numWords; int64_t tot = 0; int64_t tot8 = 0; int64_t ones = 0; int64_t twos = 0; int64_t fours = 0; int32_t i = wordOffset; for (; i <= n - 8; i += 8) { int64_t twosA; CSA(twosA, ones, ones, A[i], A[i + 1]); int64_t twosB; CSA(twosB, ones, ones, A[i + 2], A[i + 3]); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); CSA(twosA, ones, ones, A[i + 4], A[i + 5]); CSA(twosB, ones, ones, A[i + 6], A[i + 7]); int64_t foursB; 
CSA(foursB, twos, twos, twosA, twosB); int64_t eights; CSA(eights, fours, fours, foursA, foursB); tot8 += pop(eights); } // Handle trailing words in a binary-search manner. // Derived from the loop above by setting specific elements to 0. if (i <= n - 4) { int64_t twosA; CSA(twosA, ones, ones, A[i], A[i + 1]); int64_t twosB; CSA(twosB, ones, ones, A[i + 2], A[i + 3]); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 4; } if (i <= n - 2) { int64_t twosA; CSA(twosA, ones, ones, A[i], A[i + 1]); int64_t foursA = twos & twosA; twos = twos ^ twosA; int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 2; } if (i < n) { tot += pop(A[i]); } tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); return tot; } int64_t BitUtil::pop_intersect(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) { int32_t n = wordOffset + numWords; int64_t tot = 0; int64_t tot8 = 0; int64_t ones = 0; int64_t twos = 0; int64_t fours = 0; int32_t i = wordOffset; for (; i <= n - 8; i += 8) { int64_t twosA; CSA(twosA, ones, ones, (A[i] & B[i]), (A[i + 1] & B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] & B[i + 2]), (A[i + 3] & B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); CSA(twosA, ones, ones, (A[i + 4] & B[i + 4]), (A[i + 5] & B[i + 5])); CSA(twosB, ones, ones, (A[i + 6] & B[i + 6]), (A[i + 7] & B[i + 7])); int64_t foursB; CSA(foursB, twos, twos, twosA, twosB); int64_t eights; CSA(eights, fours, fours, foursA, foursB); tot8 += pop(eights); } if (i <= n - 4) { int64_t twosA; CSA(twosA, ones, ones, (A[i] & B[i]), (A[i + 1] & B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] & B[i + 2]), (A[i + 3] & B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 4; } if (i <= n - 2) { int64_t twosA; CSA(twosA, ones, 
ones, (A[i] & B[i]), (A[i + 1] & B[i + 1])); int64_t foursA = twos & twosA; twos = twos ^ twosA; int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 2; } if (i < n) { tot += pop((A[i] & B[i])); } tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); return tot; } int64_t BitUtil::pop_union(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) { int32_t n = wordOffset + numWords; int64_t tot = 0; int64_t tot8 = 0; int64_t ones = 0; int64_t twos = 0; int64_t fours = 0; int32_t i = wordOffset; for (; i <= n - 8; i += 8) { int64_t twosA; CSA(twosA, ones, ones, (A[i] | B[i]), (A[i + 1] | B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] | B[i + 2]), (A[i + 3] | B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); CSA(twosA, ones, ones, (A[i + 4] | B[i + 4]), (A[i + 5] | B[i + 5])); CSA(twosB, ones, ones, (A[i + 6] | B[i + 6]), (A[i + 7] | B[i + 7])); int64_t foursB; CSA(foursB, twos, twos, twosA, twosB); int64_t eights; CSA(eights, fours, fours, foursA, foursB); tot8 += pop(eights); } if (i <= n - 4) { int64_t twosA; CSA(twosA, ones, ones, (A[i] | B[i]), (A[i + 1] | B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] | B[i + 2]), (A[i + 3] | B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 4; } if (i <= n - 2) { int64_t twosA; CSA(twosA, ones, ones, (A[i] | B[i]), (A[i + 1] | B[i + 1])); int64_t foursA = twos & twosA; twos = twos ^ twosA; int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 2; } if (i < n) { tot += pop((A[i] | B[i])); } tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); return tot; } int64_t BitUtil::pop_andnot(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) { int32_t n = wordOffset + numWords; int64_t tot = 0; int64_t tot8 = 0; int64_t ones = 0; int64_t twos = 0; int64_t fours = 
0; int32_t i = wordOffset; for (; i <= n - 8; i += 8) { int64_t twosA; CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i + 1] & ~B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] & ~B[i + 2]), (A[i + 3] & ~B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); CSA(twosA, ones, ones, (A[i + 4] & ~B[i + 4]), (A[i + 5] & ~B[i + 5])); CSA(twosB, ones, ones, (A[i + 6] & ~B[i + 6]), (A[i + 7] & ~B[i + 7])); int64_t foursB; CSA(foursB, twos, twos, twosA, twosB); int64_t eights; CSA(eights, fours, fours, foursA, foursB); tot8 += pop(eights); } if (i <= n - 4) { int64_t twosA; CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i + 1] & ~B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] & ~B[i + 2]), (A[i + 3] & ~B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 4; } if (i <= n - 2) { int64_t twosA; CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i + 1] & ~B[i + 1])); int64_t foursA = twos & twosA; twos = twos ^ twosA; int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 2; } if (i < n) { tot += pop((A[i] & ~B[i])); } tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); return tot; } int64_t BitUtil::pop_xor(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) { int32_t n = wordOffset + numWords; int64_t tot = 0; int64_t tot8 = 0; int64_t ones = 0; int64_t twos = 0; int64_t fours = 0; int32_t i = wordOffset; for (; i <= n - 8; i += 8) { int64_t twosA; CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i + 1] ^ B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] ^ B[i + 2]), (A[i + 3] ^ B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); CSA(twosA, ones, ones, (A[i + 4] ^ B[i + 4]), (A[i + 5] ^ B[i + 5])); CSA(twosB, ones, ones, (A[i + 6] ^ B[i + 6]), (A[i + 7] ^ B[i + 7])); int64_t foursB; CSA(foursB, twos, twos, twosA, twosB); int64_t eights; CSA(eights, fours, fours, foursA, foursB); 
tot8 += pop(eights); } if (i <= n - 4) { int64_t twosA; CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i + 1] ^ B[i + 1])); int64_t twosB; CSA(twosB, ones, ones, (A[i + 2] ^ B[i + 2]), (A[i + 3] ^ B[i + 3])); int64_t foursA; CSA(foursA, twos, twos, twosA, twosB); int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 4; } if (i <= n - 2) { int64_t twosA; CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i + 1] ^ B[i + 1])); int64_t foursA = twos & twosA; twos = twos ^ twosA; int64_t eights = fours & foursA; fours = fours ^ foursA; tot8 += pop(eights); i += 2; } if (i < n) { tot += pop((A[i] ^ B[i])); } tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); return tot; } void BitUtil::CSA(int64_t& h, int64_t& l, int64_t a, int64_t b, int64_t c) { int64_t u = a ^ b; h = (a & b) | (u & c); l = u ^ c; } int32_t BitUtil::ntz(int64_t val) { // A full binary search to determine the low byte was slower than a linear search for nextSetBit(). // This is most likely because the implementation of nextSetBit() shifts bits to the right, increasing // the probability that the first non-zero byte is in the rhs. // This implementation does a single binary search at the top level only so that all other bit shifting // can be done on ints instead of longs to remain friendly to 32 bit architectures. In addition, the // case of a non-zero first byte is checked for first because it is the most common in dense bit arrays. int32_t lower = (int32_t)val; int32_t lowByte = lower & 0xff; if (lowByte != 0) { return ntzTable[lowByte]; } if (lower != 0) { lowByte = MiscUtils::unsignedShift(lower, 8) & 0xff; if (lowByte != 0) { return ntzTable[lowByte] + 8; } lowByte = MiscUtils::unsignedShift(lower, 16) & 0xff; if (lowByte != 0) { return ntzTable[lowByte] + 16; } // no need to mask off low byte for the last byte in the 32 bit word // no need to check for zero on the last byte either. 
return ntzTable[MiscUtils::unsignedShift(lower, 24)] + 24; } else { // grab upper 32 bits int32_t upper = (int32_t)(val >> 32); lowByte = upper & 0xff; if (lowByte != 0) { return ntzTable[lowByte] + 32; } lowByte = MiscUtils::unsignedShift(upper, 8) & 0xff; if (lowByte != 0) { return ntzTable[lowByte] + 40; } lowByte = MiscUtils::unsignedShift(upper, 16) & 0xff; if (lowByte != 0) { return ntzTable[lowByte] + 48; } // no need to mask off low byte for the last byte in the 32 bit word // no need to check for zero on the last byte either. return ntzTable[MiscUtils::unsignedShift(upper, 24)] + 56; } } int32_t BitUtil::ntz(int32_t val) { // This implementation does a single binary search at the top level only. In addition, the case // of a non-zero first byte is checked for first because it is the most common in dense bit arrays. int32_t lowByte = val & 0xff; if (lowByte != 0) { return ntzTable[lowByte]; } lowByte = MiscUtils::unsignedShift(val, 8) & 0xff; if (lowByte != 0) { return ntzTable[lowByte] + 8; } lowByte = MiscUtils::unsignedShift(val, 16) & 0xff; if (lowByte != 0) { return ntzTable[lowByte] + 16; } // no need to mask off low byte for the last byte. // no need to check for zero on the last byte either. return ntzTable[MiscUtils::unsignedShift(val, 24)] + 24; } int32_t BitUtil::ntz2(int64_t x) { int32_t n = 0; int32_t y = (int32_t)x; if (y == 0) { // the only 64 bit shift necessary n += 32; y = (int32_t)MiscUtils::unsignedShift(x, (int64_t)32); } if ((y & 0x0000ffff) == 0) { n += 16; y = MiscUtils::unsignedShift(y, 16); } if ((y & 0x000000ff) == 0) { n += 8; y = MiscUtils::unsignedShift(y, 8); } return (ntzTable[y & 0xff]) + n; } int32_t BitUtil::ntz3(int64_t x) { int32_t n = 1; // do the first step as a long, all others as ints. 
int32_t y = (int32_t)x; if (y == 0) { n += 32; y = (int32_t)MiscUtils::unsignedShift(x, (int64_t)32); } if ((y & 0x0000ffff) == 0) { n += 16; y = MiscUtils::unsignedShift(y, 16); } if ((y & 0x000000ff) == 0) { n += 8; y = MiscUtils::unsignedShift(y, 8); } if ((y & 0x0000000f) == 0) { n += 4; y = MiscUtils::unsignedShift(y, 4); } if ((y & 0x00000003) == 0) { n += 2; y = MiscUtils::unsignedShift(y, 2); } return n - (y & 1); } bool BitUtil::isPowerOfTwo(int32_t v) { return ((v & (v - 1)) == 0); } bool BitUtil::isPowerOfTwo(int64_t v) { return ((v & (v - 1)) == 0); } int32_t BitUtil::nextHighestPowerOfTwo(int32_t v) { --v; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; return ++v; } int64_t BitUtil::nextHighestPowerOfTwo(int64_t v) { --v; v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16; v |= v >> 32; return ++v; } } LucenePlusPlus-rel_3.0.9/src/core/util/BitVector.cpp000066400000000000000000000173421456444476200224400ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BitVector.h" #include "Directory.h" #include "IndexInput.h" #include "IndexOutput.h" #include "TestPoint.h" #include "MiscUtils.h" namespace Lucene { const uint8_t BitVector::BYTE_COUNTS[] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; BitVector::BitVector(int32_t n) { _size = n; bits = ByteArray::newInstance((_size >> 3) + 1); MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0); _count = 0; } BitVector::BitVector(ByteArray bits, int32_t size) { this->bits = bits; this->_size = size; this->_count = -1; } BitVector::BitVector(const DirectoryPtr& d, const String& name) { IndexInputPtr input(d->openInput(name)); LuceneException finally; try { _size = input->readInt(); // read size if (_size == -1) { readDgaps(input); } else { readBits(input); } } catch (LuceneException& e) { finally = e; } input->close(); finally.throwException(); } BitVector::~BitVector() { } LuceneObjectPtr BitVector::clone(const LuceneObjectPtr& other) { ByteArray copyBits(ByteArray::newInstance(bits.size())); MiscUtils::arrayCopy(bits.get(), 0, copyBits.get(), 0, bits.size()); BitVectorPtr clone = newLucene(copyBits, _size); clone->_count = _count; return clone; } void 
BitVector::set(int32_t bit) { if (bit >= _size) { boost::throw_exception(IndexOutOfBoundsException()); } bits[bit >> 3] |= 1 << (bit & 7); _count = -1; } bool BitVector::getAndSet(int32_t bit) { if (bit >= _size) { boost::throw_exception(IndexOutOfBoundsException()); } int32_t pos = (bit >> 3); int32_t v = bits[pos]; int32_t flag = 1 << (bit & 7); if ((flag & v) != 0) { return true; } else { bits[pos] = (uint8_t)(v | flag); if (_count != -1) { ++_count; } return false; } } void BitVector::clear(int32_t bit) { if (bit >= _size) { boost::throw_exception(IndexOutOfBoundsException()); } bits[bit >> 3] &= ~(1 << (bit & 7)); _count = -1; } bool BitVector::get(int32_t bit) { BOOST_ASSERT(bit >= 0 && bit < _size); return (bits[bit >> 3] & (1 << (bit & 7))) != 0; } int32_t BitVector::size() { return _size; } int32_t BitVector::count() { // if the vector has been modified if (_count == -1) { int32_t c = 0; int32_t end = bits.size(); for (int32_t i = 0; i < end; ++i) { c += BYTE_COUNTS[bits[i] & 0xff]; // sum bits per byte } _count = c; } return _count; } int32_t BitVector::getRecomputedCount() { int32_t c = 0; int32_t end = bits.size(); for (int32_t i = 0; i < end; ++i) { c += BYTE_COUNTS[bits[i] & 0xff]; // sum bits per byte } return c; } void BitVector::write(const DirectoryPtr& d, const String& name) { TestScope testScope(L"BitVector", L"write"); IndexOutputPtr output(d->createOutput(name)); LuceneException finally; try { if (isSparse()) { writeDgaps(output); // sparse bit-set more efficiently saved as d-gaps. 
} else { writeBits(output); } } catch (LuceneException& e) { finally = e; } output->close(); finally.throwException(); } void BitVector::writeBits(const IndexOutputPtr& output) { output->writeInt(size()); // write size output->writeInt(count()); // write count output->writeBytes(bits.get(), bits.size()); } void BitVector::writeDgaps(const IndexOutputPtr& output) { output->writeInt(-1); // mark using d-gaps output->writeInt(size()); // write size output->writeInt(count()); // write count int32_t last = 0; int32_t n = count(); int32_t m = bits.size(); for (int32_t i = 0; i < m && n > 0; ++i) { if (bits[i] != 0) { output->writeVInt(i-last); output->writeByte(bits[i]); last = i; n -= BYTE_COUNTS[bits[i] & 0xff]; } } } bool BitVector::isSparse() { // note: order of comparisons below set to favor smaller values (no binary range search.) // note: adding 4 because we start with ((int) -1) to indicate d-gaps format. // note: we write the d-gap for the byte number, and the byte (bits[i]) itself, therefore // multiplying count by (8+8) or (8+16) or (8+24) etc.: // - first 8 for writing bits[i] (1 byte vs. 1 bit), and // - second part for writing the byte-number d-gap as vint. // note: factor is for read/write of byte-arrays being faster than vints. 
int32_t factor = 10; if (bits.size() < (1 << 7)) { return factor * (4 + (8 + 8) * count()) < size(); } if (bits.size() < (1 << 14)) { return factor * (4 + (8 + 16) * count()) < size(); } if (bits.size() < (1 << 21)) { return factor * (4 + (8 + 24) * count()) < size(); } if (bits.size() < (1 << 28)) { return factor * (4 + (8 + 32) * count()) < size(); } return factor * (4 + (8 + 40) * count()) < size(); } void BitVector::readBits(const IndexInputPtr& input) { _count = input->readInt(); // read count bits = ByteArray::newInstance((_size >> 3) + 1); // allocate bits MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0); input->readBytes(bits.get(), 0, bits.size()); } void BitVector::readDgaps(const IndexInputPtr& input) { _size = input->readInt(); // (re)read size _count = input->readInt(); // read count bits = ByteArray::newInstance((_size >> 3) + 1); // allocate bits MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0); int32_t last = 0; int32_t n = count(); while (n > 0) { last += input->readVInt(); bits[last] = input->readByte(); n -= BYTE_COUNTS[bits[last] & 0xff]; } } BitVectorPtr BitVector::subset(int32_t start, int32_t end) { if (start < 0 || end > size() || end < start) { boost::throw_exception(IndexOutOfBoundsException()); } // Special case -- return empty vector is start == end if (end == start) { return newLucene(0); } ByteArray bits(ByteArray::newInstance(MiscUtils::unsignedShift(end - start - 1, 3) + 1)); int32_t s = MiscUtils::unsignedShift(start, 3); for (int32_t i = 0; i < bits.size(); ++i) { int32_t cur = 0xff & this->bits[i + s]; int32_t next = i + s + 1 >= this->bits.size() ? 
0 : 0xff & this->bits[i + s + 1]; bits[i] = (uint8_t)(MiscUtils::unsignedShift(cur, (start & 7)) | ((next << (8 - (start & 7))))); } int32_t bitsToClear = (bits.size() * 8 - (end - start)) % 8; bits[bits.size() - 1] &= ~(0xff << (8 - bitsToClear)); return newLucene(bits, end - start); } } LucenePlusPlus-rel_3.0.9/src/core/util/BufferedReader.cpp000066400000000000000000000057341456444476200234060ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "BufferedReader.h" #include "MiscUtils.h" namespace Lucene { const int32_t BufferedReader::READER_BUFFER = 8192; BufferedReader::BufferedReader(const ReaderPtr& reader, int32_t size) { this->reader = reader; this->bufferSize = size; this->bufferLength = 0; this->bufferPosition = 0; } BufferedReader::~BufferedReader() { } int32_t BufferedReader::read() { if (bufferPosition >= bufferLength) { if (refill() == READER_EOF) { return READER_EOF; } } return buffer[bufferPosition++]; } int32_t BufferedReader::peek() { if (bufferPosition >= bufferLength) { if (refill() == READER_EOF) { return READER_EOF; } } return buffer[bufferPosition]; } int32_t BufferedReader::read(wchar_t* b, int32_t offset, int32_t length) { if (length == 0) { return 0; } int32_t remaining = length; while (remaining > 0) { int32_t available = bufferLength - bufferPosition; if (remaining <= available) { // the buffer contains enough data to satisfy this request MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, remaining); bufferPosition += remaining; remaining = 0; } else if (available > 0) { // the buffer does not have enough data, first serve all we've got MiscUtils::arrayCopy(buffer.get(), bufferPosition, 
b, offset, available); bufferPosition += available; offset += available; remaining -= available; } else if (refill() == READER_EOF) { length -= remaining; break; } } return length == 0 ? READER_EOF : length; } bool BufferedReader::readLine(String& line) { line.clear(); wchar_t ch = (wchar_t)read(); while (ch != (wchar_t)READER_EOF && ch != L'\r' && ch != L'\n') { line += ch; ch = (wchar_t)read(); } if (ch == '\r' && (wchar_t)peek() == L'\n') { read(); } return (!line.empty() || ch != (wchar_t)READER_EOF); } int32_t BufferedReader::refill() { if (!buffer) { buffer = CharArray::newInstance(bufferSize); // allocate buffer lazily } int32_t readLength = reader->read(buffer.get(), 0, bufferSize); bufferLength = readLength == READER_EOF ? 0 : readLength; bufferPosition = 0; return readLength; } void BufferedReader::close() { reader->close(); bufferLength = 0; bufferPosition = 0; } bool BufferedReader::markSupported() { return false; } void BufferedReader::reset() { reader->reset(); bufferLength = 0; bufferPosition = 0; } } LucenePlusPlus-rel_3.0.9/src/core/util/CharFolder.cpp000066400000000000000000000025701456444476200225450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "CharFolder.h" #include "MiscUtils.h" #include "UnicodeUtils.h" namespace Lucene { bool CharFolder::lowerCache = CharFolder::fillLower(); bool CharFolder::upperCache = CharFolder::fillUpper(); wchar_t CharFolder::lowerChars[CHAR_MAX - CHAR_MIN + 1]; wchar_t CharFolder::upperChars[CHAR_MAX - CHAR_MIN + 1]; CharFolder::~CharFolder() { } wchar_t CharFolder::toLower(wchar_t ch) { return (ch > CHAR_MIN && ch < CHAR_MAX) ? 
lowerChars[ch - CHAR_MIN] : UnicodeUtil::toLower(ch); } wchar_t CharFolder::toUpper(wchar_t ch) { return (ch > CHAR_MIN && ch < CHAR_MAX) ? upperChars[ch - CHAR_MIN] : UnicodeUtil::toUpper(ch); } bool CharFolder::fillLower() { for (int32_t index = CHAR_MIN; index < CHAR_MAX; ++index) { lowerChars[index - CHAR_MIN] = UnicodeUtil::toLower((wchar_t)index); } return true; } bool CharFolder::fillUpper() { for (int32_t index = CHAR_MIN; index < CHAR_MAX; ++index) { upperChars[index - CHAR_MIN] = UnicodeUtil::toUpper((wchar_t)index); } return true; } } LucenePlusPlus-rel_3.0.9/src/core/util/Collator.cpp000066400000000000000000000013621456444476200223110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "Collator.h" namespace Lucene { Collator::Collator(std::locale locale) : collate(std::use_facet< std::collate >(locale)) { } Collator::~Collator() { } int32_t Collator::compare(const String& first, const String& second) { return collate.compare(first.c_str(), first.c_str() + first.length(), second.c_str(), second.c_str() + second.length()); } } LucenePlusPlus-rel_3.0.9/src/core/util/Constants.cpp000066400000000000000000000027501456444476200225100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Constants.h" namespace Lucene { #if defined(linux) || defined(__linux) || defined(__linux__) String Constants::OS_NAME = L"Linux"; #elif defined(sun) || defined(__sun) String Constants::OS_NAME = L"Sun"; #elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(_WIN64) || defined(__WIN64__) || defined(WIN64) String Constants::OS_NAME = L"Windows"; #elif defined(macintosh) || defined(__APPLE__) || defined(__APPLE_CC__) String Constants::OS_NAME = L"Mac"; #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__FreeBSD_kernel__) String Constants::OS_NAME = L"BSD"; #elif defined(__GNU__) String Constants::OS_NAME = L"HURD"; #else String Constants::OS_NAME = L"UNKNOWN"; #endif String Constants::LUCENE_MAIN_VERSION = L"3.0.9"; String Constants::LUCENE_VERSION = L"3.0.9"; Constants::Constants() { // private } Constants::~Constants() { } LuceneVersion::LuceneVersion() { // private } LuceneVersion::~LuceneVersion() { } bool LuceneVersion::onOrAfter(LuceneVersion::Version first, LuceneVersion::Version second) { return (first >= second); } } LucenePlusPlus-rel_3.0.9/src/core/util/CycleCheck.cpp000066400000000000000000000037611456444476200225340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "CycleCheck.h" namespace Lucene { MapStringInt CycleCheck::cycleMap; Set CycleCheck::staticRefs; CycleCheck::~CycleCheck() { } void CycleCheck::addRef(const String& className, int32_t ref) { if (!cycleMap) { cycleMap = MapStringInt::newInstance(); } SyncLock lockRef(&cycleMap); MapStringInt::iterator classRef = cycleMap.find(className); if (classRef == cycleMap.end()) { cycleMap.put(className, 1); } else { classRef->second += ref; if (classRef->second < 0) { boost::throw_exception(RuntimeException(L"invalid class reference")); } } } void CycleCheck::addStatic(LuceneObjectPtr* staticRef) { #ifdef LPP_USE_CYCLIC_CHECK LUCENE_RUN_ONCE( staticRefs = Set::newInstance(); ); staticRefs.add(staticRef); #endif } void CycleCheck::dumpRefs() { // destroy all registered statics if (staticRefs) { for (Set::iterator staticRef = staticRefs.begin(); staticRef != staticRefs.end(); ++staticRef) { (*staticRef)->reset(); } } if (cycleMap) { SyncLock lockRef(&cycleMap); bool reportCycles = true; for (MapStringInt::iterator classRef = cycleMap.begin(); classRef != cycleMap.end(); ++classRef) { if (classRef->second > 0) { if (reportCycles) { std::wcout << L"Cyclic references detected!\n"; reportCycles = false; } std::wcout << classRef->first << L": " << classRef->second << L"\n"; } } } } } LucenePlusPlus-rel_3.0.9/src/core/util/DocIdBitSet.cpp000066400000000000000000000037571456444476200226410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "DocIdBitSet.h" #include "_DocIdBitSet.h" #include "BitSet.h" namespace Lucene { DocIdBitSet::DocIdBitSet() { } DocIdBitSet::DocIdBitSet(const BitSetPtr& bitSet) { this->bitSet = bitSet; } DocIdBitSet::~DocIdBitSet() { } DocIdSetIteratorPtr DocIdBitSet::iterator() { return newLucene(bitSet); } bool DocIdBitSet::isCacheable() { return true; } BitSetPtr DocIdBitSet::getBitSet() { return bitSet; } bool DocIdBitSet::equals(const LuceneObjectPtr& other) { if (DocIdSet::equals(other)) { return true; } DocIdBitSetPtr otherBitSet(boost::dynamic_pointer_cast(other)); return bitSet->equals(otherBitSet->bitSet); } int32_t DocIdBitSet::hashCode() { return bitSet->hashCode(); } LuceneObjectPtr DocIdBitSet::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? other : newLucene(); DocIdBitSetPtr cloneBitSet(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); cloneBitSet->bitSet = boost::dynamic_pointer_cast(bitSet->clone()); return cloneBitSet; } DocIdBitSetIterator::DocIdBitSetIterator(const BitSetPtr& bitSet) { this->bitSet = bitSet; this->docId = -1; } DocIdBitSetIterator::~DocIdBitSetIterator() { } int32_t DocIdBitSetIterator::docID() { return docId; } int32_t DocIdBitSetIterator::nextDoc() { int32_t doc = bitSet->nextSetBit(docId + 1); docId = doc == -1 ? NO_MORE_DOCS : doc; return docId; } int32_t DocIdBitSetIterator::advance(int32_t target) { int32_t doc = bitSet->nextSetBit(target); docId = doc == -1 ? NO_MORE_DOCS : doc; return docId; } } LucenePlusPlus-rel_3.0.9/src/core/util/FieldCacheSanityChecker.cpp000066400000000000000000000232431456444476200251600ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "FieldCacheSanityChecker.h" #include "_FieldCacheSanityChecker.h" #include "FieldCache.h" #include "IndexReader.h" #include "StringUtils.h" #include "VariantUtils.h" namespace Lucene { FieldCacheSanityChecker::FieldCacheSanityChecker() { } FieldCacheSanityChecker::~FieldCacheSanityChecker() { } Collection FieldCacheSanityChecker::checkSanity(const FieldCachePtr& cache) { return checkSanity(cache->getCacheEntries()); } Collection FieldCacheSanityChecker::checkSanity(Collection cacheEntries) { FieldCacheSanityCheckerPtr sanityChecker(newLucene()); return sanityChecker->check(cacheEntries); } Collection FieldCacheSanityChecker::check(Collection cacheEntries) { if (!cacheEntries || cacheEntries.empty()) { return Collection::newInstance(); } // Maps the (valId) identityhashCode of cache values to sets of CacheEntry instances MapSetIntFieldCacheEntry valIdToItems(MapSetIntFieldCacheEntry::map_type::newInstance()); // Maps ReaderField keys to Sets of ValueIds MapSetReaderFieldInt readerFieldToValIds(MapSetReaderFieldInt::map_type::newInstance()); // Any keys that we know result in more then one valId SetReaderField valMismatchKeys(SetReaderField::newInstance()); // iterate over all the cacheEntries to get the mappings we'll need for (int32_t i = 0; i < cacheEntries.size(); ++i) { FieldCacheEntryPtr item(cacheEntries[i]); boost::any val(item->getValue()); if (VariantUtils::typeOf(val)) { continue; } ReaderFieldPtr rf(newLucene(item->getReaderKey(), item->getFieldName())); int32_t valId = VariantUtils::hashCode(val); // indirect mapping, so the MapOfSet will dedup identical valIds for us valIdToItems.put(valId, item); if (1 < readerFieldToValIds.put(rf, valId)) { valMismatchKeys.add(rf); } } Collection insanity(Collection::newInstance()); Collection mismatch(checkValueMismatch(valIdToItems, readerFieldToValIds, valMismatchKeys)); insanity.addAll(mismatch.begin(), 
mismatch.end()); Collection subreaders(checkSubreaders(valIdToItems, readerFieldToValIds)); insanity.addAll(subreaders.begin(), subreaders.end()); return insanity; } Collection FieldCacheSanityChecker::checkValueMismatch(MapSetIntFieldCacheEntry valIdToItems, MapSetReaderFieldInt readerFieldToValIds, SetReaderField valMismatchKeys) { Collection insanity(Collection::newInstance()); if (!valMismatchKeys.empty()) { // we have multiple values for some ReaderFields MapSetReaderFieldInt::map_type rfMap = readerFieldToValIds.getMap(); MapSetIntFieldCacheEntry::map_type valMap = valIdToItems.getMap(); for (SetReaderField::iterator rf = valMismatchKeys.begin(); rf != valMismatchKeys.end(); ++rf) { Collection badEntries(Collection::newInstance()); MapSetReaderFieldInt::set_type values(rfMap.get(*rf)); for (MapSetReaderFieldInt::set_type::iterator value = values.begin(); value != values.end(); ++value) { MapSetIntFieldCacheEntry::set_type cacheEntries(valMap.get(*value)); for (MapSetIntFieldCacheEntry::set_type::iterator cacheEntry = cacheEntries.begin(); cacheEntry != cacheEntries.end(); ++cacheEntry) { badEntries.add(*cacheEntry); } } insanity.add(newLucene(VALUEMISMATCH, L"Multiple distinct value objects for " + (*rf)->toString(), badEntries)); } } return insanity; } Collection FieldCacheSanityChecker::checkSubreaders(MapSetIntFieldCacheEntry valIdToItems, MapSetReaderFieldInt readerFieldToValIds) { Collection insanity(Collection::newInstance()); MapReaderFieldSetReaderField badChildren(MapReaderFieldSetReaderField::newInstance()); MapSetReaderFieldReaderField badKids(badChildren); // wrapper MapSetIntFieldCacheEntry::map_type viToItemSets = valIdToItems.getMap(); MapSetReaderFieldInt::map_type rfToValIdSets = readerFieldToValIds.getMap(); SetReaderField seen(SetReaderField::newInstance()); for (MapSetReaderFieldInt::map_type::iterator rf = rfToValIdSets.begin(); rf != rfToValIdSets.end(); ++rf) { if (seen.contains(rf->first)) { continue; } Collection 
kids(getAllDecendentReaderKeys(rf->first->readerKey)); for (Collection::iterator kidKey = kids.begin(); kidKey != kids.end(); ++kidKey) { ReaderFieldPtr kid(newLucene(*kidKey, rf->first->fieldName)); if (badChildren.contains(kid)) { // we've already process this kid as RF and found other problems track those problems as our own badKids.put(rf->first, kid); badKids.putAll(rf->first, badChildren.get(kid)); badChildren.remove(kid); } else if (rfToValIdSets.contains(kid)) { // we have cache entries for the kid badKids.put(rf->first, kid); } seen.add(kid); } seen.add(rf->first); } // every mapping in badKids represents an Insanity for (MapReaderFieldSetReaderField::iterator parent = badChildren.begin(); parent != badChildren.end(); ++parent) { SetReaderField kids = parent->second; Collection badEntries(Collection::newInstance()); // put parent entries in first MapSetReaderFieldInt::set_type values(rfToValIdSets.get(parent->first)); for (MapSetReaderFieldInt::set_type::iterator value = values.begin(); value != values.end(); ++value) { MapSetIntFieldCacheEntry::set_type cacheEntries(viToItemSets.get(*value)); badEntries.addAll(cacheEntries.begin(), cacheEntries.end()); } // now the entries for the descendants for (SetReaderField::iterator kid = kids.begin(); kid != kids.end(); ++kid) { MapSetReaderFieldInt::set_type values(rfToValIdSets.get(*kid)); for (MapSetReaderFieldInt::set_type::iterator value = values.begin(); value != values.end(); ++value) { MapSetIntFieldCacheEntry::set_type cacheEntries(viToItemSets.get(*value)); badEntries.addAll(cacheEntries.begin(), cacheEntries.end()); } } insanity.add(newLucene(SUBREADER, L"Found caches for descendants of " + parent->first->toString(), badEntries)); } return insanity; } Collection FieldCacheSanityChecker::getAllDecendentReaderKeys(const LuceneObjectPtr& seed) { Collection all(Collection::newInstance()); // will grow as we iter all.add(seed); for (int32_t i = 0; i < all.size(); ++i) { IndexReaderPtr 
indexReader(boost::dynamic_pointer_cast(all[i])); if (indexReader) { Collection subs(indexReader->getSequentialSubReaders()); for (int32_t j = 0; subs && j < subs.size(); ++j) { all.add(subs[j]->getFieldCacheKey()); } } } // need to remove the first, because it was the seed all.remove(all.begin()); return all; } ReaderField::ReaderField(const LuceneObjectPtr& readerKey, const String& fieldName) { this->readerKey = readerKey; this->fieldName = fieldName; } ReaderField::~ReaderField() { } int32_t ReaderField::hashCode() { return readerKey->hashCode() * StringUtils::hashCode(fieldName); } bool ReaderField::equals(const LuceneObjectPtr& other) { ReaderFieldPtr otherReaderField(boost::dynamic_pointer_cast(other)); if (!otherReaderField) { return false; } return (readerKey->equals(otherReaderField->readerKey) && fieldName == otherReaderField->fieldName); } String ReaderField::toString() { return readerKey->toString() + L"+" + fieldName; } Insanity::Insanity(FieldCacheSanityChecker::InsanityType type, const String& msg, Collection entries) { if (!entries || entries.empty()) { boost::throw_exception(IllegalArgumentException(L"Insanity requires non-null/non-empty CacheEntry[]")); } this->type = type; this->msg = msg; this->entries = entries; } Insanity::~Insanity() { } FieldCacheSanityChecker::InsanityType Insanity::getType() { return type; } String Insanity::getMsg() { return msg; } Collection Insanity::getCacheEntries() { return entries; } String Insanity::toString() { StringStream buffer; switch (type) { case FieldCacheSanityChecker::SUBREADER: buffer << L"SUBREADER: "; break; case FieldCacheSanityChecker::VALUEMISMATCH: buffer << L"VALUEMISMATCH: "; break; case FieldCacheSanityChecker::EXPECTED: buffer << L"EXPECTED: "; break; } buffer << msg << L"\n"; for (Collection::iterator ce = entries.begin(); ce != entries.end(); ++ce) { buffer << L"\t" << (*ce)->toString() << L"\n"; } return buffer.str(); } } 
LucenePlusPlus-rel_3.0.9/src/core/util/FileReader.cpp000066400000000000000000000036221456444476200225350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "FileReader.h" #include "MiscUtils.h" #include "FileUtils.h" #include "StringUtils.h" namespace Lucene { const int32_t FileReader::FILE_EOF = Reader::READER_EOF; const int32_t FileReader::FILE_ERROR = -1; FileReader::FileReader(const String& fileName) { this->file = newInstance(fileName, std::ios::binary | std::ios::in); if (!file->is_open()) { boost::throw_exception(FileNotFoundException(fileName)); } _length = FileUtils::fileLength(fileName); } FileReader::~FileReader() { } int32_t FileReader::read() { wchar_t buffer; return read(&buffer, 0, 1) == FILE_EOF ? FILE_EOF : buffer; } int32_t FileReader::read(wchar_t* buffer, int32_t offset, int32_t length) { try { if (file->eof()) { return FILE_EOF; } if (!fileBuffer) { fileBuffer = ByteArray::newInstance(length); } if (length > fileBuffer.size()) { fileBuffer.resize(length); } file->read((char*)fileBuffer.get(), length); int32_t readLength = file->gcount(); MiscUtils::arrayCopy(fileBuffer.get(), 0, buffer, offset, readLength); return readLength == 0 ? FILE_EOF : readLength; } catch (...) 
{ return FILE_ERROR; } } void FileReader::close() { file->close(); } bool FileReader::markSupported() { return false; } void FileReader::reset() { file->clear(); file->seekg((std::streamoff)0); } int64_t FileReader::length() { return _length; } } LucenePlusPlus-rel_3.0.9/src/core/util/FileUtils.cpp000066400000000000000000000077461456444476200224460ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include #include #include "LuceneThread.h" #include "StringUtils.h" #include "FileUtils.h" #if defined(_WIN32) || defined(_WIN64) #include #include #include #else #include #endif namespace Lucene { namespace FileUtils { bool fileExists(const String& path) { boost::system::error_code ec; return boost::filesystem::exists(path.c_str(), ec); } uint64_t fileModified(const String& path) { boost::system::error_code ec; uint64_t t = (uint64_t)boost::filesystem::last_write_time(path.c_str(), ec); return ec ? 0 : t; } bool touchFile(const String& path) { boost::system::error_code ec; boost::filesystem::last_write_time(path.c_str(), time(NULL), ec); return !ec; } int64_t fileLength(const String& path) { boost::system::error_code ec; int64_t fileSize = (int64_t)boost::filesystem::file_size(path.c_str(), ec); for (int32_t i = 0; !ec && fileSize == 0 && i < 100; ++i) { LuceneThread::threadYield(); fileSize = (int64_t)boost::filesystem::file_size(path.c_str(), ec); } return ec ? 
0 : fileSize; } bool setFileLength(const String& path, int64_t length) { if (!fileExists(path)) { return false; } #if defined(_WIN32) || defined(_WIN64) int32_t fd = _wopen(path.c_str(), _O_WRONLY | _O_CREAT | _O_BINARY, _S_IWRITE); return _chsize(fd, (long)length) == 0; #else return truncate(boost::filesystem::path(path).c_str(), (off_t)length) == 0; #endif } bool removeFile(const String& path) { boost::system::error_code ec; return boost::filesystem::remove(path.c_str(), ec); } bool copyFile(const String& source, const String& dest) { boost::system::error_code ec; boost::filesystem::copy_file(source.c_str(), dest.c_str(), ec); return !ec; } bool createDirectory(const String& path) { boost::system::error_code ec; return boost::filesystem::create_directory(path.c_str(), ec) && !ec; } bool removeDirectory(const String& path) { boost::system::error_code ec; boost::filesystem::remove_all(path.c_str(), ec); return !ec; } bool isDirectory(const String& path) { boost::system::error_code ec; return boost::filesystem::is_directory(path.c_str(), ec); } bool listDirectory(const String& path, bool filesOnly, HashSet dirList) { boost::system::error_code ec; boost::filesystem::directory_iterator dir(path.c_str(), ec); if (ec) { return false; } for (; dir != boost::filesystem::directory_iterator(); ++dir) { if (!filesOnly || !boost::filesystem::is_directory(dir->status())) { dirList.add(dir->path().filename().wstring().c_str()); } } return true; } bool copyDirectory(const String& source, const String& dest) { HashSet dirList(HashSet::newInstance()); if (!listDirectory(source, true, dirList)) { return false; } createDirectory(dest); for (HashSet::iterator file = dirList.begin(); file != dirList.end(); ++file) { copyFile(joinPath(source, *file), joinPath(dest, *file)); } return true; } String joinPath(const String& path, const String& file) { boost::filesystem::path join(path.c_str()); join /= file.c_str(); return join.wstring().c_str(); } String extractPath(const String& path) { 
boost::filesystem::wpath parentPath(path.c_str()); return parentPath.parent_path().wstring().c_str(); } String extractFile(const String& path) { boost::filesystem::wpath fileName(path.c_str()); return fileName.filename().wstring().c_str(); } } } LucenePlusPlus-rel_3.0.9/src/core/util/InfoStream.cpp000066400000000000000000000017311456444476200226010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "InfoStream.h" #include "StringUtils.h" namespace Lucene { InfoStream::InfoStream() { } InfoStream::~InfoStream() { } InfoStreamFile::InfoStreamFile(const String& path) : file(path) { } InfoStreamFile::~InfoStreamFile() { } InfoStreamFile& InfoStreamFile::operator<< (const String& t) { file << t; return *this; } InfoStreamOut::~InfoStreamOut() { } InfoStreamOut& InfoStreamOut::operator<< (const String& t) { std::wcout << t; return *this; } InfoStreamNull::~InfoStreamNull() { } InfoStreamNull& InfoStreamNull::operator<< (const String& t) { return *this; } } LucenePlusPlus-rel_3.0.9/src/core/util/InputStreamReader.cpp000066400000000000000000000021671456444476200241340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "InputStreamReader.h" #include "BufferedReader.h" #include "UTF8Stream.h" namespace Lucene { InputStreamReader::InputStreamReader(const ReaderPtr& reader) { this->reader = reader; this->decoder = newLucene(newLucene(reader, 1024)); } InputStreamReader::~InputStreamReader() { } int32_t InputStreamReader::read() { int32_t buffer; return read((wchar_t*)&buffer, 0, 1) == READER_EOF ? READER_EOF : buffer; } int32_t InputStreamReader::read(wchar_t* b, int32_t offset, int32_t length) { return decoder->decode(b + offset, length); } void InputStreamReader::close() { reader->close(); } bool InputStreamReader::markSupported() { return false; } void InputStreamReader::reset() { reader->reset(); } } LucenePlusPlus-rel_3.0.9/src/core/util/LuceneAllocator.cpp000066400000000000000000000022111456444476200236000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LuceneAllocator.h" namespace Lucene { void* AllocMemory(size_t size) { #if (defined(_WIN32) || defined(_WIN64)) && !defined(NDEBUG) return _malloc_dbg(size, _NORMAL_BLOCK, __FILE__, __LINE__); #else return malloc(size); #endif } void* ReallocMemory(void* memory, size_t size) { if (memory == NULL) { return AllocMemory(size); } if (size == 0) { FreeMemory(memory); return NULL; } #if defined(_WIN32) && !defined(NDEBUG) return _realloc_dbg(memory, size, _NORMAL_BLOCK, __FILE__, __LINE__); #else return realloc(memory, size); #endif } void FreeMemory(void* memory) { if (memory == NULL) { return; } #if defined(_WIN32) && !defined(NDEBUG) _free_dbg(memory, _NORMAL_BLOCK); #else free(memory); #endif } } LucenePlusPlus-rel_3.0.9/src/core/util/LuceneException.cpp000066400000000000000000000077751456444476200236420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
/////////////////////////////////////////////////////////////////////////////
#include "LuceneInc.h"
#include "LuceneException.h"
#include "StringUtils.h"

namespace Lucene {

// Base exception carrying an error string and a type tag; the tag lets a
// caught-by-value LuceneException be re-thrown as its concrete subclass.
LuceneException::LuceneException(const String& error, ExceptionType type) throw() {
    this->error = error;
    this->type = type;
    // pre-render the narrow what() string, since what() must not throw
    SingleStringStream ss;
    ss << "LuceneException[" << type << "]: " << StringUtils::toUTF8(error);
    this->_what = ss.str();
}

LuceneException::~LuceneException() throw() {
}

LuceneException::ExceptionType LuceneException::getType() const {
    return type;
}

String LuceneException::getError() const {
    return error;
}

// True for a default-constructed ("no error") exception value.
bool LuceneException::isNull() const {
    return (type == Null);
}

// Re-throw this exception as its concrete subclass so callers can catch the
// specific type.  Each throw leaves the function; no fall-through occurs.
// A Null-typed exception simply returns without throwing.
void LuceneException::throwException() {
    switch (type) {
    case LuceneException::AlreadyClosed:
        boost::throw_exception(AlreadyClosedException(error, type));
    case LuceneException::Compression:
        boost::throw_exception(CompressionException(error, type));
    case LuceneException::CorruptIndex:
        boost::throw_exception(CorruptIndexException(error, type));
    case LuceneException::FieldReader:
        boost::throw_exception(FieldReaderException(error, type));
    case LuceneException::FileNotFound:
        boost::throw_exception(FileNotFoundException(error, type));
    case LuceneException::IllegalArgument:
        boost::throw_exception(IllegalArgumentException(error, type));
    case LuceneException::IllegalState:
        boost::throw_exception(IllegalStateException(error, type));
    case LuceneException::IndexOutOfBounds:
        boost::throw_exception(IndexOutOfBoundsException(error, type));
    case LuceneException::IO:
        boost::throw_exception(IOException(error, type));
    case LuceneException::LockObtainFailed:
        boost::throw_exception(LockObtainFailedException(error, type));
    case LuceneException::LockReleaseFailed:
        boost::throw_exception(LockReleaseFailedException(error, type));
    case LuceneException::Lookahead:
        boost::throw_exception(LookaheadSuccess(error, type));
    case LuceneException::MergeAborted:
        boost::throw_exception(MergeAbortedException(error, type));
    case LuceneException::Merge:
        boost::throw_exception(MergeException(error, type));
    case LuceneException::NoSuchDirectory:
        boost::throw_exception(NoSuchDirectoryException(error, type));
    case LuceneException::NullPointer:
        boost::throw_exception(NullPointerException(error, type));
    case LuceneException::NumberFormat:
        boost::throw_exception(NumberFormatException(error, type));
    case LuceneException::OutOfMemory:
        boost::throw_exception(OutOfMemoryError(error, type));
    case LuceneException::Parse:
        boost::throw_exception(ParseException(error, type));
    case LuceneException::QueryParser:
        boost::throw_exception(QueryParserError(error, type));
    case LuceneException::Runtime:
        boost::throw_exception(RuntimeException(error, type));
    case LuceneException::StaleReader:
        boost::throw_exception(StaleReaderException(error, type));
    case LuceneException::StopFillCache:
        boost::throw_exception(StopFillCacheException(error, type));
    case LuceneException::Temporary:
        boost::throw_exception(TemporaryException(error, type));
    case LuceneException::TimeExceeded:
        boost::throw_exception(TimeExceededException(error, type));
    case LuceneException::TooManyClauses:
        boost::throw_exception(TooManyClausesException(error, type));
    case LuceneException::UnsupportedOperation:
        boost::throw_exception(UnsupportedOperationException(error, type));
    case LuceneException::Null:
        // silence static analyzer
        break;
    }
}

const char* LuceneException::what() const throw() {
    return _what.c_str();
}

}
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LuceneObject.h" #include "StringUtils.h" namespace Lucene { LuceneObject::LuceneObject() { } LuceneObject::~LuceneObject() { } void LuceneObject::initialize() { // override } LuceneObjectPtr LuceneObject::clone(const LuceneObjectPtr& other) { if (!other) { boost::throw_exception(UnsupportedOperationException(L"clone must not be null")); } return other; } int32_t LuceneObject::hashCode() { return (int32_t)(int64_t)this; } bool LuceneObject::equals(const LuceneObjectPtr& other) { return (other && this == other.get()); } int32_t LuceneObject::compareTo(const LuceneObjectPtr& other) { return (int32_t)(this - other.get()); } String LuceneObject::toString() { return StringUtils::toString(hashCode()); } } LucenePlusPlus-rel_3.0.9/src/core/util/LuceneSignal.cpp000066400000000000000000000025731456444476200231100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LuceneSignal.h" #include "Synchronize.h" namespace Lucene { LuceneSignal::LuceneSignal(const SynchronizePtr& objectLock) { this->objectLock = objectLock; } LuceneSignal::~LuceneSignal() { } void LuceneSignal::createSignal(LuceneSignalPtr& signal, const SynchronizePtr& objectLock) { static boost::mutex lockMutex; boost::mutex::scoped_lock syncLock(lockMutex); if (!signal) { signal = newInstance(objectLock); } } void LuceneSignal::wait(int32_t timeout) { int32_t relockCount = objectLock ? 
objectLock->unlockAll() : 0; boost::mutex::scoped_lock waitLock(waitMutex); while (!signalCondition.timed_wait(waitMutex, boost::posix_time::milliseconds(timeout))) { if (timeout != 0 || signalCondition.timed_wait(waitMutex, boost::posix_time::milliseconds(10))) { break; } } for (int32_t relock = 0; relock < relockCount; ++relock) { objectLock->lock(); } } void LuceneSignal::notifyAll() { signalCondition.notify_all(); } } LucenePlusPlus-rel_3.0.9/src/core/util/LuceneSync.cpp000066400000000000000000000020401456444476200225740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LuceneSync.h" #include "Synchronize.h" #include "LuceneSignal.h" namespace Lucene { LuceneSync::~LuceneSync() { } SynchronizePtr LuceneSync::getSync() { Synchronize::createSync(objectLock); return objectLock; } LuceneSignalPtr LuceneSync::getSignal() { LuceneSignal::createSignal(objectSignal, getSync()); return objectSignal; } void LuceneSync::lock(int32_t timeout) { getSync()->lock(); } void LuceneSync::unlock() { getSync()->unlock(); } bool LuceneSync::holdsLock() { return getSync()->holdsLock(); } void LuceneSync::wait(int32_t timeout) { getSignal()->wait(timeout); } void LuceneSync::notifyAll() { getSignal()->notifyAll(); } } LucenePlusPlus-rel_3.0.9/src/core/util/LuceneThread.cpp000066400000000000000000000053571456444476200231050ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "LuceneThread.h" namespace Lucene { #if defined(_WIN32) || defined(_WIN64) const int32_t LuceneThread::MAX_THREAD_PRIORITY = THREAD_PRIORITY_HIGHEST; const int32_t LuceneThread::NORM_THREAD_PRIORITY = THREAD_PRIORITY_NORMAL; const int32_t LuceneThread::MIN_THREAD_PRIORITY = THREAD_PRIORITY_LOWEST; #else const int32_t LuceneThread::MAX_THREAD_PRIORITY = 2; const int32_t LuceneThread::NORM_THREAD_PRIORITY = 0; const int32_t LuceneThread::MIN_THREAD_PRIORITY = -2; #endif LuceneThread::LuceneThread() { running = false; } LuceneThread::~LuceneThread() { } void LuceneThread::start() { setRunning(false); thread = newInstance(LuceneThread::runThread, this); setRunning(true); } void LuceneThread::runThread(LuceneThread* thread) { LuceneThreadPtr threadObject(thread->shared_from_this()); try { threadObject->run(); } catch (...) { } threadObject->setRunning(false); threadObject.reset(); } void LuceneThread::setRunning(bool running) { SyncLock syncLock(this); this->running = running; } bool LuceneThread::isRunning() { SyncLock syncLock(this); return running; } bool LuceneThread::isAlive() { return (thread && isRunning()); } void LuceneThread::setPriority(int32_t priority) { #if defined(_WIN32) || defined(_WIN64) if (thread) { SetThreadPriority(thread->native_handle(), priority); } #endif } int32_t LuceneThread::getPriority() { #if defined(_WIN32) || defined(_WIN64) return thread ? 
GetThreadPriority(thread->native_handle()) : NORM_THREAD_PRIORITY; #else return NORM_THREAD_PRIORITY; #endif } void LuceneThread::yield() { if (thread) { thread->yield(); } } bool LuceneThread::join(int32_t timeout) { while (isAlive() && !thread->timed_join(boost::posix_time::milliseconds(timeout))) { if (timeout != 0) { return false; } if (thread->timed_join(boost::posix_time::milliseconds(10))) { return true; } } return true; } int64_t LuceneThread::currentId() { #if defined(_WIN32) || defined(_WIN64) return (int64_t)GetCurrentThreadId(); #else return (int64_t)pthread_self(); #endif } void LuceneThread::threadSleep(int32_t time) { boost::this_thread::sleep(boost::posix_time::milliseconds(time)); } void LuceneThread::threadYield() { boost::this_thread::yield(); } } LucenePlusPlus-rel_3.0.9/src/core/util/MiscUtils.cpp000066400000000000000000000077721456444476200224610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MiscUtils.h" #include "LuceneObject.h" namespace Lucene { const uint32_t MiscUtils::SINGLE_EXPONENT_MASK = 0x7f800000; const uint32_t MiscUtils::SINGLE_MANTISSA_MASK = 0x007fffff; const uint32_t MiscUtils::SINGLE_NAN_BITS = (MiscUtils::SINGLE_EXPONENT_MASK | 0x00400000); const uint64_t MiscUtils::DOUBLE_SIGN_MASK = 0x8000000000000000LL; const uint64_t MiscUtils::DOUBLE_EXPONENT_MASK = 0x7ff0000000000000LL; const uint64_t MiscUtils::DOUBLE_MANTISSA_MASK = 0x000fffffffffffffLL; const uint64_t MiscUtils::DOUBLE_NAN_BITS = DOUBLE_EXPONENT_MASK | 0x0008000000000000LL; uint64_t MiscUtils::getTimeMillis(boost::posix_time::ptime time) { return boost::posix_time::time_duration(time - boost::posix_time::ptime(boost::gregorian::date(1970, 1, 1))).total_milliseconds(); } uint64_t MiscUtils::currentTimeMillis() { return getTimeMillis(boost::posix_time::microsec_clock::universal_time()); } int32_t MiscUtils::getNextSize(int32_t targetSize) { return (targetSize >> 3) + (targetSize < 9 ? 3 : 6) + targetSize; } int32_t MiscUtils::getShrinkSize(int32_t currentSize, int32_t targetSize) { int32_t newSize = getNextSize(targetSize); return (newSize < currentSize / 2) ? newSize : currentSize; } int32_t MiscUtils::bytesDifference(uint8_t* bytes1, int32_t len1, uint8_t* bytes2, int32_t len2) { int32_t len = std::min(len1, len2); for (int32_t i = 0; i < len; ++i) { if (bytes1[i] != bytes2[i]) { return i; } } return len; } int32_t MiscUtils::hashCode(const wchar_t* array, int32_t start, int32_t end) { return hashCode(array + start, array + end, hashNumeric); } int32_t MiscUtils::hashCode(const uint8_t* array, int32_t start, int32_t end) { return hashCode(array + start, array + end, hashNumeric); } int32_t MiscUtils::hashCode(bool value) { return value ? 
1231 : 1237; } int32_t MiscUtils::doubleToIntBits(double value) { int32_t intValue = 0; float floatValue = (float)value; std::memcpy(&intValue, &floatValue, sizeof(float)); if ((intValue & SINGLE_EXPONENT_MASK) == SINGLE_EXPONENT_MASK) { if (intValue & SINGLE_MANTISSA_MASK) { return SINGLE_NAN_BITS; } } return intValue; } int32_t MiscUtils::doubleToRawIntBits(double value) { int32_t intValue = 0; float floatValue = (float)value; std::memcpy(&intValue, &floatValue, sizeof(float)); return intValue; } double MiscUtils::intBitsToDouble(int32_t bits) { float floatValue = 0; std::memcpy(&floatValue, &bits, sizeof(int32_t)); return (double)floatValue; } int64_t MiscUtils::doubleToLongBits(double value) { int64_t longValue = 0; std::memcpy(&longValue, &value, sizeof(double)); if ((longValue & DOUBLE_EXPONENT_MASK) == DOUBLE_EXPONENT_MASK) { if (longValue & DOUBLE_MANTISSA_MASK) { return DOUBLE_NAN_BITS; } } return longValue; } int64_t MiscUtils::doubleToRawLongBits(double value) { int64_t longValue = 0; std::memcpy(&longValue, &value, sizeof(double)); return longValue; } double MiscUtils::longBitsToDouble(int64_t bits) { double doubleValue = 0; std::memcpy(&doubleValue, &bits, sizeof(int64_t)); return doubleValue; } bool MiscUtils::isInfinite(double value) { return (value == std::numeric_limits::infinity() || value == -std::numeric_limits::infinity()); } bool MiscUtils::isNaN(double value) { return (value != value); } bool MiscUtils::equalTypes(const LuceneObjectPtr& first, const LuceneObjectPtr& second) { const LuceneObject& firstRef(*first); const LuceneObject& secondRef(*second); return (typeid(firstRef) == typeid(secondRef)); } } LucenePlusPlus-rel_3.0.9/src/core/util/NumericUtils.cpp000066400000000000000000000230561456444476200231610ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "NumericUtils.h" #include "MiscUtils.h" #include "StringUtils.h" namespace Lucene { /// The default precision step used by {@link NumericField}, {@link NumericTokenStream}, {@link NumericRangeQuery}, /// and {@link NumericRangeFilter} as default. const int32_t NumericUtils::PRECISION_STEP_DEFAULT = 4; /// Longs are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_LONG + /// shift in the first character. const wchar_t NumericUtils::SHIFT_START_LONG = (wchar_t)0x20; /// The maximum term length (used for char[] buffer size) for encoding long values. /// @see #longToPrefixCoded(long,int,char[]) const int32_t NumericUtils::BUF_SIZE_LONG = 63 / 7 + 2; /// Integers are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_INT + /// shift in the first character. const wchar_t NumericUtils::SHIFT_START_INT = (wchar_t)0x60; /// The maximum term length (used for char[] buffer size) for encoding int values. /// @see #intToPrefixCoded(int,int,char[]) const int32_t NumericUtils::BUF_SIZE_INT = 31 / 7 + 2; NumericUtils::~NumericUtils() { } int32_t NumericUtils::longToPrefixCoded(int64_t val, int32_t shift, CharArray buffer) { if (shift > 63 || shift < 0) { boost::throw_exception(IllegalArgumentException(L"Illegal shift value, must be 0..63")); } int32_t nChars = (63 - shift) / 7 + 1; int32_t len = nChars + 1; buffer[0] = (wchar_t)(SHIFT_START_LONG + shift); int64_t sortableBits = val ^ 0x8000000000000000LL; sortableBits = MiscUtils::unsignedShift(sortableBits, (int64_t)shift); while (nChars >= 1) { // Store 7 bits per character for good efficiency when UTF-8 encoding. The whole number is // right-justified so that lucene can prefix-encode the terms more efficiently. 
buffer[nChars--] = (wchar_t)(sortableBits & 0x7f); sortableBits = MiscUtils::unsignedShift(sortableBits, (int64_t)7); } return len; } String NumericUtils::longToPrefixCoded(int64_t val, int32_t shift) { CharArray buffer(CharArray::newInstance(BUF_SIZE_LONG)); int32_t len = longToPrefixCoded(val, shift, buffer); return String(buffer.get(), len); } String NumericUtils::longToPrefixCoded(int64_t val) { return longToPrefixCoded(val, 0); } int32_t NumericUtils::intToPrefixCoded(int32_t val, int32_t shift, CharArray buffer) { if (shift > 31 || shift < 0) { boost::throw_exception(IllegalArgumentException(L"Illegal shift value, must be 0..31")); } int32_t nChars = (31 - shift) / 7 + 1; int32_t len = nChars + 1; buffer[0] = (wchar_t)(SHIFT_START_INT + shift); int32_t sortableBits = val ^ 0x80000000; sortableBits = MiscUtils::unsignedShift(sortableBits, shift); while (nChars >= 1) { // Store 7 bits per character for good efficiency when UTF-8 encoding. The whole number is // right-justified so that lucene can prefix-encode the terms more efficiently. 
buffer[nChars--] = (wchar_t)(sortableBits & 0x7f); sortableBits = MiscUtils::unsignedShift(sortableBits, 7); } return len; } String NumericUtils::intToPrefixCoded(int32_t val, int32_t shift) { CharArray buffer(CharArray::newInstance(BUF_SIZE_INT)); int32_t len = intToPrefixCoded(val, shift, buffer); return String(buffer.get(), len); } String NumericUtils::intToPrefixCoded(int32_t val) { return intToPrefixCoded(val, 0); } int64_t NumericUtils::prefixCodedToLong(const String& prefixCoded) { int32_t shift = prefixCoded[0] - SHIFT_START_LONG; if (shift > 63 || shift < 0) { boost::throw_exception(NumberFormatException(L"Invalid shift value in prefixCoded string (is encoded value really a LONG?)")); } int64_t sortableBits = 0; for (int32_t i = 1, len = prefixCoded.length(); i < len; ++i) { sortableBits <<= 7; wchar_t ch = prefixCoded[i]; if (ch > 0x7f) { boost::throw_exception(NumberFormatException(L"Invalid prefixCoded numerical value representation (char " + StringUtils::toString(ch, 16) + L" at position " + StringUtils::toString(i) + L" is invalid)")); } sortableBits |= (int64_t)ch; } return (sortableBits << shift) ^ 0x8000000000000000LL; } int32_t NumericUtils::prefixCodedToInt(const String& prefixCoded) { int32_t shift = prefixCoded[0] - SHIFT_START_INT; if (shift > 31 || shift < 0) { boost::throw_exception(NumberFormatException(L"Invalid shift value in prefixCoded string (is encoded value really a INT?)")); } int32_t sortableBits = 0; for (int32_t i = 1, len = prefixCoded.length(); i < len; ++i) { sortableBits <<= 7; wchar_t ch = prefixCoded[i]; if (ch > 0x7f) { boost::throw_exception(NumberFormatException(L"Invalid prefixCoded numerical value representation (char " + StringUtils::toString(ch, 16) + L" at position " + StringUtils::toString(i) + L" is invalid)")); } sortableBits |= (int32_t)ch; } return (sortableBits << shift) ^ 0x80000000; } int64_t NumericUtils::doubleToSortableLong(double val) { int64_t f = MiscUtils::doubleToRawLongBits(val); if (f < 0) { f ^= 
0x7fffffffffffffffLL; } return f; } String NumericUtils::doubleToPrefixCoded(double val) { return longToPrefixCoded(doubleToSortableLong(val)); } double NumericUtils::sortableLongToDouble(int64_t val) { if (val < 0) { val ^= 0x7fffffffffffffffLL; } return MiscUtils::longBitsToDouble(val); } double NumericUtils::prefixCodedToDouble(const String& val) { return sortableLongToDouble(prefixCodedToLong(val)); } void NumericUtils::splitLongRange(const LongRangeBuilderPtr& builder, int32_t precisionStep, int64_t minBound, int64_t maxBound) { splitRange(builder, 64, precisionStep, minBound, maxBound); } void NumericUtils::splitIntRange(const IntRangeBuilderPtr& builder, int32_t precisionStep, int32_t minBound, int32_t maxBound) { splitRange(builder, 32, precisionStep, (int64_t)minBound, (int64_t)maxBound); } void NumericUtils::splitRange(const LuceneObjectPtr& builder, int32_t valSize, int32_t precisionStep, int64_t minBound, int64_t maxBound) { if (precisionStep < 1) { boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); } if (minBound > maxBound) { return; } for (int32_t shift = 0; ; shift += precisionStep) { // calculate new bounds for inner precision int64_t diff = (int64_t)1 << (shift + precisionStep); int64_t mask = (((int64_t)1 << precisionStep) - (int64_t)1) << shift; bool hasLower = ((minBound & mask) != 0); bool hasUpper = ((maxBound & mask) != mask); int64_t nextMinBound = ((hasLower ? (minBound + diff) : minBound) & ~mask); int64_t nextMaxBound = ((hasUpper ? (maxBound - diff) : maxBound) & ~mask); bool lowerWrapped = nextMinBound < minBound; bool upperWrapped = nextMaxBound > maxBound; if (shift + precisionStep >= valSize || nextMinBound>nextMaxBound || lowerWrapped || upperWrapped) { // We are in the lowest precision or the next precision is not available. 
addRange(builder, valSize, minBound, maxBound, shift); break; // exit the split recursion loop } if (hasLower) { addRange(builder, valSize, minBound, minBound | mask, shift); } if (hasUpper) { addRange(builder, valSize, maxBound & ~mask, maxBound, shift); } // recurse to next precision minBound = nextMinBound; maxBound = nextMaxBound; } } void NumericUtils::addRange(const LuceneObjectPtr& builder, int32_t valSize, int64_t minBound, int64_t maxBound, int32_t shift) { // for the max bound set all lower bits (that were shifted away): this is important for testing or other // usages of the splitted range (eg. to reconstruct the full range). The prefixEncoding will remove the // bits anyway, so they do not hurt! maxBound |= ((int64_t)1 << shift) - (int64_t)1; // delegate to correct range builder switch (valSize) { case 64: boost::dynamic_pointer_cast(builder)->addRange(minBound, maxBound, shift); break; case 32: boost::dynamic_pointer_cast(builder)->addRange((int32_t)minBound, (int32_t)maxBound, shift); break; default: boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64.")); } } LongRangeBuilder::~LongRangeBuilder() { } void LongRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) { boost::throw_exception(UnsupportedOperationException()); } void LongRangeBuilder::addRange(int64_t min, int64_t max, int32_t shift) { addRange(NumericUtils::longToPrefixCoded(min, shift), NumericUtils::longToPrefixCoded(max, shift)); } IntRangeBuilder::~IntRangeBuilder() { } void IntRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) { boost::throw_exception(UnsupportedOperationException()); } void IntRangeBuilder::addRange(int32_t min, int32_t max, int32_t shift) { addRange(NumericUtils::intToPrefixCoded(min, shift), NumericUtils::intToPrefixCoded(max, shift)); } } 
LucenePlusPlus-rel_3.0.9/src/core/util/OpenBitSet.cpp000066400000000000000000000400361456444476200225470ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "OpenBitSet.h" #include "OpenBitSetIterator.h" #include "BitUtil.h" #include "MiscUtils.h" namespace Lucene { OpenBitSet::OpenBitSet(int64_t numBits) { bits = LongArray::newInstance(bits2words(numBits)); MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0LL); wlen = bits.size(); } OpenBitSet::OpenBitSet(LongArray bits, int32_t numWords) { this->bits = bits; this->wlen = numWords; } OpenBitSet::~OpenBitSet() { } DocIdSetIteratorPtr OpenBitSet::iterator() { return newLucene(bits, wlen); } bool OpenBitSet::isCacheable() { return true; } int64_t OpenBitSet::capacity() { return bits.size() << 6; } int64_t OpenBitSet::size() { return capacity(); } bool OpenBitSet::isEmpty() { return (cardinality() == 0); } LongArray OpenBitSet::getBits() { return bits; } void OpenBitSet::setBits(LongArray bits) { this->bits = bits; } int32_t OpenBitSet::getNumWords() { return wlen; } void OpenBitSet::setNumWords(int32_t numWords) { this->wlen = numWords; } bool OpenBitSet::get(int32_t index) { int32_t i = index >> 6; // div 64 // signed shift will keep a negative index and force an array-index-out-of-bounds-exception, // removing the need for an explicit check. 
if (i >= bits.size()) { return false; } int32_t bit = (index & 0x3f); // mod 64 int64_t bitmask = 1LL << bit; return ((bits[i] & bitmask) != 0); } bool OpenBitSet::fastGet(int32_t index) { int32_t i = index >> 6; // div 64 // signed shift will keep a negative index and force an array-index-out-of-bounds-exception, // removing the need for an explicit check. int32_t bit = (index & 0x3f); // mod 64 int64_t bitmask = 1LL << bit; return ((bits[i] & bitmask) != 0); } bool OpenBitSet::get(int64_t index) { int32_t i = (int32_t)(index >> 6); // div 64 if (i >= bits.size()) { return false; } int32_t bit = ((int32_t)index & 0x3f); // mod 64 int64_t bitmask = 1LL << bit; return ((bits[i] & bitmask) != 0); } bool OpenBitSet::fastGet(int64_t index) { int32_t i = (int32_t)(index >> 6); // div 64 int32_t bit = ((int32_t)index & 0x3f); // mod 64 int64_t bitmask = 1LL << bit; return ((bits[i] & bitmask) != 0); } int32_t OpenBitSet::getBit(int32_t index) { int32_t i = index >> 6; // div 64 int32_t bit = (index & 0x3f); // mod 64 return (int32_t)MiscUtils::unsignedShift(bits[i], (int64_t)bit) & 0x01; } void OpenBitSet::set(int64_t index) { int32_t wordNum = expandingWordNum(index); int32_t bit = (int32_t)index & 0x3f; int64_t bitmask = 1LL << bit; bits[wordNum] |= bitmask; } void OpenBitSet::fastSet(int32_t index) { int32_t wordNum = index >> 6; // div 64 int32_t bit = index & 0x3f; int64_t bitmask = 1LL << bit; bits[wordNum] |= bitmask; } void OpenBitSet::fastSet(int64_t index) { int32_t wordNum = (int32_t)(index >> 6); int32_t bit = (int32_t)index & 0x3f; int64_t bitmask = 1LL << bit; bits[wordNum] |= bitmask; } void OpenBitSet::set(int64_t startIndex, int64_t endIndex) { if (endIndex <= startIndex) { return; } int32_t startWord = (int32_t)(startIndex >> 6); // since endIndex is one past the end, this is index of the last word to be changed int32_t endWord = expandingWordNum(endIndex - 1); int64_t startmask = -1LL << (startIndex & 0x3f); int64_t endmask = 
MiscUtils::unsignedShift(-1LL, -endIndex); if (startWord == endWord) { bits[startWord] |= (startmask & endmask); return; } bits[startWord] |= startmask; MiscUtils::arrayFill(bits.get(), startWord + 1, endWord, -1LL); bits[endWord] |= endmask; } int32_t OpenBitSet::expandingWordNum(int64_t index) { int32_t wordNum = (int32_t)(index >> 6); if (wordNum >= wlen) { ensureCapacity(index + 1); wlen = wordNum + 1; } return wordNum; } void OpenBitSet::fastClear(int32_t index) { int32_t wordNum = index >> 6; int32_t bit = (index & 0x03f); int64_t bitmask = 1LL << bit; bits[wordNum] &= ~bitmask; } void OpenBitSet::fastClear(int64_t index) { int32_t wordNum = (int32_t)(index >> 6); int32_t bit = (int32_t)index & 0x3f; int64_t bitmask = 1LL << bit; bits[wordNum] &= ~bitmask; } void OpenBitSet::clear(int64_t index) { int32_t wordNum = (int32_t)(index >> 6); if (wordNum >= wlen) { return; } int32_t bit = (int32_t)index & 0x3f; int64_t bitmask = 1LL << bit; bits[wordNum] &= ~bitmask; } void OpenBitSet::clear(int32_t startIndex, int32_t endIndex) { if (endIndex <= startIndex) { return; } int32_t startWord = (startIndex >> 6); if (startWord >= wlen) { return; } // since endIndex is one past the end, this is index of the last word to be changed. 
int32_t endWord = ((endIndex - 1) >> 6); int64_t startmask = -1LL << (startIndex & 0x3f); int64_t endmask = MiscUtils::unsignedShift(-1LL, (int64_t)-endIndex); // invert masks since we are clearing startmask = ~startmask; endmask = ~endmask; if (startWord == endWord) { bits[startWord] &= (startmask | endmask); return; } bits[startWord] &= startmask; int32_t middle = std::min(wlen, endWord); MiscUtils::arrayFill(bits.get(), startWord + 1, middle, 0LL); if (endWord < wlen) { bits[endWord] &= endmask; } } void OpenBitSet::clear(int64_t startIndex, int64_t endIndex) { if (endIndex <= startIndex) { return; } int32_t startWord = (int32_t)(startIndex>>6); if (startWord >= wlen) { return; } // since endIndex is one past the end, this is index of the last word to be changed. int32_t endWord = (int32_t)((endIndex - 1) >> 6); int64_t startmask = -1LL << (startIndex & 0x3f); int64_t endmask = MiscUtils::unsignedShift(-1LL, -endIndex); // invert masks since we are clearing startmask = ~startmask; endmask = ~endmask; if (startWord == endWord) { bits[startWord] &= (startmask | endmask); return; } bits[startWord] &= startmask; int32_t middle = std::min(wlen, endWord); MiscUtils::arrayFill(bits.get(), startWord + 1, middle, 0LL); if (endWord < wlen) { bits[endWord] &= endmask; } } bool OpenBitSet::getAndSet(int32_t index) { int32_t wordNum = index >> 6; // div 64 int32_t bit = (index & 0x3f); // mod 64 int64_t bitmask = 1LL << bit; bool val = ((bits[wordNum] & bitmask) != 0); bits[wordNum] |= bitmask; return val; } bool OpenBitSet::getAndSet(int64_t index) { int32_t wordNum = (int32_t)(index >> 6); // div 64 int32_t bit = (int32_t)index & 0x3f; // mod 64 int64_t bitmask = 1LL << bit; bool val = ((bits[wordNum] & bitmask) != 0); bits[wordNum] |= bitmask; return val; } void OpenBitSet::fastFlip(int32_t index) { int32_t wordNum = index >> 6; // div 64 int32_t bit = (index & 0x3f); // mod 64 int64_t bitmask = 1LL << bit; bits[wordNum] ^= bitmask; } void OpenBitSet::fastFlip(int64_t 
index) { int32_t wordNum = (int32_t)(index >> 6); // div 64 int32_t bit = (int32_t)index & 0x3f; // mod 64 int64_t bitmask = 1LL << bit; bits[wordNum] ^= bitmask; } void OpenBitSet::flip(int64_t index) { int32_t wordNum = expandingWordNum(index); int32_t bit = (int32_t)index & 0x3f; // mod 64 int64_t bitmask = 1LL << bit; bits[wordNum] ^= bitmask; } bool OpenBitSet::flipAndGet(int32_t index) { int32_t wordNum = index >> 6; // div 64 int32_t bit = (index & 0x3f); // mod 64 int64_t bitmask = 1LL << bit; bits[wordNum] ^= bitmask; return ((bits[wordNum] & bitmask) != 0); } bool OpenBitSet::flipAndGet(int64_t index) { int32_t wordNum = (int32_t)(index >> 6); // div 64 int32_t bit = (int32_t)index & 0x3f; // mod 64 int64_t bitmask = 1LL << bit; bits[wordNum] ^= bitmask; return ((bits[wordNum] & bitmask) != 0); } void OpenBitSet::flip(int64_t startIndex, int64_t endIndex) { if (endIndex <= startIndex) { return; } int32_t startWord = (int32_t)(startIndex >> 6); // since endIndex is one past the end, this is index of the last word to be changed. 
int32_t endWord = expandingWordNum(endIndex - 1); int64_t startmask = -1LL << (startIndex & 0x3f); int64_t endmask = MiscUtils::unsignedShift(-1LL, -endIndex); if (startWord == endWord) { bits[startWord] ^= (startmask & endmask); return; } bits[startWord] ^= startmask; for (int32_t i = startWord + 1; i < endWord; ++i) { bits[i] = ~bits[i]; } bits[endWord] ^= endmask; } int64_t OpenBitSet::cardinality() { return BitUtil::pop_array(bits.get(), 0, wlen); } int64_t OpenBitSet::intersectionCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b) { return BitUtil::pop_intersect(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); } int64_t OpenBitSet::unionCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b) { int64_t tot = BitUtil::pop_union(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); if (a->wlen < b->wlen) { tot += BitUtil::pop_array(b->bits.get(), a->wlen, b->wlen - a->wlen); } else if (a->wlen > b->wlen) { tot += BitUtil::pop_array(a->bits.get(), b->wlen, a->wlen - b->wlen); } return tot; } int64_t OpenBitSet::andNotCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b) { int64_t tot = BitUtil::pop_andnot(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); if (a->wlen > b->wlen) { tot += BitUtil::pop_array(a->bits.get(), b->wlen, a->wlen - b->wlen); } return tot; } int64_t OpenBitSet::xorCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b) { int64_t tot = BitUtil::pop_xor(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); if (a->wlen < b->wlen) { tot += BitUtil::pop_array(b->bits.get(), a->wlen, b->wlen - a->wlen); } else if (a->wlen > b->wlen) { tot += BitUtil::pop_array(a->bits.get(), b->wlen, a->wlen - b->wlen); } return tot; } int32_t OpenBitSet::nextSetBit(int32_t index) { int32_t i = MiscUtils::unsignedShift(index, 6); if (i >= wlen) { return -1; } int32_t subIndex = (index & 0x3f); // index within the word int64_t word = MiscUtils::unsignedShift(bits[i], (int64_t)subIndex); // skip all the bits to the right of 
index if (word != 0) { return (i << 6) + subIndex + BitUtil::ntz(word); } while (++i < wlen) { word = bits[i]; if (word != 0) { return (i << 6) + BitUtil::ntz(word); } } return -1; } int64_t OpenBitSet::nextSetBit(int64_t index) { int32_t i = (int32_t)(index >> 6); if (i >= wlen) { return -1; } int32_t subIndex = (int32_t)index & 0x3f; // index within the word int64_t word = bits[i] >> subIndex; // skip all the bits to the right of index if (word != 0) { return ((int64_t)i << 6) + (subIndex + BitUtil::ntz(word)); } while (++i < wlen) { word = bits[i]; if (word != 0) { return ((int64_t)i << 6) + BitUtil::ntz(word); } } return -1; } LuceneObjectPtr OpenBitSet::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? other : newLucene(); OpenBitSetPtr cloneSet(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); cloneSet->wlen = wlen; cloneSet->bits = LongArray::newInstance(bits.size()); MiscUtils::arrayCopy(bits.get(), 0, cloneSet->bits.get(), 0, bits.size()); return cloneSet; } void OpenBitSet::intersect(const OpenBitSetPtr& other) { int32_t newLen= std::min(this->wlen, other->wlen); LongArray thisArr = this->bits; LongArray otherArr = other->bits; // testing against zero can be more efficient int32_t pos = newLen; while (--pos >= 0) { thisArr[pos] &= otherArr[pos]; } if (this->wlen > newLen) { // fill zeros from the new shorter length to the old length MiscUtils::arrayFill(bits.get(), newLen, this->wlen, 0LL); } this->wlen = newLen; } void OpenBitSet::_union(const OpenBitSetPtr& other) { int32_t newLen = std::max(wlen, other->wlen); ensureCapacityWords(newLen); LongArray thisArr = this->bits; LongArray otherArr = other->bits; int32_t pos = std::min(wlen, other->wlen); while (--pos >= 0) { thisArr[pos] |= otherArr[pos]; } if (this->wlen < newLen) { MiscUtils::arrayCopy(otherArr.get(), this->wlen, thisArr.get(), this->wlen, newLen - this->wlen); } this->wlen = newLen; } void OpenBitSet::remove(const OpenBitSetPtr& other) { int32_t idx = 
std::min(wlen, other->wlen); LongArray thisArr = this->bits; LongArray otherArr = other->bits; while (--idx >= 0) { thisArr[idx] &= ~otherArr[idx]; } } void OpenBitSet::_xor(const OpenBitSetPtr& other) { int32_t newLen = std::max(wlen, other->wlen); ensureCapacityWords(newLen); LongArray thisArr = this->bits; LongArray otherArr = other->bits; int32_t pos = std::min(wlen, other->wlen); while (--pos >= 0) { thisArr[pos] ^= otherArr[pos]; } if (this->wlen < newLen) { MiscUtils::arrayCopy(otherArr.get(), this->wlen, thisArr.get(), this->wlen, newLen - this->wlen); } this->wlen = newLen; } void OpenBitSet::_and(const OpenBitSetPtr& other) { intersect(other); } void OpenBitSet::_or(const OpenBitSetPtr& other) { _union(other); } void OpenBitSet::andNot(const OpenBitSetPtr& other) { remove(other); } bool OpenBitSet::intersects(const OpenBitSetPtr& other) { int32_t pos = std::min(this->wlen, other->wlen); LongArray thisArr = this->bits; LongArray otherArr = other->bits; while (--pos >= 0) { if ((thisArr[pos] & otherArr[pos]) !=0 ) { return true; } } return false; } void OpenBitSet::ensureCapacityWords(int32_t numWords) { int32_t length = bits.size(); if (length < numWords) { bits.resize(MiscUtils::getNextSize(numWords)); MiscUtils::arrayFill(bits.get(), length, bits.size(), 0LL); } } void OpenBitSet::ensureCapacity(int64_t numBits) { ensureCapacityWords(bits2words(numBits)); } void OpenBitSet::trimTrailingZeros() { int32_t idx = wlen - 1; while (idx >= 0 && bits[idx] == 0) { --idx; } wlen = idx + 1; } int32_t OpenBitSet::bits2words(int64_t numBits) { return (int32_t)(MiscUtils::unsignedShift(numBits - 1, (int64_t)6) + 1); } bool OpenBitSet::equals(const LuceneObjectPtr& other) { if (LuceneObject::equals(other)) { return true; } OpenBitSetPtr otherBitSet(boost::dynamic_pointer_cast(other)); if (!otherBitSet) { return false; } OpenBitSetPtr a; OpenBitSetPtr b = otherBitSet; // make a the larger set if (b->wlen > this->wlen) { a = b; b = shared_from_this(); } else { a = 
shared_from_this(); } // check for any set bits out of the range of b for (int32_t i = a->wlen - 1; i >= b->wlen; --i) { if (a->bits[i] !=0 ) { return false; } } for (int32_t i = b->wlen - 1; i >= 0; --i) { if (a->bits[i] != b->bits[i]) { return false; } } return true; } int32_t OpenBitSet::hashCode() { // Start with a zero hash and use a mix that results in zero if the input is zero. // This effectively truncates trailing zeros without an explicit check. int64_t hash = 0; for (int32_t i = bits.size(); --i >= 0;) { hash ^= bits[i]; hash = (hash << 1) | MiscUtils::unsignedShift(hash, (int64_t)63); // rotate left } // Fold leftmost bits into right and add a constant to prevent empty sets from // returning 0, which is too common. return (int32_t)((hash >> 32) ^ hash) + 0x98761234; } } LucenePlusPlus-rel_3.0.9/src/core/util/OpenBitSetDISI.cpp000066400000000000000000000031221456444476200232130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "OpenBitSetDISI.h" namespace Lucene { OpenBitSetDISI::OpenBitSetDISI(const DocIdSetIteratorPtr& disi, int32_t maxSize) : OpenBitSet(maxSize) { inPlaceOr(disi); } OpenBitSetDISI::OpenBitSetDISI(int32_t maxSize) : OpenBitSet(maxSize) { } OpenBitSetDISI::~OpenBitSetDISI() { } void OpenBitSetDISI::inPlaceOr(const DocIdSetIteratorPtr& disi) { int32_t doc; int32_t _size = size(); while ((doc = disi->nextDoc()) < _size) { set(doc); } } void OpenBitSetDISI::inPlaceAnd(const DocIdSetIteratorPtr& disi) { int32_t bitSetDoc = nextSetBit((int32_t)0); int32_t disiDoc; while (bitSetDoc != -1 && (disiDoc = disi->advance(bitSetDoc)) != DocIdSetIterator::NO_MORE_DOCS) { clear(bitSetDoc, disiDoc); bitSetDoc = nextSetBit(disiDoc + 1); } if (bitSetDoc != -1) { clear((int64_t)bitSetDoc, size()); } } void OpenBitSetDISI::inPlaceNot(const DocIdSetIteratorPtr& disi) { int32_t doc; int32_t _size = size(); while ((doc = disi->nextDoc()) < _size) { clear(doc); } } void OpenBitSetDISI::inPlaceXor(const DocIdSetIteratorPtr& disi) { int32_t doc; int32_t _size = size(); while ((doc = disi->nextDoc()) < _size) { flip(doc); } } } LucenePlusPlus-rel_3.0.9/src/core/util/OpenBitSetIterator.cpp000066400000000000000000000127221456444476200242620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "OpenBitSetIterator.h" #include "OpenBitSet.h" #include "MiscUtils.h" namespace Lucene { /// The General Idea: instead of having an array per byte that has the offsets of the /// next set bit, that array could be packed inside a 32 bit integer (8 4 bit numbers). 
/// That should be faster than accessing an array for each index, and the total array /// size is kept smaller (256*sizeof(int32_t))=1K const int32_t OpenBitSetIterator::bitlist[] = { 0x0, 0x1, 0x2, 0x21, 0x3, 0x31, 0x32, 0x321, 0x4, 0x41, 0x42, 0x421, 0x43, 0x431, 0x432, 0x4321, 0x5, 0x51, 0x52, 0x521, 0x53, 0x531, 0x532, 0x5321, 0x54, 0x541, 0x542, 0x5421, 0x543, 0x5431, 0x5432, 0x54321, 0x6, 0x61, 0x62, 0x621, 0x63, 0x631, 0x632, 0x6321, 0x64, 0x641, 0x642, 0x6421, 0x643, 0x6431, 0x6432, 0x64321, 0x65, 0x651, 0x652, 0x6521, 0x653, 0x6531, 0x6532, 0x65321, 0x654, 0x6541, 0x6542, 0x65421, 0x6543, 0x65431, 0x65432, 0x654321, 0x7, 0x71, 0x72, 0x721, 0x73, 0x731, 0x732, 0x7321, 0x74, 0x741, 0x742, 0x7421, 0x743, 0x7431, 0x7432, 0x74321, 0x75, 0x751, 0x752, 0x7521, 0x753, 0x7531, 0x7532, 0x75321, 0x754, 0x7541, 0x7542, 0x75421, 0x7543, 0x75431, 0x75432, 0x754321, 0x76, 0x761, 0x762, 0x7621, 0x763, 0x7631, 0x7632, 0x76321, 0x764, 0x7641, 0x7642, 0x76421, 0x7643, 0x76431, 0x76432, 0x764321, 0x765, 0x7651, 0x7652, 0x76521, 0x7653, 0x76531, 0x76532, 0x765321, 0x7654, 0x76541, 0x76542, 0x765421, 0x76543, 0x765431, 0x765432, 0x7654321, 0x8, 0x81, 0x82, 0x821, 0x83, 0x831, 0x832, 0x8321, 0x84, 0x841, 0x842, 0x8421, 0x843, 0x8431, 0x8432, 0x84321, 0x85, 0x851, 0x852, 0x8521, 0x853, 0x8531, 0x8532, 0x85321, 0x854, 0x8541, 0x8542, 0x85421, 0x8543, 0x85431, 0x85432, 0x854321, 0x86, 0x861, 0x862, 0x8621, 0x863, 0x8631, 0x8632, 0x86321, 0x864, 0x8641, 0x8642, 0x86421, 0x8643, 0x86431, 0x86432, 0x864321, 0x865, 0x8651, 0x8652, 0x86521, 0x8653, 0x86531, 0x86532, 0x865321, 0x8654, 0x86541, 0x86542, 0x865421, 0x86543, 0x865431, 0x865432, 0x8654321, 0x87, 0x871, 0x872, 0x8721, 0x873, 0x8731, 0x8732, 0x87321, 0x874, 0x8741, 0x8742, 0x87421, 0x8743, 0x87431, 0x87432, 0x874321, 0x875, 0x8751, 0x8752, 0x87521, 0x8753, 0x87531, 0x87532, 0x875321, 0x8754, 0x87541, 0x87542, 0x875421, 0x87543, 0x875431, 0x875432, 0x8754321, 0x876, 0x8761, 0x8762, 0x87621, 0x8763, 0x87631, 0x87632, 0x876321, 
0x8764, 0x87641, 0x87642, 0x876421, 0x87643, 0x876431, 0x876432, 0x8764321, 0x8765, 0x87651, 0x87652, 0x876521, 0x87653, 0x876531, 0x876532, 0x8765321, 0x87654, 0x876541, 0x876542, 0x8765421, 0x876543, 0x8765431, 0x8765432, static_cast(0x87654321) }; OpenBitSetIterator::OpenBitSetIterator(const OpenBitSetPtr& bitSet) { arr = bitSet->getBits(); words = bitSet->getNumWords(); i = -1; word = 0; wordShift = 0; indexArray = 0; curDocId = -1; } OpenBitSetIterator::OpenBitSetIterator(LongArray bits, int32_t numWords) { arr = bits; words = numWords; i = -1; word = 0; wordShift = 0; indexArray = 0; curDocId = -1; } OpenBitSetIterator::~OpenBitSetIterator() { } void OpenBitSetIterator::shift() { if ((int32_t)word == 0) { wordShift += 32; word = MiscUtils::unsignedShift(word, (int64_t)32); } if ((word & 0x0000ffff) == 0) { wordShift += 16; word = MiscUtils::unsignedShift(word, (int64_t)16); } if ((word & 0x000000ff) == 0) { wordShift += 8; word = MiscUtils::unsignedShift(word, (int64_t)8); } indexArray = bitlist[(int32_t)word & 0xff]; } int32_t OpenBitSetIterator::nextDoc() { if (indexArray == 0) { if (word != 0) { word = MiscUtils::unsignedShift(word, (int64_t)8); wordShift += 8; } while (word == 0) { if (++i >= words) { curDocId = NO_MORE_DOCS; return curDocId; } word = arr[i]; wordShift = -1; // loop invariant code motion should move this } // after the first time, should I go with a linear search, or stick with the binary search in shift? 
shift(); } int32_t bitIndex = (indexArray & 0x0f) + wordShift; indexArray = MiscUtils::unsignedShift(indexArray, 4); curDocId = (i << 6) + bitIndex; return curDocId; } int32_t OpenBitSetIterator::advance(int32_t target) { indexArray = 0; i = target >> 6; if (i >= words) { word = 0; // setup so next() will also return -1 curDocId = NO_MORE_DOCS; return curDocId; } wordShift = target & 0x3f; word = MiscUtils::unsignedShift(arr[i], (int64_t)wordShift); if (word != 0) { --wordShift; // compensate for 1 based arrIndex } else { while (word == 0) { if (++i >= words) { curDocId = NO_MORE_DOCS; return curDocId; } word = arr[i]; } wordShift = -1; } shift(); int32_t bitIndex = (indexArray & 0x0f) + wordShift; indexArray = MiscUtils::unsignedShift(indexArray, 4); curDocId = (i << 6) + bitIndex; return curDocId; } int32_t OpenBitSetIterator::docID() { return curDocId; } } LucenePlusPlus-rel_3.0.9/src/core/util/Random.cpp000066400000000000000000000024061456444476200217520ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Random.h" #include "MiscUtils.h" namespace Lucene { Random::Random() { this->seed = (int64_t)MiscUtils::currentTimeMillis(); } Random::Random(int64_t seed) { this->seed = seed; } Random::~Random() { } void Random::setSeed(int64_t seed) { this->seed = (seed ^ 0x5deece66dLL) & (((int64_t)1 << 48) - 1); } int32_t Random::nextInt(int32_t limit) { if ((limit & -limit) == limit) { return (int32_t)((limit * (int64_t)next(31)) >> 31); } int32_t bits = 0; int32_t val = 0; do { bits = next(31); val = bits % limit; } while (bits - val + (limit - 1) < 0); return val; } double Random::nextDouble() { return ((double)(((int64_t)next(26) << 27) + next(27)) / (double)((int64_t)1 << 53)); } int32_t Random::next(int32_t bits) { seed = (seed * 0x5deece66dLL + 0xb) & (((int64_t)1 << 48) - 1); return (int32_t)(seed >> (48 - bits)); } } LucenePlusPlus-rel_3.0.9/src/core/util/Reader.cpp000066400000000000000000000015761456444476200217430ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "Reader.h" namespace Lucene { const int32_t Reader::READER_EOF = -1; Reader::Reader() { } Reader::~Reader() { } int32_t Reader::read() { wchar_t buffer; return read(&buffer, 0, 1) == READER_EOF ? 
READER_EOF : buffer; } int64_t Reader::skip(int64_t n) { return 0; // override } bool Reader::markSupported() { return false; // override } void Reader::mark(int32_t readAheadLimit) { // override } void Reader::reset() { // override } int64_t Reader::length() { return 0; // override } } LucenePlusPlus-rel_3.0.9/src/core/util/ReaderUtil.cpp000066400000000000000000000037751456444476200226040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ReaderUtil.h" #include "IndexReader.h" namespace Lucene { ReaderUtil::~ReaderUtil() { } void ReaderUtil::gatherSubReaders(Collection allSubReaders, const IndexReaderPtr& reader) { Collection subReaders(reader->getSequentialSubReaders()); if (!subReaders) { // Add the reader itself, and do not recurse allSubReaders.add(reader); } else { for (Collection::iterator subReader = subReaders.begin(); subReader != subReaders.end(); ++subReader) { gatherSubReaders(allSubReaders, *subReader); } } } IndexReaderPtr ReaderUtil::subReader(int32_t doc, const IndexReaderPtr& reader) { Collection subReaders(Collection::newInstance()); ReaderUtil::gatherSubReaders(subReaders, reader); Collection docStarts(Collection::newInstance(subReaders.size())); int32_t maxDoc = 0; for (int32_t i = 0; i < subReaders.size(); ++i) { docStarts[i] = maxDoc; maxDoc += subReaders[i]->maxDoc(); } return subReaders[ReaderUtil::subIndex(doc, docStarts)]; } IndexReaderPtr ReaderUtil::subReader(const IndexReaderPtr& reader, int32_t subIndex) { Collection subReaders(Collection::newInstance()); ReaderUtil::gatherSubReaders(subReaders, reader); return subReaders[subIndex]; } int32_t ReaderUtil::subIndex(int32_t n, Collection 
docStarts) { // Binary search to locate reader Collection::iterator index = std::upper_bound(docStarts.begin(), docStarts.end(), n); return (std::distance(docStarts.begin(), index) - 1); } } LucenePlusPlus-rel_3.0.9/src/core/util/ScorerDocQueue.cpp000066400000000000000000000075751456444476200234360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ScorerDocQueue.h" #include "_ScorerDocQueue.h" #include "Scorer.h" #include "MiscUtils.h" namespace Lucene { ScorerDocQueue::ScorerDocQueue(int32_t maxSize) { this->_size = 0; int32_t heapSize = maxSize + 1; heap = Collection::newInstance(heapSize); this->maxSize = maxSize; topHSD = heap[1]; // initially null } ScorerDocQueue::~ScorerDocQueue() { } void ScorerDocQueue::put(const ScorerPtr& scorer) { heap[++_size] = newLucene(scorer); upHeap(); } bool ScorerDocQueue::insert(const ScorerPtr& scorer) { if (_size < maxSize) { put(scorer); return true; } else { int32_t docNr = scorer->docID(); if ((_size > 0) && (!(docNr < topHSD->doc))) { // heap[1] is top() heap[1] = newLucene(scorer, docNr); downHeap(); return true; } else { return false; } } } ScorerPtr ScorerDocQueue::top() { return topHSD->scorer; } int32_t ScorerDocQueue::topDoc() { return topHSD->doc; } double ScorerDocQueue::topScore() { return topHSD->scorer->score(); } bool ScorerDocQueue::topNextAndAdjustElsePop() { return checkAdjustElsePop(topHSD->scorer->nextDoc() != DocIdSetIterator::NO_MORE_DOCS); } bool ScorerDocQueue::topSkipToAndAdjustElsePop(int32_t target) { return checkAdjustElsePop(topHSD->scorer->advance(target) != DocIdSetIterator::NO_MORE_DOCS); } bool ScorerDocQueue::checkAdjustElsePop(bool cond) { if 
(cond) { // see also adjustTop topHSD->doc = topHSD->scorer->docID(); } else { // see also popNoResult heap[1] = heap[_size]; // move last to first heap[_size--].reset(); } downHeap(); return cond; } ScorerPtr ScorerDocQueue::pop() { ScorerPtr result(topHSD->scorer); popNoResult(); return result; } void ScorerDocQueue::popNoResult() { heap[1] = heap[_size]; // move last to first heap[_size--].reset(); downHeap(); // adjust heap } void ScorerDocQueue::adjustTop() { topHSD->adjust(); downHeap(); } int32_t ScorerDocQueue::size() { return _size; } void ScorerDocQueue::clear() { for (int32_t i = 0; i <= _size; ++i) { heap[i].reset(); } _size = 0; } void ScorerDocQueue::upHeap() { int32_t i = _size; HeapedScorerDocPtr node(heap[i]); // save bottom node int32_t j = MiscUtils::unsignedShift(i, 1); while ((j > 0) && (node->doc < heap[j]->doc)) { heap[i] = heap[j]; // shift parents down i = j; j = MiscUtils::unsignedShift(j, 1); } heap[i] = node; // install saved node topHSD = heap[1]; } void ScorerDocQueue::downHeap() { int32_t i = 1; HeapedScorerDocPtr node(heap[i]); // save top node int32_t j = i << 1; // find smaller child int32_t k = j + 1; if ((k <= _size) && (heap[k]->doc < heap[j]->doc)) { j = k; } while ((j <= _size) && (heap[j]->doc < node->doc)) { heap[i] = heap[j]; // shift up child i = j; j = i << 1; k = j + 1; if (k <= _size && (heap[k]->doc < heap[j]->doc)) { j = k; } } heap[i] = node; // install saved node topHSD = heap[1]; } HeapedScorerDoc::HeapedScorerDoc(const ScorerPtr& scorer) { this->scorer = scorer; this->doc = scorer->docID(); } HeapedScorerDoc::HeapedScorerDoc(const ScorerPtr& scorer, int32_t doc) { this->scorer = scorer; this->doc = doc; } HeapedScorerDoc::~HeapedScorerDoc() { } void HeapedScorerDoc::adjust() { doc = scorer->docID(); } } LucenePlusPlus-rel_3.0.9/src/core/util/SmallDouble.cpp000066400000000000000000000025641456444476200227420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // 
Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SmallDouble.h" #include "MiscUtils.h" namespace Lucene { SmallDouble::~SmallDouble() { } uint8_t SmallDouble::doubleToByte(double f) { if (f < 0.0) { // round negatives up to zero f = 0.0; } if (f == 0.0) { // zero is a special case return 0; } int32_t bits = MiscUtils::doubleToIntBits(f); int32_t mantissa = (bits & 0xffffff) >> 21; int32_t exponent = (((bits >> 24) & 0x7f) - 63) + 15; if (exponent > 31) { // overflow: use max value exponent = 31; mantissa = 7; } if (exponent < 0) { // underflow: use min value exponent = 0; mantissa = 1; } return (uint8_t)((exponent << 3) | mantissa); // pack into a uint8_t } double SmallDouble::byteToDouble(uint8_t b) { if (b == 0) { // zero is a special case return 0.0; } int32_t mantissa = b & 7; int32_t exponent = (b >> 3) & 31; int32_t bits = ((exponent + (63 - 15)) << 24) | (mantissa << 21); return MiscUtils::intBitsToDouble(bits); } } LucenePlusPlus-rel_3.0.9/src/core/util/SortedVIntList.cpp000066400000000000000000000110131456444476200234210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "SortedVIntList.h" #include "_SortedVIntList.h" #include "BitSet.h" #include "OpenBitSet.h" #include "DocIdSetIterator.h" #include "MiscUtils.h" namespace Lucene { /// When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set, a SortedVIntList representing the /// index numbers of the set bits will be smaller than that BitSet. const int32_t SortedVIntList::BITS2VINTLIST_SIZE = 8; const int32_t SortedVIntList::VB1 = 0x7f; const int32_t SortedVIntList::BIT_SHIFT = 7; const int32_t SortedVIntList::MAX_BYTES_PER_INT = (31 / SortedVIntList::BIT_SHIFT) + 1; SortedVIntList::SortedVIntList(Collection sortedInts) { lastInt = 0; initBytes(); for (int32_t i = 0; i < sortedInts.size(); ++i) { addInt(sortedInts[i]); } bytes.resize(lastBytePos); } SortedVIntList::SortedVIntList(Collection sortedInts, int32_t inputSize) { lastInt = 0; initBytes(); for (int32_t i = 0; i < inputSize; ++i) { addInt(sortedInts[i]); } bytes.resize(lastBytePos); } SortedVIntList::SortedVIntList(const BitSetPtr& bits) { lastInt = 0; initBytes(); int32_t nextInt = bits->nextSetBit(0); while (nextInt != -1) { addInt(nextInt); nextInt = bits->nextSetBit(nextInt + 1); } bytes.resize(lastBytePos); } SortedVIntList::SortedVIntList(const OpenBitSetPtr& bits) { lastInt = 0; initBytes(); int32_t nextInt = bits->nextSetBit((int32_t)0); while (nextInt != -1) { addInt(nextInt); nextInt = bits->nextSetBit(nextInt + 1); } bytes.resize(lastBytePos); } SortedVIntList::SortedVIntList(const DocIdSetIteratorPtr& docIdSetIterator) { lastInt = 0; initBytes(); int32_t doc; while ((doc = docIdSetIterator->nextDoc()) != DocIdSetIterator::NO_MORE_DOCS) { addInt(doc); } bytes.resize(lastBytePos); } SortedVIntList::~SortedVIntList() { } void SortedVIntList::initBytes() { _size = 0; bytes = ByteArray::newInstance(128); // initial byte size lastBytePos = 0; } void SortedVIntList::addInt(int32_t nextInt) { int32_t diff = 
nextInt - lastInt; if (diff < 0) { boost::throw_exception(IllegalArgumentException(L"Input not sorted or first element negative.")); } if (!bytes || (lastBytePos + MAX_BYTES_PER_INT) > bytes.size()) { // biggest possible int does not fit bytes.resize((bytes.size() * 2) + MAX_BYTES_PER_INT); } // See IndexOutput.writeVInt() while ((diff & ~VB1) != 0) { // The high bit of the next byte needs to be set. bytes[lastBytePos++] = (uint8_t)((diff & VB1) | ~VB1); diff = MiscUtils::unsignedShift(diff, BIT_SHIFT); } bytes[lastBytePos++] = (uint8_t)diff; // Last byte, high bit not set. ++_size; lastInt = nextInt; } int32_t SortedVIntList::size() { return _size; } int32_t SortedVIntList::getByteSize() { return bytes ? bytes.size() : 0; } bool SortedVIntList::isCacheable() { return true; } DocIdSetIteratorPtr SortedVIntList::iterator() { return newLucene(shared_from_this()); } SortedDocIdSetIterator::SortedDocIdSetIterator(const SortedVIntListPtr& list) { _list = list; bytePos = 0; lastInt = 0; doc = -1; } SortedDocIdSetIterator::~SortedDocIdSetIterator() { } void SortedDocIdSetIterator::advance() { SortedVIntListPtr list(_list); // See IndexInput.readVInt() uint8_t b = list->bytes[bytePos++]; lastInt += b & list->VB1; for (int32_t s = list->BIT_SHIFT; (b & ~list->VB1) != 0; s += list->BIT_SHIFT) { b = list->bytes[bytePos++]; lastInt += (b & list->VB1) << s; } } int32_t SortedDocIdSetIterator::docID() { return doc; } int32_t SortedDocIdSetIterator::nextDoc() { SortedVIntListPtr list(_list); if (bytePos >= list->lastBytePos) { doc = NO_MORE_DOCS; } else { advance(); doc = lastInt; } return doc; } int32_t SortedDocIdSetIterator::advance(int32_t target) { SortedVIntListPtr list(_list); while (bytePos < list->lastBytePos) { advance(); if (lastInt >= target) { doc = lastInt; return doc; } } doc = NO_MORE_DOCS; return doc; } } 
LucenePlusPlus-rel_3.0.9/src/core/util/StringReader.cpp000066400000000000000000000023221456444476200231200ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "StringReader.h" namespace Lucene { StringReader::StringReader(const String& str) { this->str = str; this->position = 0; } StringReader::~StringReader() { } int32_t StringReader::read() { return position == (int32_t)str.length() ? READER_EOF : (int32_t)str[position++]; } int32_t StringReader::read(wchar_t* buffer, int32_t offset, int32_t length) { if (position >= (int32_t)str.length()) { return READER_EOF; } int32_t readChars = std::min(length, (int32_t)str.length() - position); std::wcsncpy(buffer + offset, str.c_str() + position, readChars); position += readChars; return readChars; } void StringReader::close() { str.clear(); } bool StringReader::markSupported() { return false; } void StringReader::reset() { position = 0; } int64_t StringReader::length() { return str.length(); } } LucenePlusPlus-rel_3.0.9/src/core/util/StringUtils.cpp000066400000000000000000000145531456444476200230270ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "StringUtils.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "UTF8Stream.h" #include "Reader.h" #include "CharFolder.h" namespace Lucene { /// Maximum length of UTF encoding. 
const int32_t StringUtils::MAX_ENCODING_UTF8_SIZE = 4; /// Default character radix. const int32_t StringUtils::CHARACTER_MAX_RADIX = 36; int32_t StringUtils::toUnicode(const uint8_t* utf8, int32_t length, CharArray unicode) { if (length == 0) { return 0; } UTF8Decoder utf8Decoder(utf8, utf8 + length); int32_t decodeLength = utf8Decoder.decode(unicode.get(), unicode.size()); return decodeLength == Reader::READER_EOF ? 0 : decodeLength; } int32_t StringUtils::toUnicode(const uint8_t* utf8, int32_t length, const UnicodeResultPtr& unicodeResult) { if (length == 0) { unicodeResult->length = 0; } else { if (length > unicodeResult->result.size()) { unicodeResult->result.resize(length); } unicodeResult->length = toUnicode(utf8, length, unicodeResult->result); } return unicodeResult->length; } String StringUtils::toUnicode(const uint8_t* utf8, int32_t length) { if (length == 0) { return L""; } CharArray unicode(CharArray::newInstance(length)); int32_t result = toUnicode(utf8, length, unicode); return String(unicode.get(), result); } String StringUtils::toUnicode(const SingleString& s) { return s.empty() ? L"" : toUnicode((uint8_t*)s.c_str(), s.length()); } int32_t StringUtils::toUTF8(const wchar_t* unicode, int32_t length, ByteArray utf8) { if (length == 0) { return 0; } UTF8Encoder utf8Encoder(unicode, unicode + length); int32_t encodeLength = utf8Encoder.encode(utf8.get(), utf8.size()); return encodeLength == Reader::READER_EOF ? 
0 : encodeLength; } int32_t StringUtils::toUTF8(const wchar_t* unicode, int32_t length, const UTF8ResultPtr& utf8Result) { if (length == 0) { utf8Result->length = 0; } else { if (length * MAX_ENCODING_UTF8_SIZE > utf8Result->result.size()) { utf8Result->result.resize(length * MAX_ENCODING_UTF8_SIZE); } utf8Result->length = toUTF8(unicode, length, utf8Result->result); } return utf8Result->length; } SingleString StringUtils::toUTF8(const wchar_t* unicode, int32_t length) { if (length == 0) { return ""; } ByteArray utf8(ByteArray::newInstance(length * MAX_ENCODING_UTF8_SIZE)); int32_t result = toUTF8(unicode, length, utf8); return SingleString((char*)utf8.get(), result); } SingleString StringUtils::toUTF8(const String& s) { return s.empty() ? "" : toUTF8(s.c_str(), s.size()); } void StringUtils::toLower(String& str) { CharFolder::toLower(str.begin(), str.end()); } String StringUtils::toLower(const String& str) { String lowerStr(str); toLower(lowerStr); return lowerStr; } void StringUtils::toUpper(String& str) { CharFolder::toUpper(str.begin(), str.end()); } String StringUtils::toUpper(const String& str) { String upperStr(str); toUpper(upperStr); return upperStr; } int32_t StringUtils::compareCase(const String& first, const String& second) { return (toLower(first) == toLower(second)); } Collection StringUtils::split(const String& str, const String& delim) { std::vector tokens; boost::split(tokens, str, boost::is_any_of(delim.c_str())); return Collection::newInstance(tokens.begin(), tokens.end()); } int32_t StringUtils::toInt(const String& value) { if (value.empty()) { boost::throw_exception(NumberFormatException()); } if (value.size() > 1 && value[0] == L'-' && !UnicodeUtil::isDigit(value[1])) { boost::throw_exception(NumberFormatException()); } if (value[0] != L'-' && !UnicodeUtil::isDigit(value[0])) { boost::throw_exception(NumberFormatException()); } return (int32_t)std::wcstol(value.c_str(), NULL, 10); } int64_t StringUtils::toLong(const String& value) { if 
(value.empty()) { boost::throw_exception(NumberFormatException()); } if (value.size() > 1 && value[0] == L'-' && !UnicodeUtil::isDigit(value[1])) { boost::throw_exception(NumberFormatException()); } if (value[0] != L'-' && !UnicodeUtil::isDigit(value[0])) { boost::throw_exception(NumberFormatException()); } #if defined(_WIN32) || defined(_WIN64) return _wcstoi64(value.c_str(), 0, 10); #else return wcstoll(value.c_str(), 0, 10); #endif } int64_t StringUtils::toLong(const String& value, int32_t base) { int64_t longValue = 0; for (String::const_iterator ptr = value.begin(); ptr != value.end(); ++ptr) { longValue = UnicodeUtil::isDigit(*ptr) ? (base * longValue) + (*ptr - L'0') : (base * longValue) + (*ptr - L'a' + 10); } return longValue; } double StringUtils::toDouble(const String& value) { if (value.empty()) { boost::throw_exception(NumberFormatException()); } if (value.length() > 1 && (value[0] == L'-' || value[0] == L'.') && !UnicodeUtil::isDigit(value[1])) { boost::throw_exception(NumberFormatException()); } if (value[0] != L'-' && value[0] != L'.' 
&& !UnicodeUtil::isDigit(value[0])) { boost::throw_exception(NumberFormatException()); } return std::wcstod(value.c_str(), NULL); } int32_t StringUtils::hashCode(const String& value) { int32_t hashCode = 0; for (String::const_iterator ptr = value.begin(); ptr != value.end(); ++ptr) { hashCode = hashCode * 31 + *ptr; } return hashCode; } String StringUtils::toString(int64_t value, int32_t base) { static const wchar_t* digits = L"0123456789abcdefghijklmnopqrstuvwxyz"; int32_t bufferSize = (sizeof(int32_t) << 3) + 1; CharArray baseOutput(CharArray::newInstance(bufferSize)); wchar_t* ptr = baseOutput.get() + bufferSize - 1; *ptr = L'\0'; do { *--ptr = digits[value % base]; value /= base; } while (ptr > baseOutput.get() && value > 0); return String(ptr, (baseOutput.get() + bufferSize - 1) - ptr); } } LucenePlusPlus-rel_3.0.9/src/core/util/Synchronize.cpp000066400000000000000000000033211456444476200230420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include #include "Synchronize.h" #include "LuceneThread.h" namespace Lucene { Synchronize::Synchronize() { lockThread = 0; recursionCount = 0; } Synchronize::~Synchronize() { } void Synchronize::createSync(SynchronizePtr& sync) { static boost::mutex lockMutex; boost::mutex::scoped_lock syncLock(lockMutex); if (!sync) { sync = newInstance(); } } void Synchronize::lock(int32_t timeout) { if (timeout > 0) { mutexSynchronize.timed_lock(boost::posix_time::milliseconds(timeout)); } else { mutexSynchronize.lock(); } lockThread = LuceneThread::currentId(); ++recursionCount; } void Synchronize::unlock() { if (--recursionCount == 0) { lockThread = 0; } mutexSynchronize.unlock(); } int32_t Synchronize::unlockAll() { int32_t count = recursionCount; for (int32_t unlock = 0; unlock < count; ++unlock) { this->unlock(); } return count; } bool Synchronize::holdsLock() { return (lockThread == LuceneThread::currentId() && recursionCount > 0); } SyncLock::SyncLock(const SynchronizePtr& sync, int32_t timeout) { this->sync = sync; lock(timeout); } SyncLock::~SyncLock() { if (sync) { sync->unlock(); } } void SyncLock::lock(int32_t timeout) { if (sync) { sync->lock(timeout); } } } LucenePlusPlus-rel_3.0.9/src/core/util/TestPoint.cpp000066400000000000000000000032561456444476200224670ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "TestPoint.h" namespace Lucene { MapStringInt TestPoint::testMethods = MapStringInt::newInstance(); bool TestPoint::enable = false; TestPoint::~TestPoint() { } void TestPoint::enableTestPoints() { enable = true; } void TestPoint::clear() { SyncLock syncLock(&testMethods); testMethods.clear(); } void TestPoint::setTestPoint(const String& object, const String& method, bool point) { if (enable) { SyncLock syncLock(&testMethods); testMethods.put(object + L":" + method, point); testMethods.put(method, point); } } bool TestPoint::getTestPoint(const String& object, const String& method) { SyncLock syncLock(&testMethods); MapStringInt::iterator testMethod = testMethods.find(object + L":" + method); return testMethod == testMethods.end() ? false : (testMethod->second != 0); } bool TestPoint::getTestPoint(const String& method) { SyncLock syncLock(&testMethods); MapStringInt::iterator testMethod = testMethods.find(method); return testMethod == testMethods.end() ? false : (testMethod->second != 0); } TestScope::TestScope(const String& object, const String& method) { this->object = object; this->method = method; TestPoint::setTestPoint(object, method, true); } TestScope::~TestScope() { TestPoint::setTestPoint(object, method, false); } } LucenePlusPlus-rel_3.0.9/src/core/util/ThreadPool.cpp000066400000000000000000000020561456444476200225740ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "ThreadPool.h" namespace Lucene { Future::~Future() { } const int32_t ThreadPool::THREADPOOL_SIZE = 5; ThreadPool::ThreadPool() { work.reset(new boost::asio::io_service::work(io_service)); for (int32_t i = 0; i < THREADPOOL_SIZE; ++i) { threadGroup.create_thread(boost::bind(&boost::asio::io_service::run, &io_service)); } } ThreadPool::~ThreadPool() { work.reset(); // stop all threads threadGroup.join_all(); // wait for all competition } ThreadPoolPtr ThreadPool::getInstance() { static ThreadPoolPtr threadPool; LUCENE_RUN_ONCE( threadPool = newLucene(); CycleCheck::addStatic(threadPool); ); return threadPool; } } LucenePlusPlus-rel_3.0.9/src/core/util/UTF8Stream.cpp000066400000000000000000000261241456444476200224370ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "UTF8Stream.h" #include "Reader.h" namespace Lucene { const uint16_t UTF8Base::LEAD_SURROGATE_MIN = 0xd800u; const uint16_t UTF8Base::LEAD_SURROGATE_MAX = 0xdbffu; const uint16_t UTF8Base::TRAIL_SURROGATE_MIN = 0xdc00u; const uint16_t UTF8Base::TRAIL_SURROGATE_MAX = 0xdfffu; const uint16_t UTF8Base::LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10); const uint32_t UTF8Base::SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN; // Maximum valid value for a Unicode code point const uint32_t UTF8Base::CODE_POINT_MAX = 0x0010ffffu; #ifdef LPP_UNICODE_CHAR_SIZE_2 const wchar_t UTF8Base::UNICODE_REPLACEMENT_CHAR = (wchar_t)0xfffd; const wchar_t UTF8Base::UNICODE_TERMINATOR = (wchar_t)0xffff; #else const wchar_t UTF8Base::UNICODE_REPLACEMENT_CHAR = (wchar_t)0x0001fffd; const wchar_t UTF8Base::UNICODE_TERMINATOR = (wchar_t)0x0001ffff; #endif UTF8Base::~UTF8Base() { } inline uint8_t UTF8Base::mask8(uint32_t b) { return static_cast(0xff & b); } inline uint16_t UTF8Base::mask16(uint32_t c) { return static_cast(0xffff & c); } inline bool UTF8Base::isTrail(uint32_t b) { return ((mask8(b) >> 6) == 0x2); } inline bool UTF8Base::isSurrogate(uint32_t cp) { return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); } inline bool UTF8Base::isLeadSurrogate(uint32_t cp) { return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX); } inline bool UTF8Base::isTrailSurrogate(uint32_t cp) { return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); } inline bool UTF8Base::isValidCodePoint(uint32_t cp) { return (cp <= CODE_POINT_MAX && !isSurrogate(cp) && cp != 0xfffe && cp != 0xffff); } inline bool UTF8Base::isOverlongSequence(uint32_t cp, int32_t length) { if (cp < 0x80) { if (length != 1) { return true; } } else if (cp < 0x800) { if (length != 2) { return true; } } else if (cp < 0x10000) { if (length != 3) { return true; } } return false; } 
UTF8Encoder::UTF8Encoder(const wchar_t* unicodeBegin, const wchar_t* unicodeEnd) { this->unicodeBegin = unicodeBegin; this->unicodeEnd = unicodeEnd; } UTF8Encoder::~UTF8Encoder() { } uint32_t UTF8Encoder::readNext() { return unicodeBegin == unicodeEnd ? (uint32_t)UNICODE_TERMINATOR : (uint32_t)*unicodeBegin++; } inline uint8_t* UTF8Encoder::appendChar(uint8_t* utf8, uint32_t cp) { if (cp < 0x80) { // one octet *(utf8++) = static_cast(cp); } else if (cp < 0x800) { // two octets *(utf8++) = static_cast((cp >> 6) | 0xc0); *(utf8++) = static_cast((cp & 0x3f) | 0x80); } else if (cp < 0x10000) { // three octets *(utf8++) = static_cast((cp >> 12) | 0xe0); *(utf8++) = static_cast(((cp >> 6) & 0x3f) | 0x80); *(utf8++) = static_cast((cp & 0x3f) | 0x80); } else { // four octets *(utf8++) = static_cast((cp >> 18) | 0xf0); *(utf8++) = static_cast(((cp >> 12) & 0x3f) | 0x80); *(utf8++) = static_cast(((cp >> 6) & 0x3f) | 0x80); *(utf8++) = static_cast((cp & 0x3f) | 0x80); } return utf8; } int32_t UTF8Encoder::utf16to8(uint8_t* utf8, int32_t length) { uint8_t* start = utf8; uint32_t next = readNext(); while (next != UNICODE_TERMINATOR) { uint32_t cp = mask16(next); if (isLeadSurrogate(cp)) { next = readNext(); if (next == UNICODE_TERMINATOR) { return 0; } uint32_t trail_surrogate = mask16(next); if (!isTrailSurrogate(trail_surrogate)) { return 0; } cp = (cp << 10) + trail_surrogate + SURROGATE_OFFSET; } else if (isTrailSurrogate(cp)) { return 0; } if (!isValidCodePoint(cp)) { return 0; } utf8 = appendChar(utf8, cp); if ((utf8 - start) >= length) { break; } next = readNext(); } return ((utf8 - start) == 0 && next == UNICODE_TERMINATOR) ? 
Reader::READER_EOF : (utf8 - start); } int32_t UTF8Encoder::utf32to8(uint8_t* utf8, int32_t length) { uint8_t* start = utf8; uint32_t next = readNext(); while (next != UNICODE_TERMINATOR) { if (!isValidCodePoint(next)) { return 0; } utf8 = appendChar(utf8, next); if ((utf8 - start) >= length) { break; } next = readNext(); } return ((utf8 - start) == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : (utf8 - start); } int32_t UTF8Encoder::encode(uint8_t* utf8, int32_t length) { #ifdef LPP_UNICODE_CHAR_SIZE_2 return utf16to8(utf8, length); #else return utf32to8(utf8, length); #endif } UTF8EncoderStream::UTF8EncoderStream(const ReaderPtr& reader) : UTF8Encoder(NULL, NULL) { this->reader = reader; } UTF8EncoderStream::~UTF8EncoderStream() { } uint32_t UTF8EncoderStream::readNext() { int32_t next = reader->read(); return next == Reader::READER_EOF ? UNICODE_TERMINATOR : (uint32_t)next; } UTF8Decoder::UTF8Decoder(const uint8_t* utf8Begin, const uint8_t* utf8End) { this->utf8Begin = utf8Begin; this->utf8End = utf8End; } UTF8Decoder::~UTF8Decoder() { } uint32_t UTF8Decoder::readNext() { return utf8Begin == utf8End ? 
(uint32_t)UNICODE_TERMINATOR : (uint32_t)*utf8Begin++; } inline int32_t UTF8Decoder::sequenceLength(uint32_t cp) { uint8_t lead = mask8(cp); if (lead < 0x80) { return 1; } else if ((lead >> 5) == 0x6) { return 2; } else if ((lead >> 4) == 0xe) { return 3; } else if ((lead >> 3) == 0x1e) { return 4; } return 0; } inline bool UTF8Decoder::getSequence(uint32_t& cp, int32_t length) { cp = mask8(cp); if (length == 1) { return true; } uint32_t next = readNext(); if (next == UNICODE_TERMINATOR) { return false; } if (!isTrail(next)) { return false; } if (length == 2) { cp = ((cp << 6) & 0x7ff) + (next & 0x3f); return true; } if (length == 3) { cp = ((cp << 12) & 0xffff) + ((mask8(next) << 6) & 0xfff); } else { cp = ((cp << 18) & 0x1fffff) + ((mask8(next) << 12) & 0x3ffff); } next = readNext(); if (next == UNICODE_TERMINATOR) { return false; } if (!isTrail(next)) { return false; } if (length == 3) { cp += next & 0x3f; return true; } cp += (mask8(next) << 6) & 0xfff; next = readNext(); if (next == UNICODE_TERMINATOR) { return false; } if (!isTrail(next)) { return false; } cp += next & 0x3f; return true; } inline bool UTF8Decoder::isValidNext(uint32_t& cp) { // Determine the sequence length based on the lead octet int32_t length = sequenceLength(cp); if (length < 1 || length > 4) { return false; } // Now that we have a valid sequence length, get trail octets and calculate the code point if (!getSequence(cp, length)) { return false; } // Decoding succeeded, now security checks return (isValidCodePoint(cp) && !isOverlongSequence(cp, length)); } int32_t UTF8Decoder::utf8to16(wchar_t* unicode, int32_t length) { int32_t position = 0; uint32_t next = readNext(); while (next != UNICODE_TERMINATOR) { if (!isValidNext(next)) { return 0; } if (next > 0xffff) { // make a surrogate pair unicode[position++] = static_cast((next >> 10) + LEAD_OFFSET); unicode[position++] = static_cast((next & 0x3ff) + TRAIL_SURROGATE_MIN); } else { unicode[position++] = static_cast(next); } if (position >= 
length) { break; } next = readNext(); } return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position; } int32_t UTF8Decoder::utf8to32(wchar_t* unicode, int32_t length) { int32_t position = 0; uint32_t next = readNext(); while (next != UNICODE_TERMINATOR) { if (!isValidNext(next)) { return 0; } unicode[position++] = static_cast(next); if (position >= length) { break; } next = readNext(); } return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position; } int32_t UTF8Decoder::decode(wchar_t* unicode, int32_t length) { #ifdef LPP_UNICODE_CHAR_SIZE_2 return utf8to16(unicode, length); #else return utf8to32(unicode, length); #endif } UTF8DecoderStream::UTF8DecoderStream(const ReaderPtr& reader) : UTF8Decoder(NULL, NULL) { this->reader = reader; } UTF8DecoderStream::~UTF8DecoderStream() { } uint32_t UTF8DecoderStream::readNext() { int32_t next = reader->read(); return next == Reader::READER_EOF ? UNICODE_TERMINATOR : (uint32_t)next; } UTF16Decoder::UTF16Decoder(const uint16_t* utf16Begin, const uint16_t* utf16End) { this->utf16Begin = utf16Begin; this->utf16End = utf16End; } UTF16Decoder::~UTF16Decoder() { } uint32_t UTF16Decoder::readNext() { return utf16Begin == utf16End ? (uint32_t)UNICODE_TERMINATOR : (uint32_t)*utf16Begin++; } int32_t UTF16Decoder::utf16to32(wchar_t* unicode, int32_t length) { int32_t position = 0; uint32_t next = readNext(); while (next != UNICODE_TERMINATOR) { uint32_t cp = mask16(next); if (isLeadSurrogate(cp)) { next = readNext(); if (next == UNICODE_TERMINATOR) { return 0; } uint32_t trail_surrogate = mask16(next); if (!isTrailSurrogate(trail_surrogate)) { return 0; } unicode[position++] = static_cast(((cp - LEAD_SURROGATE_MIN) << 10) + (trail_surrogate - TRAIL_SURROGATE_MIN) + 0x0010000); } else if (isTrailSurrogate(cp)) { return 0; } else { unicode[position++] = static_cast(cp); } if (position >= length) { break; } next = readNext(); } return (position == 0 && next == UNICODE_TERMINATOR) ? 
Reader::READER_EOF : position; } int32_t UTF16Decoder::utf16to16(wchar_t* unicode, int32_t length) { int32_t position = 0; uint32_t next = readNext(); while (next != UNICODE_TERMINATOR) { unicode[position++] = static_cast(next); if (position >= length) { break; } next = readNext(); } return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position; } int32_t UTF16Decoder::decode(wchar_t* unicode, int32_t length) { #ifdef LPP_UNICODE_CHAR_SIZE_2 return utf16to16(unicode, length); #else return utf16to32(unicode, length); #endif } } LucenePlusPlus-rel_3.0.9/src/core/util/UnicodeUtils.cpp000066400000000000000000000025661456444476200231500ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "MiscUtils.h" #include "UnicodeUtils.h" #include "unicode/guniprop.h" namespace Lucene { UnicodeUtil::~UnicodeUtil() { } bool UnicodeUtil::isAlnum(wchar_t c) { return g_unichar_isalnum(c); } bool UnicodeUtil::isAlpha(wchar_t c) { return g_unichar_isalpha(c); } bool UnicodeUtil::isDigit(wchar_t c) { return g_unichar_isdigit(c); } bool UnicodeUtil::isSpace(wchar_t c) { return g_unichar_isspace(c); } bool UnicodeUtil::isUpper(wchar_t c) { return g_unichar_isupper(c); } bool UnicodeUtil::isLower(wchar_t c) { return g_unichar_islower(c); } bool UnicodeUtil::isOther(wchar_t c) { return (g_unichar_type(c) == G_UNICODE_OTHER_LETTER); } bool UnicodeUtil::isNonSpacing(wchar_t c) { return (g_unichar_type(c) == G_UNICODE_NON_SPACING_MARK); } wchar_t UnicodeUtil::toUpper(wchar_t c) { return (wchar_t)g_unichar_toupper(c); } wchar_t UnicodeUtil::toLower(wchar_t c) { return (wchar_t)g_unichar_tolower(c); } UTF8Result::~UTF8Result() { } 
UnicodeResult::~UnicodeResult() { } } LucenePlusPlus-rel_3.0.9/src/core/util/md5/000077500000000000000000000000001456444476200205115ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/util/md5/md5.c000066400000000000000000000302221456444476200213410ustar00rootroot00000000000000/* Copyright (C) 1999, 2000, 2002 Aladdin Enterprises. All rights reserved. This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. L. Peter Deutsch ghost@aladdin.com */ /* $Id: md5.c,v 1.6 2002/04/13 19:20:28 lpd Exp $ */ /* Independent implementation of MD5 (RFC 1321). This code implements the MD5 Algorithm defined in RFC 1321, whose text is available at http://www.ietf.org/rfc/rfc1321.txt The code is derived from the text of the RFC, including the test suite (section A.5) but excluding the rest of Appendix A. It does not include any code or documentation that is identified in the RFC as being copyrighted. The original and principal author of md5.c is L. Peter Deutsch . Other authors are noted in the change history that follows (in reverse chronological order): 2002-04-13 lpd Clarified derivation from RFC 1321; now handles byte order either statically or dynamically; added missing #include in library. 
2002-03-11 lpd Corrected argument list for main(), and added int return type, in test program and T value program. 2002-02-21 lpd Added missing #include in test program. 2000-07-03 lpd Patched to eliminate warnings about "constant is unsigned in ANSI C, signed in traditional"; made test program self-checking. 1999-11-04 lpd Edited comments slightly for automatic TOC extraction. 1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5). 1999-05-03 lpd Original version. */ #include "md5.h" #include #undef BYTE_ORDER /* 1 = big-endian, -1 = little-endian, 0 = unknown */ #ifdef ARCH_IS_BIG_ENDIAN # define BYTE_ORDER (ARCH_IS_BIG_ENDIAN ? 1 : -1) #else # define BYTE_ORDER 0 #endif #define T_MASK ((md5_word_t)~0) #define T1 /* 0xd76aa478 */ (T_MASK ^ 0x28955b87) #define T2 /* 0xe8c7b756 */ (T_MASK ^ 0x173848a9) #define T3 0x242070db #define T4 /* 0xc1bdceee */ (T_MASK ^ 0x3e423111) #define T5 /* 0xf57c0faf */ (T_MASK ^ 0x0a83f050) #define T6 0x4787c62a #define T7 /* 0xa8304613 */ (T_MASK ^ 0x57cfb9ec) #define T8 /* 0xfd469501 */ (T_MASK ^ 0x02b96afe) #define T9 0x698098d8 #define T10 /* 0x8b44f7af */ (T_MASK ^ 0x74bb0850) #define T11 /* 0xffff5bb1 */ (T_MASK ^ 0x0000a44e) #define T12 /* 0x895cd7be */ (T_MASK ^ 0x76a32841) #define T13 0x6b901122 #define T14 /* 0xfd987193 */ (T_MASK ^ 0x02678e6c) #define T15 /* 0xa679438e */ (T_MASK ^ 0x5986bc71) #define T16 0x49b40821 #define T17 /* 0xf61e2562 */ (T_MASK ^ 0x09e1da9d) #define T18 /* 0xc040b340 */ (T_MASK ^ 0x3fbf4cbf) #define T19 0x265e5a51 #define T20 /* 0xe9b6c7aa */ (T_MASK ^ 0x16493855) #define T21 /* 0xd62f105d */ (T_MASK ^ 0x29d0efa2) #define T22 0x02441453 #define T23 /* 0xd8a1e681 */ (T_MASK ^ 0x275e197e) #define T24 /* 0xe7d3fbc8 */ (T_MASK ^ 0x182c0437) #define T25 0x21e1cde6 #define T26 /* 0xc33707d6 */ (T_MASK ^ 0x3cc8f829) #define T27 /* 0xf4d50d87 */ (T_MASK ^ 0x0b2af278) #define T28 0x455a14ed #define T29 /* 0xa9e3e905 */ (T_MASK ^ 0x561c16fa) #define T30 /* 0xfcefa3f8 */ (T_MASK ^ 0x03105c07) 
#define T31 0x676f02d9 #define T32 /* 0x8d2a4c8a */ (T_MASK ^ 0x72d5b375) #define T33 /* 0xfffa3942 */ (T_MASK ^ 0x0005c6bd) #define T34 /* 0x8771f681 */ (T_MASK ^ 0x788e097e) #define T35 0x6d9d6122 #define T36 /* 0xfde5380c */ (T_MASK ^ 0x021ac7f3) #define T37 /* 0xa4beea44 */ (T_MASK ^ 0x5b4115bb) #define T38 0x4bdecfa9 #define T39 /* 0xf6bb4b60 */ (T_MASK ^ 0x0944b49f) #define T40 /* 0xbebfbc70 */ (T_MASK ^ 0x4140438f) #define T41 0x289b7ec6 #define T42 /* 0xeaa127fa */ (T_MASK ^ 0x155ed805) #define T43 /* 0xd4ef3085 */ (T_MASK ^ 0x2b10cf7a) #define T44 0x04881d05 #define T45 /* 0xd9d4d039 */ (T_MASK ^ 0x262b2fc6) #define T46 /* 0xe6db99e5 */ (T_MASK ^ 0x1924661a) #define T47 0x1fa27cf8 #define T48 /* 0xc4ac5665 */ (T_MASK ^ 0x3b53a99a) #define T49 /* 0xf4292244 */ (T_MASK ^ 0x0bd6ddbb) #define T50 0x432aff97 #define T51 /* 0xab9423a7 */ (T_MASK ^ 0x546bdc58) #define T52 /* 0xfc93a039 */ (T_MASK ^ 0x036c5fc6) #define T53 0x655b59c3 #define T54 /* 0x8f0ccc92 */ (T_MASK ^ 0x70f3336d) #define T55 /* 0xffeff47d */ (T_MASK ^ 0x00100b82) #define T56 /* 0x85845dd1 */ (T_MASK ^ 0x7a7ba22e) #define T57 0x6fa87e4f #define T58 /* 0xfe2ce6e0 */ (T_MASK ^ 0x01d3191f) #define T59 /* 0xa3014314 */ (T_MASK ^ 0x5cfebceb) #define T60 0x4e0811a1 #define T61 /* 0xf7537e82 */ (T_MASK ^ 0x08ac817d) #define T62 /* 0xbd3af235 */ (T_MASK ^ 0x42c50dca) #define T63 0x2ad7d2bb #define T64 /* 0xeb86d391 */ (T_MASK ^ 0x14792c6e) static void md5_process(md5_state_t *pms, const md5_byte_t *data /*[64]*/) { md5_word_t a = pms->abcd[0], b = pms->abcd[1], c = pms->abcd[2], d = pms->abcd[3]; md5_word_t t; #if BYTE_ORDER > 0 /* Define storage only for big-endian CPUs. */ md5_word_t X[16]; #else /* Define storage for little-endian or both types of CPUs. */ md5_word_t xbuf[16]; const md5_word_t *X; #endif { #if BYTE_ORDER == 0 /* * Determine dynamically whether this is a big-endian or * little-endian machine, since we can use a more efficient * algorithm on the latter. 
*/ static const int w = 1; if (*((const md5_byte_t *)&w)) /* dynamic little-endian */ #endif #if BYTE_ORDER <= 0 /* little-endian */ { /* * On little-endian machines, we can process properly aligned * data without copying it. */ if (!((data - (const md5_byte_t *)0) & 3)) { /* data are properly aligned */ X = (const md5_word_t *)data; } else { /* not aligned */ memcpy(xbuf, data, 64); X = xbuf; } } #endif #if BYTE_ORDER == 0 else /* dynamic big-endian */ #endif #if BYTE_ORDER >= 0 /* big-endian */ { /* * On big-endian machines, we must arrange the bytes in the * right order. */ const md5_byte_t *xp = data; int i; # if BYTE_ORDER == 0 X = xbuf; /* (dynamic only) */ # else # define xbuf X /* (static only) */ # endif for (i = 0; i < 16; ++i, xp += 4) xbuf[i] = xp[0] + (xp[1] << 8) + (xp[2] << 16) + (xp[3] << 24); } #endif } #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) /* Round 1. */ /* Let [abcd k s i] denote the operation a = b + ((a + F(b,c,d) + X[k] + T[i]) <<< s). */ #define F(x, y, z) (((x) & (y)) | (~(x) & (z))) #define SET(a, b, c, d, k, s, Ti)\ t = a + F(b,c,d) + X[k] + Ti;\ a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. */ SET(a, b, c, d, 0, 7, T1); SET(d, a, b, c, 1, 12, T2); SET(c, d, a, b, 2, 17, T3); SET(b, c, d, a, 3, 22, T4); SET(a, b, c, d, 4, 7, T5); SET(d, a, b, c, 5, 12, T6); SET(c, d, a, b, 6, 17, T7); SET(b, c, d, a, 7, 22, T8); SET(a, b, c, d, 8, 7, T9); SET(d, a, b, c, 9, 12, T10); SET(c, d, a, b, 10, 17, T11); SET(b, c, d, a, 11, 22, T12); SET(a, b, c, d, 12, 7, T13); SET(d, a, b, c, 13, 12, T14); SET(c, d, a, b, 14, 17, T15); SET(b, c, d, a, 15, 22, T16); #undef SET /* Round 2. */ /* Let [abcd k s i] denote the operation a = b + ((a + G(b,c,d) + X[k] + T[i]) <<< s). */ #define G(x, y, z) (((x) & (z)) | ((y) & ~(z))) #define SET(a, b, c, d, k, s, Ti)\ t = a + G(b,c,d) + X[k] + Ti;\ a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. 
*/ SET(a, b, c, d, 1, 5, T17); SET(d, a, b, c, 6, 9, T18); SET(c, d, a, b, 11, 14, T19); SET(b, c, d, a, 0, 20, T20); SET(a, b, c, d, 5, 5, T21); SET(d, a, b, c, 10, 9, T22); SET(c, d, a, b, 15, 14, T23); SET(b, c, d, a, 4, 20, T24); SET(a, b, c, d, 9, 5, T25); SET(d, a, b, c, 14, 9, T26); SET(c, d, a, b, 3, 14, T27); SET(b, c, d, a, 8, 20, T28); SET(a, b, c, d, 13, 5, T29); SET(d, a, b, c, 2, 9, T30); SET(c, d, a, b, 7, 14, T31); SET(b, c, d, a, 12, 20, T32); #undef SET /* Round 3. */ /* Let [abcd k s t] denote the operation a = b + ((a + H(b,c,d) + X[k] + T[i]) <<< s). */ #define H(x, y, z) ((x) ^ (y) ^ (z)) #define SET(a, b, c, d, k, s, Ti)\ t = a + H(b,c,d) + X[k] + Ti;\ a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. */ SET(a, b, c, d, 5, 4, T33); SET(d, a, b, c, 8, 11, T34); SET(c, d, a, b, 11, 16, T35); SET(b, c, d, a, 14, 23, T36); SET(a, b, c, d, 1, 4, T37); SET(d, a, b, c, 4, 11, T38); SET(c, d, a, b, 7, 16, T39); SET(b, c, d, a, 10, 23, T40); SET(a, b, c, d, 13, 4, T41); SET(d, a, b, c, 0, 11, T42); SET(c, d, a, b, 3, 16, T43); SET(b, c, d, a, 6, 23, T44); SET(a, b, c, d, 9, 4, T45); SET(d, a, b, c, 12, 11, T46); SET(c, d, a, b, 15, 16, T47); SET(b, c, d, a, 2, 23, T48); #undef SET /* Round 4. */ /* Let [abcd k s t] denote the operation a = b + ((a + I(b,c,d) + X[k] + T[i]) <<< s). */ #define I(x, y, z) ((y) ^ ((x) | ~(z))) #define SET(a, b, c, d, k, s, Ti)\ t = a + I(b,c,d) + X[k] + Ti;\ a = ROTATE_LEFT(t, s) + b /* Do the following 16 operations. 
*/ SET(a, b, c, d, 0, 6, T49); SET(d, a, b, c, 7, 10, T50); SET(c, d, a, b, 14, 15, T51); SET(b, c, d, a, 5, 21, T52); SET(a, b, c, d, 12, 6, T53); SET(d, a, b, c, 3, 10, T54); SET(c, d, a, b, 10, 15, T55); SET(b, c, d, a, 1, 21, T56); SET(a, b, c, d, 8, 6, T57); SET(d, a, b, c, 15, 10, T58); SET(c, d, a, b, 6, 15, T59); SET(b, c, d, a, 13, 21, T60); SET(a, b, c, d, 4, 6, T61); SET(d, a, b, c, 11, 10, T62); SET(c, d, a, b, 2, 15, T63); SET(b, c, d, a, 9, 21, T64); #undef SET /* Then perform the following additions. (That is increment each of the four registers by the value it had before this block was started.) */ pms->abcd[0] += a; pms->abcd[1] += b; pms->abcd[2] += c; pms->abcd[3] += d; } void md5_init(md5_state_t *pms) { pms->count[0] = pms->count[1] = 0; pms->abcd[0] = 0x67452301; pms->abcd[1] = /*0xefcdab89*/ T_MASK ^ 0x10325476; pms->abcd[2] = /*0x98badcfe*/ T_MASK ^ 0x67452301; pms->abcd[3] = 0x10325476; } void md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes) { const md5_byte_t *p = data; int left = nbytes; int offset = (pms->count[0] >> 3) & 63; md5_word_t nbits = (md5_word_t)(nbytes << 3); if (nbytes <= 0) return; /* Update the message length. */ pms->count[1] += nbytes >> 29; pms->count[0] += nbits; if (pms->count[0] < nbits) pms->count[1]++; /* Process an initial partial block. */ if (offset) { int copy = (offset + nbytes > 64 ? 64 - offset : nbytes); memcpy(pms->buf + offset, p, copy); if (offset + copy < 64) return; p += copy; left -= copy; md5_process(pms, pms->buf); } /* Process full blocks. */ for (; left >= 64; p += 64, left -= 64) md5_process(pms, p); /* Process a final partial block. 
*/ if (left) memcpy(pms->buf, p, left); } void md5_finish(md5_state_t *pms, md5_byte_t digest[16]) { static const md5_byte_t pad[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; md5_byte_t data[8]; int i; /* Save the length before padding. */ for (i = 0; i < 8; ++i) data[i] = (md5_byte_t)(pms->count[i >> 2] >> ((i & 3) << 3)); /* Pad to 56 bytes mod 64. */ md5_append(pms, pad, ((55 - (pms->count[0] >> 3)) & 63) + 1); /* Append the length. */ md5_append(pms, data, 8); for (i = 0; i < 16; ++i) digest[i] = (md5_byte_t)(pms->abcd[i >> 2] >> ((i & 3) << 3)); } LucenePlusPlus-rel_3.0.9/src/core/util/md5/md5.h000066400000000000000000000065271456444476200213610ustar00rootroot00000000000000/* Copyright (C) 1999, 2002 Aladdin Enterprises. All rights reserved. This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. L. Peter Deutsch ghost@aladdin.com */ /* $Id: md5.h,v 1.4 2002/04/13 19:20:28 lpd Exp $ */ /* Independent implementation of MD5 (RFC 1321). 
This code implements the MD5 Algorithm defined in RFC 1321, whose text is available at http://www.ietf.org/rfc/rfc1321.txt The code is derived from the text of the RFC, including the test suite (section A.5) but excluding the rest of Appendix A. It does not include any code or documentation that is identified in the RFC as being copyrighted. The original and principal author of md5.h is L. Peter Deutsch . Other authors are noted in the change history that follows (in reverse chronological order): 2002-04-13 lpd Removed support for non-ANSI compilers; removed references to Ghostscript; clarified derivation from RFC 1321; now handles byte order either statically or dynamically. 1999-11-04 lpd Edited comments slightly for automatic TOC extraction. 1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5); added conditionalization for C++ compilation from Martin Purschke . 1999-05-03 lpd Original version. */ #ifndef md5_INCLUDED # define md5_INCLUDED /* * This package supports both compile-time and run-time determination of CPU * byte order. If ARCH_IS_BIG_ENDIAN is defined as 0, the code will be * compiled to run only on little-endian CPUs; if ARCH_IS_BIG_ENDIAN is * defined as non-zero, the code will be compiled to run only on big-endian * CPUs; if ARCH_IS_BIG_ENDIAN is not defined, the code will be compiled to * run on either big- or little-endian CPUs, but will run slightly less * efficiently on either one than if ARCH_IS_BIG_ENDIAN is defined. */ typedef unsigned char md5_byte_t; /* 8-bit byte */ typedef unsigned int md5_word_t; /* 32-bit word */ /* Define the state of the MD5 Algorithm. */ typedef struct md5_state_s { md5_word_t count[2]; /* message length in bits, lsw first */ md5_word_t abcd[4]; /* digest buffer */ md5_byte_t buf[64]; /* accumulate block */ } md5_state_t; #ifdef __cplusplus extern "C" { #endif /* Initialize the algorithm. */ void md5_init(md5_state_t* pms); /* Append a string to the message. 
*/ void md5_append(md5_state_t* pms, const md5_byte_t* data, int nbytes); /* Finish the message and return the digest. */ void md5_finish(md5_state_t* pms, md5_byte_t digest[16]); #ifdef __cplusplus } /* end extern "C" */ #endif #endif /* md5_INCLUDED */ LucenePlusPlus-rel_3.0.9/src/core/util/unicode/000077500000000000000000000000001456444476200214525ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/core/util/unicode/gunichartables.h000066400000000000000000031713441456444476200246330ustar00rootroot00000000000000/* This file is automatically generated. DO NOT EDIT! Instead, edit gen-unicode-tables.pl and re-run. */ // See COPYING file for licensing information. #ifndef CHARTABLES_H #define CHARTABLES_H #define G_UNICODE_DATA_VERSION "5.1.0" #define G_UNICODE_LAST_CHAR 0x10ffff #define G_UNICODE_MAX_TABLE_INDEX 10000 #define G_UNICODE_LAST_CHAR_PART1 0x2FAFF #define G_UNICODE_LAST_PAGE_PART1 762 static const char type_data[][256] = { { /* page 0, index 0 */ G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, 
G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_FORMAT, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER }, { /* page 1, index 1 */ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER }, { /* page 2, index 2 */ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, 
G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL }, { /* page 3, index 3 */ G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER }, { /* page 4, index 4 */ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER }, { /* page 5, index 5 */ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 6, index 6 */ G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, 
G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_FORMAT, G_UNICODE_ENCLOSING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_LETTER }, { /* page 7, index 7 */ G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, 
G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 9, index 8 */ G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, 
G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, 
G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 10, index 9 */ G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, 
G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 11, index 10 */ G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 12, index 11 */ G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 13, index 12 */ G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, 
G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 14, index 13 */ G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, 
G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 15, index 14 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 16, index 15 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 17, index 16 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 18, index 17 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER }, { /* page 19, index 18 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 20, index 19 */ G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER }, { /* page 22, index 20 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 23, index 21 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 24, index 22 */ G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, 
G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 25, index 23 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL }, { /* page 26, index 24 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 27, index 25 */ G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, 
G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 28, index 26 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, 
G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 29, index 27 */ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, 
G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK }, { /* page 30, index 28 */ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER }, { /* page 31, index 29 */ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UNASSIGNED }, { /* page 32, index 30 */ G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, 
G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_LINE_SEPARATOR, G_UNICODE_PARAGRAPH_SEPARATOR, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, 
G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_OTHER_NUMBER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, 
G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 33, index 31 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL }, { /* page 35, index 32 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 36, index 33 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, 
G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER }, { /* page 37, index 34 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL }, { /* page 38, index 35 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 39, index 36 */ G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_NUMBER, 
G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL }, { /* page 41, index 37 */ G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL }, { /* page 43, index 38 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 44, index 39 */ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION }, { /* page 45, index 40 */ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK }, { /* page 46, index 41 */ G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, 
G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 47, index 42 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 48, index 43 */ G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_LETTER_NUMBER, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, 
G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER }, { /* page 49, index 44 */ G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER }, { /* page 50, index 45 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, 
G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED }, { /* page 77, index 46 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL }, { /* page 159, index 47 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 160, index 48 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER }, { /* page 164, index 49 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 166, index 50 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 167, index 51 */ G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER }, { /* page 168, index 52 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, 
G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 169, index 53 */ G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 170, index 54 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 215, index 55 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 250, index 56 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 251, index 57 */ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER }, { /* page 253, index 58 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 254, index 59 */ G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, 
G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT }, { /* page 255, index 60 */ G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 256, index 61 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 257, index 62 */ G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, 
G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, 
G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 258, index 63 */ G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 259, index 64 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_LETTER_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, 
G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 260, index 65 */ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 264, index 66 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 265, index 67 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 266, index 68 */ G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 291, index 69 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 292, index 70 */ G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, 
G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 464, index 71 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 465, index 72 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, 
G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 466, index 73 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 467, index 74 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 468, index 75 */ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER }, { /* page 469, index 76 */ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER }, { /* page 470, index 77 */ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER }, { /* page 471, index 78 */ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER }, { /* page 496, index 79 */ G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 678, index 80 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 762, index 81 */ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 3584, index 82 */ G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, 
G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 3585, index 83 */ G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 4095, index 84 */ G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED }, { /* page 4351, index 85 */ G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED } }; /* U+0000 through U+2FAFF */ static const int16_t type_table_part1[763] = { 0 /* page 0 */, 1 /* page 1 */, 2 /* page 2 */, 3 /* page 3 */, 4 /* page 4 */, 5 /* page 5 */, 6 /* page 6 */, 7 /* page 7 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 8 /* page 9 */, 9 /* page 10 */, 10 /* page 11 */, 11 /* page 12 */, 12 /* page 13 */, 13 /* page 14 */, 14 /* page 15 */, 15 /* page 16 */, 16 /* page 17 */, 17 /* page 18 */, 18 /* page 19 */, 19 /* page 20 */, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 20 /* page 22 */, 21 /* page 23 */, 22 /* page 24 */, 23 /* page 25 */, 24 /* page 26 */, 25 /* page 27 */, 26 /* page 28 */, 27 /* page 29 */, 28 /* page 30 */, 29 /* page 31 */, 30 /* page 32 */, 31 /* page 33 */, G_UNICODE_MATH_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, 32 /* page 35 */, 33 /* page 36 */, 34 /* page 37 */, 35 /* page 38 */, 36 /* page 39 */, G_UNICODE_OTHER_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, 37 /* page 41 */, G_UNICODE_MATH_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, 38 /* page 43 */, 39 /* page 44 */, 40 /* page 45 */, 41 /* page 46 */, 42 /* page 47 */, 43 /* page 48 */, 44 /* page 49 */, 45 /* page 50 */, G_UNICODE_OTHER_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 46 /* page 77 */, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 47 /* page 159 */, 48 /* page 160 */, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 49 /* page 164 */, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 50 /* page 166 */, 51 /* page 167 */, 52 /* page 168 */, 53 /* page 169 */, 54 /* page 170 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 55 /* page 215 */, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 56 /* page 250 */, 57 /* page 251 */, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 58 /* page 253 */, 59 /* page 254 */, 60 /* page 255 */, 61 /* page 256 */, 62 /* page 257 */, 63 /* page 258 */, 64 /* page 259 */, 65 /* page 260 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 66 /* page 264 */, 67 /* page 265 */, 68 /* page 266 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 69 /* page 291 */, 70 /* page 292 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 71 /* page 464 */, 72 /* page 465 */, 73 /* page 466 */, 74 /* page 467 */, 75 /* page 468 */, 76 /* page 469 */, 77 /* page 470 */, 78 /* page 471 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED 
+ G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 79 /* page 496 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 80 /* page 678 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 81 /* page 762 */ }; /* U+E0000 through U+10FFFF */ static const int16_t type_table_part2[768] = { 82 /* page 3584 */, 83 /* page 3585 */, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 84 /* page 4095 */, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, 85 /* page 4351 */ }; static const gunichar attr_data[][256] = { { /* page 0, index 0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x039c, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0000, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x1000000, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0000, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178 }, { /* page 1, index 1 */ 0x0101, 0x0100, 0x0103, 0x0102, 0x0105, 0x0104, 0x0107, 0x0106, 0x0109, 0x0108, 0x010b, 0x010a, 0x010d, 0x010c, 0x010f, 0x010e, 0x0111, 0x0110, 0x0113, 0x0112, 0x0115, 0x0114, 0x0117, 0x0116, 0x0119, 0x0118, 0x011b, 
0x011a, 0x011d, 0x011c, 0x011f, 0x011e, 0x0121, 0x0120, 0x0123, 0x0122, 0x0125, 0x0124, 0x0127, 0x0126, 0x0129, 0x0128, 0x012b, 0x012a, 0x012d, 0x012c, 0x012f, 0x012e, 0x1000007, 0x0049, 0x0133, 0x0132, 0x0135, 0x0134, 0x0137, 0x0136, 0x0000, 0x013a, 0x0139, 0x013c, 0x013b, 0x013e, 0x013d, 0x0140, 0x013f, 0x0142, 0x0141, 0x0144, 0x0143, 0x0146, 0x0145, 0x0148, 0x0147, 0x1000086, 0x014b, 0x014a, 0x014d, 0x014c, 0x014f, 0x014e, 0x0151, 0x0150, 0x0153, 0x0152, 0x0155, 0x0154, 0x0157, 0x0156, 0x0159, 0x0158, 0x015b, 0x015a, 0x015d, 0x015c, 0x015f, 0x015e, 0x0161, 0x0160, 0x0163, 0x0162, 0x0165, 0x0164, 0x0167, 0x0166, 0x0169, 0x0168, 0x016b, 0x016a, 0x016d, 0x016c, 0x016f, 0x016e, 0x0171, 0x0170, 0x0173, 0x0172, 0x0175, 0x0174, 0x0177, 0x0176, 0x00ff, 0x017a, 0x0179, 0x017c, 0x017b, 0x017e, 0x017d, 0x0053, 0x0243, 0x0253, 0x0183, 0x0182, 0x0185, 0x0184, 0x0254, 0x0188, 0x0187, 0x0256, 0x0257, 0x018c, 0x018b, 0x0000, 0x01dd, 0x0259, 0x025b, 0x0192, 0x0191, 0x0260, 0x0263, 0x01f6, 0x0269, 0x0268, 0x0199, 0x0198, 0x023d, 0x0000, 0x026f, 0x0272, 0x0220, 0x0275, 0x01a1, 0x01a0, 0x01a3, 0x01a2, 0x01a5, 0x01a4, 0x0280, 0x01a8, 0x01a7, 0x0283, 0x0000, 0x0000, 0x01ad, 0x01ac, 0x0288, 0x01b0, 0x01af, 0x028a, 0x028b, 0x01b4, 0x01b3, 0x01b6, 0x01b5, 0x0292, 0x01b9, 0x01b8, 0x0000, 0x0000, 0x01bd, 0x01bc, 0x0000, 0x01f7, 0x0000, 0x0000, 0x0000, 0x0000, 0x01c6, 0x0000, 0x01c4, 0x01c9, 0x0000, 0x01c7, 0x01cc, 0x0000, 0x01ca, 0x01ce, 0x01cd, 0x01d0, 0x01cf, 0x01d2, 0x01d1, 0x01d4, 0x01d3, 0x01d6, 0x01d5, 0x01d8, 0x01d7, 0x01da, 0x01d9, 0x01dc, 0x01db, 0x018e, 0x01df, 0x01de, 0x01e1, 0x01e0, 0x01e3, 0x01e2, 0x01e5, 0x01e4, 0x01e7, 0x01e6, 0x01e9, 0x01e8, 0x01eb, 0x01ea, 0x01ed, 0x01ec, 0x01ef, 0x01ee, 0x10000ad, 0x01f3, 0x0000, 0x01f1, 0x01f5, 0x01f4, 0x0195, 0x01bf, 0x01f9, 0x01f8, 0x01fb, 0x01fa, 0x01fd, 0x01fc, 0x01ff, 0x01fe }, { /* page 2, index 2 */ 0x0201, 0x0200, 0x0203, 0x0202, 0x0205, 0x0204, 0x0207, 0x0206, 0x0209, 0x0208, 0x020b, 0x020a, 0x020d, 0x020c, 0x020f, 0x020e, 
0x0211, 0x0210, 0x0213, 0x0212, 0x0215, 0x0214, 0x0217, 0x0216, 0x0219, 0x0218, 0x021b, 0x021a, 0x021d, 0x021c, 0x021f, 0x021e, 0x019e, 0x0000, 0x0223, 0x0222, 0x0225, 0x0224, 0x0227, 0x0226, 0x0229, 0x0228, 0x022b, 0x022a, 0x022d, 0x022c, 0x022f, 0x022e, 0x0231, 0x0230, 0x0233, 0x0232, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2c65, 0x023c, 0x023b, 0x019a, 0x2c66, 0x0000, 0x0000, 0x0242, 0x0241, 0x0180, 0x0289, 0x028c, 0x0247, 0x0246, 0x0249, 0x0248, 0x024b, 0x024a, 0x024d, 0x024c, 0x024f, 0x024e, 0x2c6f, 0x2c6d, 0x0000, 0x0181, 0x0186, 0x0000, 0x0189, 0x018a, 0x0000, 0x018f, 0x0000, 0x0190, 0x0000, 0x0000, 0x0000, 0x0000, 0x0193, 0x0000, 0x0000, 0x0194, 0x0000, 0x0000, 0x0000, 0x0000, 0x0197, 0x0196, 0x0000, 0x2c62, 0x0000, 0x0000, 0x0000, 0x019c, 0x0000, 0x2c6e, 0x019d, 0x0000, 0x0000, 0x019f, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2c64, 0x0000, 0x0000, 0x01a6, 0x0000, 0x0000, 0x01a9, 0x0000, 0x0000, 0x0000, 0x0000, 0x01ae, 0x0244, 0x01b1, 0x01b2, 0x0245, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01b7, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 3, index 3 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0371, 0x0370, 0x0373, 0x0372, 0x0000, 0x0000, 0x0377, 0x0376, 0x0000, 0x0000, 0x0000, 0x03fd, 0x03fe, 0x03ff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x03ac, 0x0000, 0x03ad, 0x03ae, 0x03af, 0x0000, 0x03cc, 0x0000, 0x03cd, 0x03ce, 0x100008f, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 0x03c0, 0x03c1, 0x0000, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x0386, 0x0388, 0x0389, 0x038a, 0x100009e, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 0x03a0, 0x03a1, 0x03a3, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x038c, 0x038e, 0x038f, 0x03d7, 0x0392, 0x0398, 0x0000, 0x0000, 0x0000, 0x03a6, 0x03a0, 0x03cf, 0x03d9, 0x03d8, 0x03db, 0x03da, 0x03dd, 0x03dc, 0x03df, 0x03de, 0x03e1, 0x03e0, 0x03e3, 0x03e2, 0x03e5, 0x03e4, 0x03e7, 0x03e6, 0x03e9, 0x03e8, 0x03eb, 0x03ea, 0x03ed, 0x03ec, 0x03ef, 0x03ee, 0x039a, 0x03a1, 0x03f9, 0x0000, 0x03b8, 0x0395, 0x0000, 0x03f8, 0x03f7, 0x03f2, 0x03fb, 0x03fa, 0x0000, 0x037b, 0x037c, 
0x037d }, { /* page 4, index 4 */ 0x0450, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458, 0x0459, 0x045a, 0x045b, 0x045c, 0x045d, 0x045e, 0x045f, 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 0x0400, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x040d, 0x040e, 0x040f, 0x0461, 0x0460, 0x0463, 0x0462, 0x0465, 0x0464, 0x0467, 0x0466, 0x0469, 0x0468, 0x046b, 0x046a, 0x046d, 0x046c, 0x046f, 0x046e, 0x0471, 0x0470, 0x0473, 0x0472, 0x0475, 0x0474, 0x0477, 0x0476, 0x0479, 0x0478, 0x047b, 0x047a, 0x047d, 0x047c, 0x047f, 0x047e, 0x0481, 0x0480, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x048b, 0x048a, 0x048d, 0x048c, 0x048f, 0x048e, 0x0491, 0x0490, 0x0493, 0x0492, 0x0495, 0x0494, 0x0497, 0x0496, 0x0499, 0x0498, 0x049b, 0x049a, 0x049d, 0x049c, 0x049f, 0x049e, 0x04a1, 0x04a0, 0x04a3, 0x04a2, 0x04a5, 0x04a4, 0x04a7, 0x04a6, 0x04a9, 0x04a8, 0x04ab, 0x04aa, 0x04ad, 0x04ac, 0x04af, 0x04ae, 0x04b1, 0x04b0, 0x04b3, 0x04b2, 0x04b5, 0x04b4, 0x04b7, 0x04b6, 0x04b9, 0x04b8, 0x04bb, 0x04ba, 0x04bd, 0x04bc, 0x04bf, 0x04be, 0x04cf, 0x04c2, 0x04c1, 0x04c4, 0x04c3, 0x04c6, 0x04c5, 0x04c8, 0x04c7, 0x04ca, 0x04c9, 0x04cc, 0x04cb, 0x04ce, 0x04cd, 0x04c0, 0x04d1, 0x04d0, 0x04d3, 0x04d2, 0x04d5, 0x04d4, 0x04d7, 0x04d6, 0x04d9, 0x04d8, 0x04db, 0x04da, 0x04dd, 0x04dc, 0x04df, 0x04de, 0x04e1, 0x04e0, 0x04e3, 0x04e2, 0x04e5, 0x04e4, 0x04e7, 0x04e6, 0x04e9, 0x04e8, 0x04eb, 0x04ea, 0x04ed, 0x04ec, 0x04ef, 0x04ee, 0x04f1, 0x04f0, 0x04f3, 0x04f2, 0x04f5, 
0x04f4, 0x04f7, 0x04f6, 0x04f9, 0x04f8, 0x04fb, 0x04fa, 0x04fd, 0x04fc, 0x04ff, 0x04fe }, { /* page 5, index 5 */ 0x0501, 0x0500, 0x0503, 0x0502, 0x0505, 0x0504, 0x0507, 0x0506, 0x0509, 0x0508, 0x050b, 0x050a, 0x050d, 0x050c, 0x050f, 0x050e, 0x0511, 0x0510, 0x0513, 0x0512, 0x0515, 0x0514, 0x0517, 0x0516, 0x0519, 0x0518, 0x051b, 0x051a, 0x051d, 0x051c, 0x051f, 0x051e, 0x0521, 0x0520, 0x0523, 0x0522, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0561, 0x0562, 0x0563, 0x0564, 0x0565, 0x0566, 0x0567, 0x0568, 0x0569, 0x056a, 0x056b, 0x056c, 0x056d, 0x056e, 0x056f, 0x0570, 0x0571, 0x0572, 0x0573, 0x0574, 0x0575, 0x0576, 0x0577, 0x0578, 0x0579, 0x057a, 0x057b, 0x057c, 0x057d, 0x057e, 0x057f, 0x0580, 0x0581, 0x0582, 0x0583, 0x0584, 0x0585, 0x0586, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053a, 0x053b, 0x053c, 0x053d, 0x053e, 0x053f, 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054a, 0x054b, 0x054c, 0x054d, 0x054e, 0x054f, 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x1000044, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 6, index 6 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 7, index 7 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 9, index 8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 10, index 9 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 11, index 10 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 12, index 11 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 13, index 12 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 14, index 13 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 15, index 14 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 16, index 15 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2d00, 0x2d01, 0x2d02, 0x2d03, 0x2d04, 0x2d05, 0x2d06, 0x2d07, 0x2d08, 0x2d09, 0x2d0a, 0x2d0b, 0x2d0c, 0x2d0d, 0x2d0e, 0x2d0f, 0x2d10, 0x2d11, 0x2d12, 0x2d13, 0x2d14, 0x2d15, 0x2d16, 0x2d17, 0x2d18, 0x2d19, 0x2d1a, 0x2d1b, 0x2d1c, 0x2d1d, 0x2d1e, 0x2d1f, 0x2d20, 0x2d21, 0x2d22, 0x2d23, 0x2d24, 0x2d25, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 23, index 16 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 24, index 17 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 25, index 18 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 27, index 19 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 28, index 20 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 
0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 29, index 21 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xa77d, 0x0000, 0x0000, 0x0000, 0x2c63, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 30, index 22 */ 0x1e01, 0x1e00, 0x1e03, 0x1e02, 0x1e05, 0x1e04, 0x1e07, 0x1e06, 0x1e09, 0x1e08, 0x1e0b, 0x1e0a, 0x1e0d, 0x1e0c, 0x1e0f, 0x1e0e, 0x1e11, 0x1e10, 0x1e13, 0x1e12, 0x1e15, 0x1e14, 0x1e17, 0x1e16, 0x1e19, 0x1e18, 0x1e1b, 0x1e1a, 0x1e1d, 0x1e1c, 0x1e1f, 0x1e1e, 0x1e21, 0x1e20, 0x1e23, 0x1e22, 0x1e25, 0x1e24, 0x1e27, 0x1e26, 0x1e29, 0x1e28, 0x1e2b, 0x1e2a, 0x1e2d, 0x1e2c, 0x1e2f, 0x1e2e, 0x1e31, 0x1e30, 0x1e33, 0x1e32, 0x1e35, 0x1e34, 0x1e37, 0x1e36, 0x1e39, 0x1e38, 0x1e3b, 0x1e3a, 0x1e3d, 0x1e3c, 0x1e3f, 0x1e3e, 0x1e41, 
0x1e40, 0x1e43, 0x1e42, 0x1e45, 0x1e44, 0x1e47, 0x1e46, 0x1e49, 0x1e48, 0x1e4b, 0x1e4a, 0x1e4d, 0x1e4c, 0x1e4f, 0x1e4e, 0x1e51, 0x1e50, 0x1e53, 0x1e52, 0x1e55, 0x1e54, 0x1e57, 0x1e56, 0x1e59, 0x1e58, 0x1e5b, 0x1e5a, 0x1e5d, 0x1e5c, 0x1e5f, 0x1e5e, 0x1e61, 0x1e60, 0x1e63, 0x1e62, 0x1e65, 0x1e64, 0x1e67, 0x1e66, 0x1e69, 0x1e68, 0x1e6b, 0x1e6a, 0x1e6d, 0x1e6c, 0x1e6f, 0x1e6e, 0x1e71, 0x1e70, 0x1e73, 0x1e72, 0x1e75, 0x1e74, 0x1e77, 0x1e76, 0x1e79, 0x1e78, 0x1e7b, 0x1e7a, 0x1e7d, 0x1e7c, 0x1e7f, 0x1e7e, 0x1e81, 0x1e80, 0x1e83, 0x1e82, 0x1e85, 0x1e84, 0x1e87, 0x1e86, 0x1e89, 0x1e88, 0x1e8b, 0x1e8a, 0x1e8d, 0x1e8c, 0x1e8f, 0x1e8e, 0x1e91, 0x1e90, 0x1e93, 0x1e92, 0x1e95, 0x1e94, 0x10000b6, 0x10000bf, 0x10000c8, 0x10000d1, 0x10000da, 0x1e60, 0x0000, 0x0000, 0x00df, 0x0000, 0x1ea1, 0x1ea0, 0x1ea3, 0x1ea2, 0x1ea5, 0x1ea4, 0x1ea7, 0x1ea6, 0x1ea9, 0x1ea8, 0x1eab, 0x1eaa, 0x1ead, 0x1eac, 0x1eaf, 0x1eae, 0x1eb1, 0x1eb0, 0x1eb3, 0x1eb2, 0x1eb5, 0x1eb4, 0x1eb7, 0x1eb6, 0x1eb9, 0x1eb8, 0x1ebb, 0x1eba, 0x1ebd, 0x1ebc, 0x1ebf, 0x1ebe, 0x1ec1, 0x1ec0, 0x1ec3, 0x1ec2, 0x1ec5, 0x1ec4, 0x1ec7, 0x1ec6, 0x1ec9, 0x1ec8, 0x1ecb, 0x1eca, 0x1ecd, 0x1ecc, 0x1ecf, 0x1ece, 0x1ed1, 0x1ed0, 0x1ed3, 0x1ed2, 0x1ed5, 0x1ed4, 0x1ed7, 0x1ed6, 0x1ed9, 0x1ed8, 0x1edb, 0x1eda, 0x1edd, 0x1edc, 0x1edf, 0x1ede, 0x1ee1, 0x1ee0, 0x1ee3, 0x1ee2, 0x1ee5, 0x1ee4, 0x1ee7, 0x1ee6, 0x1ee9, 0x1ee8, 0x1eeb, 0x1eea, 0x1eed, 0x1eec, 0x1eef, 0x1eee, 0x1ef1, 0x1ef0, 0x1ef3, 0x1ef2, 0x1ef5, 0x1ef4, 0x1ef7, 0x1ef6, 0x1ef9, 0x1ef8, 0x1efb, 0x1efa, 0x1efd, 0x1efc, 0x1eff, 0x1efe }, { /* page 31, index 23 */ 0x1f08, 0x1f09, 0x1f0a, 0x1f0b, 0x1f0c, 0x1f0d, 0x1f0e, 0x1f0f, 0x1f00, 0x1f01, 0x1f02, 0x1f03, 0x1f04, 0x1f05, 0x1f06, 0x1f07, 0x1f18, 0x1f19, 0x1f1a, 0x1f1b, 0x1f1c, 0x1f1d, 0x0000, 0x0000, 0x1f10, 0x1f11, 0x1f12, 0x1f13, 0x1f14, 0x1f15, 0x0000, 0x0000, 0x1f28, 0x1f29, 0x1f2a, 0x1f2b, 0x1f2c, 0x1f2d, 0x1f2e, 0x1f2f, 0x1f20, 0x1f21, 0x1f22, 0x1f23, 0x1f24, 0x1f25, 0x1f26, 0x1f27, 0x1f38, 0x1f39, 0x1f3a, 0x1f3b, 0x1f3c, 
0x1f3d, 0x1f3e, 0x1f3f, 0x1f30, 0x1f31, 0x1f32, 0x1f33, 0x1f34, 0x1f35, 0x1f36, 0x1f37, 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, 0x1f4c, 0x1f4d, 0x0000, 0x0000, 0x1f40, 0x1f41, 0x1f42, 0x1f43, 0x1f44, 0x1f45, 0x0000, 0x0000, 0x10000e3, 0x1f59, 0x10000ee, 0x1f5b, 0x10000fd, 0x1f5d, 0x100010c, 0x1f5f, 0x0000, 0x1f51, 0x0000, 0x1f53, 0x0000, 0x1f55, 0x0000, 0x1f57, 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, 0x1f60, 0x1f61, 0x1f62, 0x1f63, 0x1f64, 0x1f65, 0x1f66, 0x1f67, 0x1fba, 0x1fbb, 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, 0x1fda, 0x1fdb, 0x1ff8, 0x1ff9, 0x1fea, 0x1feb, 0x1ffa, 0x1ffb, 0x0000, 0x0000, 0x10001b7, 0x10001c4, 0x10001d1, 0x10001de, 0x10001eb, 0x10001f8, 0x1000205, 0x1000212, 0x100021f, 0x1000229, 0x1000233, 0x100023d, 0x1000247, 0x1000251, 0x100025b, 0x1000265, 0x100026f, 0x100027c, 0x1000289, 0x1000296, 0x10002a3, 0x10002b0, 0x10002bd, 0x10002ca, 0x10002d7, 0x10002e1, 0x10002eb, 0x10002f5, 0x10002ff, 0x1000309, 0x1000313, 0x100031d, 0x1000327, 0x1000334, 0x1000341, 0x100034e, 0x100035b, 0x1000368, 0x1000375, 0x1000382, 0x100038f, 0x1000399, 0x10003a3, 0x10003ad, 0x10003b7, 0x10003c1, 0x10003cb, 0x10003d5, 0x1fb8, 0x1fb9, 0x100041e, 0x10003df, 0x100042b, 0x0000, 0x100011b, 0x1000466, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x10003eb, 0x0000, 0x0399, 0x0000, 0x0000, 0x0000, 0x1000436, 0x10003f4, 0x1000443, 0x0000, 0x1000126, 0x1000475, 0x1f72, 0x1f73, 0x1f74, 0x1f75, 0x1000400, 0x0000, 0x0000, 0x0000, 0x1fd8, 0x1fd9, 0x1000131, 0x1000140, 0x0000, 0x0000, 0x100014f, 0x100015a, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x0000, 0x0000, 0x0000, 0x0000, 0x1fe8, 0x1fe9, 0x1000169, 0x1000178, 0x1000187, 0x1fec, 0x1000192, 0x100019d, 0x1fe0, 0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x100044e, 0x1000409, 0x100045b, 0x0000, 0x10001ac, 0x1000484, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x1000415, 0x0000, 0x0000, 0x0000 }, { /* page 33, index 24 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x03c9, 0x0000, 0x0000, 0x0000, 0x006b, 0x00e5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x214e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2132, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2184, 0x2183, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 44, index 25 */ 0x2c30, 0x2c31, 0x2c32, 
0x2c33, 0x2c34, 0x2c35, 0x2c36, 0x2c37, 0x2c38, 0x2c39, 0x2c3a, 0x2c3b, 0x2c3c, 0x2c3d, 0x2c3e, 0x2c3f, 0x2c40, 0x2c41, 0x2c42, 0x2c43, 0x2c44, 0x2c45, 0x2c46, 0x2c47, 0x2c48, 0x2c49, 0x2c4a, 0x2c4b, 0x2c4c, 0x2c4d, 0x2c4e, 0x2c4f, 0x2c50, 0x2c51, 0x2c52, 0x2c53, 0x2c54, 0x2c55, 0x2c56, 0x2c57, 0x2c58, 0x2c59, 0x2c5a, 0x2c5b, 0x2c5c, 0x2c5d, 0x2c5e, 0x0000, 0x2c00, 0x2c01, 0x2c02, 0x2c03, 0x2c04, 0x2c05, 0x2c06, 0x2c07, 0x2c08, 0x2c09, 0x2c0a, 0x2c0b, 0x2c0c, 0x2c0d, 0x2c0e, 0x2c0f, 0x2c10, 0x2c11, 0x2c12, 0x2c13, 0x2c14, 0x2c15, 0x2c16, 0x2c17, 0x2c18, 0x2c19, 0x2c1a, 0x2c1b, 0x2c1c, 0x2c1d, 0x2c1e, 0x2c1f, 0x2c20, 0x2c21, 0x2c22, 0x2c23, 0x2c24, 0x2c25, 0x2c26, 0x2c27, 0x2c28, 0x2c29, 0x2c2a, 0x2c2b, 0x2c2c, 0x2c2d, 0x2c2e, 0x0000, 0x2c61, 0x2c60, 0x026b, 0x1d7d, 0x027d, 0x023a, 0x023e, 0x2c68, 0x2c67, 0x2c6a, 0x2c69, 0x2c6c, 0x2c6b, 0x0251, 0x0271, 0x0250, 0x0000, 0x0000, 0x2c73, 0x2c72, 0x0000, 0x2c76, 0x2c75, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2c81, 0x2c80, 0x2c83, 0x2c82, 0x2c85, 0x2c84, 0x2c87, 0x2c86, 0x2c89, 0x2c88, 0x2c8b, 0x2c8a, 0x2c8d, 0x2c8c, 0x2c8f, 0x2c8e, 0x2c91, 0x2c90, 0x2c93, 0x2c92, 0x2c95, 0x2c94, 0x2c97, 0x2c96, 0x2c99, 0x2c98, 0x2c9b, 0x2c9a, 0x2c9d, 0x2c9c, 0x2c9f, 0x2c9e, 0x2ca1, 0x2ca0, 0x2ca3, 0x2ca2, 0x2ca5, 0x2ca4, 0x2ca7, 0x2ca6, 0x2ca9, 0x2ca8, 0x2cab, 0x2caa, 0x2cad, 0x2cac, 0x2caf, 0x2cae, 0x2cb1, 0x2cb0, 0x2cb3, 0x2cb2, 0x2cb5, 0x2cb4, 0x2cb7, 0x2cb6, 0x2cb9, 0x2cb8, 0x2cbb, 0x2cba, 0x2cbd, 0x2cbc, 0x2cbf, 0x2cbe, 0x2cc1, 0x2cc0, 0x2cc3, 0x2cc2, 0x2cc5, 0x2cc4, 0x2cc7, 0x2cc6, 0x2cc9, 0x2cc8, 0x2ccb, 0x2cca, 0x2ccd, 0x2ccc, 0x2ccf, 0x2cce, 0x2cd1, 0x2cd0, 0x2cd3, 0x2cd2, 0x2cd5, 0x2cd4, 0x2cd7, 0x2cd6, 0x2cd9, 0x2cd8, 0x2cdb, 0x2cda, 0x2cdd, 0x2cdc, 0x2cdf, 0x2cde, 0x2ce1, 0x2ce0, 0x2ce3, 0x2ce2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000 }, { /* page 45, index 26 */ 0x10a0, 0x10a1, 0x10a2, 0x10a3, 0x10a4, 0x10a5, 0x10a6, 0x10a7, 0x10a8, 0x10a9, 0x10aa, 0x10ab, 0x10ac, 0x10ad, 0x10ae, 0x10af, 0x10b0, 0x10b1, 0x10b2, 0x10b3, 0x10b4, 0x10b5, 0x10b6, 0x10b7, 0x10b8, 0x10b9, 0x10ba, 0x10bb, 0x10bc, 0x10bd, 0x10be, 0x10bf, 0x10c0, 0x10c1, 0x10c2, 0x10c3, 0x10c4, 0x10c5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 166, index 27 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xa641, 0xa640, 0xa643, 0xa642, 0xa645, 0xa644, 0xa647, 0xa646, 0xa649, 0xa648, 0xa64b, 0xa64a, 0xa64d, 0xa64c, 0xa64f, 0xa64e, 0xa651, 0xa650, 0xa653, 0xa652, 0xa655, 0xa654, 0xa657, 0xa656, 0xa659, 0xa658, 0xa65b, 0xa65a, 0xa65d, 0xa65c, 0xa65f, 0xa65e, 0x0000, 0x0000, 0xa663, 0xa662, 0xa665, 0xa664, 0xa667, 0xa666, 0xa669, 0xa668, 0xa66b, 0xa66a, 0xa66d, 0xa66c, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xa681, 0xa680, 0xa683, 0xa682, 0xa685, 0xa684, 0xa687, 0xa686, 0xa689, 0xa688, 0xa68b, 0xa68a, 0xa68d, 0xa68c, 0xa68f, 0xa68e, 0xa691, 0xa690, 0xa693, 0xa692, 0xa695, 0xa694, 0xa697, 0xa696, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 167, index 28 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xa723, 0xa722, 0xa725, 0xa724, 0xa727, 0xa726, 0xa729, 0xa728, 0xa72b, 0xa72a, 0xa72d, 0xa72c, 0xa72f, 0xa72e, 0x0000, 0x0000, 0xa733, 0xa732, 0xa735, 0xa734, 0xa737, 0xa736, 0xa739, 0xa738, 0xa73b, 0xa73a, 0xa73d, 0xa73c, 0xa73f, 0xa73e, 0xa741, 0xa740, 0xa743, 0xa742, 0xa745, 0xa744, 0xa747, 0xa746, 0xa749, 0xa748, 0xa74b, 0xa74a, 0xa74d, 0xa74c, 0xa74f, 0xa74e, 0xa751, 0xa750, 0xa753, 0xa752, 0xa755, 0xa754, 0xa757, 0xa756, 0xa759, 0xa758, 0xa75b, 0xa75a, 0xa75d, 0xa75c, 0xa75f, 0xa75e, 0xa761, 0xa760, 0xa763, 0xa762, 0xa765, 0xa764, 0xa767, 0xa766, 0xa769, 0xa768, 0xa76b, 0xa76a, 0xa76d, 0xa76c, 0xa76f, 0xa76e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xa77a, 0xa779, 0xa77c, 0xa77b, 0x1d79, 0xa77f, 0xa77e, 0xa781, 0xa780, 0xa783, 0xa782, 0xa785, 0xa784, 0xa787, 0xa786, 0x0000, 0x0000, 0x0000, 0xa78c, 0xa78b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 168, index 29 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 
0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 169, index 30 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 170, index 31 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 251, index 32 */ 0x100000f, 0x1000016, 0x100001d, 0x1000024, 0x100002d, 0x1000036, 0x100003d, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x100004f, 0x100005a, 0x1000065, 0x1000070, 0x100007b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 255, index 33 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xff41, 0xff42, 0xff43, 0xff44, 0xff45, 0xff46, 0xff47, 0xff48, 0xff49, 0xff4a, 0xff4b, 0xff4c, 0xff4d, 0xff4e, 0xff4f, 0xff50, 0xff51, 0xff52, 0xff53, 0xff54, 0xff55, 0xff56, 0xff57, 0xff58, 0xff59, 0xff5a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xff21, 0xff22, 0xff23, 0xff24, 0xff25, 0xff26, 0xff27, 0xff28, 0xff29, 0xff2a, 0xff2b, 0xff2c, 0xff2d, 0xff2e, 0xff2f, 0xff30, 0xff31, 0xff32, 0xff33, 0xff34, 0xff35, 0xff36, 0xff37, 0xff38, 0xff39, 0xff3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 260, index 34 */ 0x10428, 0x10429, 0x1042a, 0x1042b, 0x1042c, 0x1042d, 0x1042e, 0x1042f, 0x10430, 0x10431, 0x10432, 0x10433, 0x10434, 0x10435, 0x10436, 0x10437, 0x10438, 0x10439, 0x1043a, 0x1043b, 0x1043c, 0x1043d, 0x1043e, 0x1043f, 0x10440, 0x10441, 0x10442, 0x10443, 0x10444, 0x10445, 0x10446, 0x10447, 0x10448, 0x10449, 0x1044a, 0x1044b, 0x1044c, 0x1044d, 0x1044e, 0x1044f, 0x10400, 0x10401, 0x10402, 0x10403, 0x10404, 0x10405, 0x10406, 0x10407, 0x10408, 0x10409, 0x1040a, 0x1040b, 0x1040c, 0x1040d, 0x1040e, 0x1040f, 0x10410, 0x10411, 0x10412, 0x10413, 0x10414, 0x10415, 0x10416, 0x10417, 0x10418, 0x10419, 0x1041a, 0x1041b, 0x1041c, 0x1041d, 0x1041e, 0x1041f, 0x10420, 0x10421, 0x10422, 0x10423, 0x10424, 0x10425, 0x10426, 0x10427, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { /* page 471, index 35 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009 } }; /* U+0000 through U+2FAFF */ static const int16_t attr_table_part1[763] = { 0 /* page 0 */, 1 /* page 1 */, 2 /* page 2 */, 3 /* page 3 */, 4 /* page 4 */, 5 /* page 5 */, 6 /* page 6 */, 7 /* page 7 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 8 /* page 9 */, 9 /* page 10 */, 10 /* page 11 */, 11 /* page 12 */, 12 /* page 13 */, 13 /* page 14 */, 14 /* page 15 */, 15 /* page 16 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 16 /* page 23 */, 17 /* page 24 */, 18 /* page 25 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 19 /* page 27 */, 20 /* page 28 */, 21 /* page 29 */, 22 /* page 30 */, 23 /* page 31 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 24 /* page 33 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 
0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 25 /* page 44 */, 26 /* page 45 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 27 /* page 166 */, 28 /* page 167 */, 29 /* page 168 */, 30 /* page 169 */, 31 /* page 170 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 
0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 32 /* page 251 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 33 /* page 255 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 34 /* page 260 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 35 /* page 471 */, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX }; /* U+E0000 through U+10FFFF */ static const int16_t attr_table_part2[768] = { 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, 0x0000 + G_UNICODE_MAX_TABLE_INDEX }; static const gunichar title_table[][3] = { { 0x01c5, 0x01c4, 0x01c6 }, { 0x01c8, 0x01c7, 0x01c9 }, { 0x01cb, 0x01ca, 0x01cc }, { 0x01f2, 0x01f1, 0x01f3 }, { 0x1f88, 0x0000, 0x1f80 }, { 0x1f89, 0x0000, 0x1f81 }, { 0x1f8a, 0x0000, 0x1f82 }, { 0x1f8b, 0x0000, 0x1f83 }, { 0x1f8c, 0x0000, 0x1f84 }, { 0x1f8d, 0x0000, 0x1f85 }, { 0x1f8e, 0x0000, 0x1f86 }, { 0x1f8f, 0x0000, 0x1f87 }, { 0x1f98, 0x0000, 0x1f90 }, { 0x1f99, 0x0000, 0x1f91 }, { 0x1f9a, 0x0000, 0x1f92 }, { 0x1f9b, 0x0000, 0x1f93 }, { 0x1f9c, 0x0000, 0x1f94 }, { 0x1f9d, 0x0000, 0x1f95 }, { 0x1f9e, 0x0000, 0x1f96 }, { 0x1f9f, 0x0000, 0x1f97 }, { 0x1fa8, 0x0000, 0x1fa0 }, { 0x1fa9, 0x0000, 0x1fa1 }, { 0x1faa, 0x0000, 0x1fa2 }, { 0x1fab, 0x0000, 0x1fa3 }, { 0x1fac, 0x0000, 0x1fa4 }, { 0x1fad, 0x0000, 0x1fa5 }, { 0x1fae, 0x0000, 0x1fa6 }, { 0x1faf, 0x0000, 0x1fa7 }, { 0x1fbc, 0x0000, 0x1fb3 }, { 0x1fcc, 0x0000, 0x1fc3 }, { 0x1ffc, 0x0000, 0x1ff3 } }; /* Table of special cases for case conversion; each record contains * First, the best single character mapping to lowercase if Lu, * and to uppercase if Ll, followed by the output mapping for the two cases * other than the case of the codepoint, in the order [Ll],[Lu],[Lt], * encoded in UTF-8, separated and terminated by a null character. 
*/ static const char special_case_table[] = { "\x00\x53\x53\x00\x53\x73\0" /* offset 0 */ "\x69\x69\xcc\x87\x00\xc4\xb0\0" /* offset 7 */ "\x00\x46\x46\x00\x46\x66\0" /* offset 15 */ "\x00\x46\x49\x00\x46\x69\0" /* offset 22 */ "\x00\x46\x4c\x00\x46\x6c\0" /* offset 29 */ "\x00\x46\x46\x49\x00\x46\x66\x69\0" /* offset 36 */ "\x00\x46\x46\x4c\x00\x46\x66\x6c\0" /* offset 45 */ "\x00\x53\x54\x00\x53\x74\0" /* offset 54 */ "\x00\x53\x54\x00\x53\x74\0" /* offset 61 */ "\x00\xd4\xb5\xd5\x92\x00\xd4\xb5\xd6\x82\0" /* offset 68 */ "\x00\xd5\x84\xd5\x86\x00\xd5\x84\xd5\xb6\0" /* offset 79 */ "\x00\xd5\x84\xd4\xb5\x00\xd5\x84\xd5\xa5\0" /* offset 90 */ "\x00\xd5\x84\xd4\xbb\x00\xd5\x84\xd5\xab\0" /* offset 101 */ "\x00\xd5\x8e\xd5\x86\x00\xd5\x8e\xd5\xb6\0" /* offset 112 */ "\x00\xd5\x84\xd4\xbd\x00\xd5\x84\xd5\xad\0" /* offset 123 */ "\x00\xca\xbc\x4e\x00\xca\xbc\x4e\0" /* offset 134 */ "\x00\xce\x99\xcc\x88\xcc\x81\x00\xce\x99\xcc\x88\xcc\x81\0" /* offset 143 */ "\x00\xce\xa5\xcc\x88\xcc\x81\x00\xce\xa5\xcc\x88\xcc\x81\0" /* offset 158 */ "\x00\x4a\xcc\x8c\x00\x4a\xcc\x8c\0" /* offset 173 */ "\x00\x48\xcc\xb1\x00\x48\xcc\xb1\0" /* offset 182 */ "\x00\x54\xcc\x88\x00\x54\xcc\x88\0" /* offset 191 */ "\x00\x57\xcc\x8a\x00\x57\xcc\x8a\0" /* offset 200 */ "\x00\x59\xcc\x8a\x00\x59\xcc\x8a\0" /* offset 209 */ "\x00\x41\xca\xbe\x00\x41\xca\xbe\0" /* offset 218 */ "\x00\xce\xa5\xcc\x93\x00\xce\xa5\xcc\x93\0" /* offset 227 */ "\x00\xce\xa5\xcc\x93\xcc\x80\x00\xce\xa5\xcc\x93\xcc\x80\0" /* offset 238 */ "\x00\xce\xa5\xcc\x93\xcc\x81\x00\xce\xa5\xcc\x93\xcc\x81\0" /* offset 253 */ "\x00\xce\xa5\xcc\x93\xcd\x82\x00\xce\xa5\xcc\x93\xcd\x82\0" /* offset 268 */ "\x00\xce\x91\xcd\x82\x00\xce\x91\xcd\x82\0" /* offset 283 */ "\x00\xce\x97\xcd\x82\x00\xce\x97\xcd\x82\0" /* offset 294 */ "\x00\xce\x99\xcc\x88\xcc\x80\x00\xce\x99\xcc\x88\xcc\x80\0" /* offset 305 */ "\x00\xce\x99\xcc\x88\xcc\x81\x00\xce\x99\xcc\x88\xcc\x81\0" /* offset 320 */ "\x00\xce\x99\xcd\x82\x00\xce\x99\xcd\x82\0" /* 
offset 335 */ "\x00\xce\x99\xcc\x88\xcd\x82\x00\xce\x99\xcc\x88\xcd\x82\0" /* offset 346 */ "\x00\xce\xa5\xcc\x88\xcc\x80\x00\xce\xa5\xcc\x88\xcc\x80\0" /* offset 361 */ "\x00\xce\xa5\xcc\x88\xcc\x81\x00\xce\xa5\xcc\x88\xcc\x81\0" /* offset 376 */ "\x00\xce\xa1\xcc\x93\x00\xce\xa1\xcc\x93\0" /* offset 391 */ "\x00\xce\xa5\xcd\x82\x00\xce\xa5\xcd\x82\0" /* offset 402 */ "\x00\xce\xa5\xcc\x88\xcd\x82\x00\xce\xa5\xcc\x88\xcd\x82\0" /* offset 413 */ "\x00\xce\xa9\xcd\x82\x00\xce\xa9\xcd\x82\0" /* offset 428 */ "\xe1\xbe\x88\xe1\xbc\x88\xce\x99\x00\xe1\xbe\x88\0" /* offset 439 */ "\xe1\xbe\x89\xe1\xbc\x89\xce\x99\x00\xe1\xbe\x89\0" /* offset 452 */ "\xe1\xbe\x8a\xe1\xbc\x8a\xce\x99\x00\xe1\xbe\x8a\0" /* offset 465 */ "\xe1\xbe\x8b\xe1\xbc\x8b\xce\x99\x00\xe1\xbe\x8b\0" /* offset 478 */ "\xe1\xbe\x8c\xe1\xbc\x8c\xce\x99\x00\xe1\xbe\x8c\0" /* offset 491 */ "\xe1\xbe\x8d\xe1\xbc\x8d\xce\x99\x00\xe1\xbe\x8d\0" /* offset 504 */ "\xe1\xbe\x8e\xe1\xbc\x8e\xce\x99\x00\xe1\xbe\x8e\0" /* offset 517 */ "\xe1\xbe\x8f\xe1\xbc\x8f\xce\x99\x00\xe1\xbe\x8f\0" /* offset 530 */ "\xe1\xbe\x80\x00\xe1\xbc\x88\xce\x99\0" /* offset 543 */ "\xe1\xbe\x81\x00\xe1\xbc\x89\xce\x99\0" /* offset 553 */ "\xe1\xbe\x82\x00\xe1\xbc\x8a\xce\x99\0" /* offset 563 */ "\xe1\xbe\x83\x00\xe1\xbc\x8b\xce\x99\0" /* offset 573 */ "\xe1\xbe\x84\x00\xe1\xbc\x8c\xce\x99\0" /* offset 583 */ "\xe1\xbe\x85\x00\xe1\xbc\x8d\xce\x99\0" /* offset 593 */ "\xe1\xbe\x86\x00\xe1\xbc\x8e\xce\x99\0" /* offset 603 */ "\xe1\xbe\x87\x00\xe1\xbc\x8f\xce\x99\0" /* offset 613 */ "\xe1\xbe\x98\xe1\xbc\xa8\xce\x99\x00\xe1\xbe\x98\0" /* offset 623 */ "\xe1\xbe\x99\xe1\xbc\xa9\xce\x99\x00\xe1\xbe\x99\0" /* offset 636 */ "\xe1\xbe\x9a\xe1\xbc\xaa\xce\x99\x00\xe1\xbe\x9a\0" /* offset 649 */ "\xe1\xbe\x9b\xe1\xbc\xab\xce\x99\x00\xe1\xbe\x9b\0" /* offset 662 */ "\xe1\xbe\x9c\xe1\xbc\xac\xce\x99\x00\xe1\xbe\x9c\0" /* offset 675 */ "\xe1\xbe\x9d\xe1\xbc\xad\xce\x99\x00\xe1\xbe\x9d\0" /* offset 688 */ 
"\xe1\xbe\x9e\xe1\xbc\xae\xce\x99\x00\xe1\xbe\x9e\0" /* offset 701 */ "\xe1\xbe\x9f\xe1\xbc\xaf\xce\x99\x00\xe1\xbe\x9f\0" /* offset 714 */ "\xe1\xbe\x90\x00\xe1\xbc\xa8\xce\x99\0" /* offset 727 */ "\xe1\xbe\x91\x00\xe1\xbc\xa9\xce\x99\0" /* offset 737 */ "\xe1\xbe\x92\x00\xe1\xbc\xaa\xce\x99\0" /* offset 747 */ "\xe1\xbe\x93\x00\xe1\xbc\xab\xce\x99\0" /* offset 757 */ "\xe1\xbe\x94\x00\xe1\xbc\xac\xce\x99\0" /* offset 767 */ "\xe1\xbe\x95\x00\xe1\xbc\xad\xce\x99\0" /* offset 777 */ "\xe1\xbe\x96\x00\xe1\xbc\xae\xce\x99\0" /* offset 787 */ "\xe1\xbe\x97\x00\xe1\xbc\xaf\xce\x99\0" /* offset 797 */ "\xe1\xbe\xa8\xe1\xbd\xa8\xce\x99\x00\xe1\xbe\xa8\0" /* offset 807 */ "\xe1\xbe\xa9\xe1\xbd\xa9\xce\x99\x00\xe1\xbe\xa9\0" /* offset 820 */ "\xe1\xbe\xaa\xe1\xbd\xaa\xce\x99\x00\xe1\xbe\xaa\0" /* offset 833 */ "\xe1\xbe\xab\xe1\xbd\xab\xce\x99\x00\xe1\xbe\xab\0" /* offset 846 */ "\xe1\xbe\xac\xe1\xbd\xac\xce\x99\x00\xe1\xbe\xac\0" /* offset 859 */ "\xe1\xbe\xad\xe1\xbd\xad\xce\x99\x00\xe1\xbe\xad\0" /* offset 872 */ "\xe1\xbe\xae\xe1\xbd\xae\xce\x99\x00\xe1\xbe\xae\0" /* offset 885 */ "\xe1\xbe\xaf\xe1\xbd\xaf\xce\x99\x00\xe1\xbe\xaf\0" /* offset 898 */ "\xe1\xbe\xa0\x00\xe1\xbd\xa8\xce\x99\0" /* offset 911 */ "\xe1\xbe\xa1\x00\xe1\xbd\xa9\xce\x99\0" /* offset 921 */ "\xe1\xbe\xa2\x00\xe1\xbd\xaa\xce\x99\0" /* offset 931 */ "\xe1\xbe\xa3\x00\xe1\xbd\xab\xce\x99\0" /* offset 941 */ "\xe1\xbe\xa4\x00\xe1\xbd\xac\xce\x99\0" /* offset 951 */ "\xe1\xbe\xa5\x00\xe1\xbd\xad\xce\x99\0" /* offset 961 */ "\xe1\xbe\xa6\x00\xe1\xbd\xae\xce\x99\0" /* offset 971 */ "\xe1\xbe\xa7\x00\xe1\xbd\xaf\xce\x99\0" /* offset 981 */ "\xe1\xbe\xbc\xce\x91\xce\x99\x00\xe1\xbe\xbc\0" /* offset 991 */ "\xe1\xbe\xb3\x00\xce\x91\xce\x99\0" /* offset 1003 */ "\xe1\xbf\x8c\xce\x97\xce\x99\x00\xe1\xbf\x8c\0" /* offset 1012 */ "\xe1\xbf\x83\x00\xce\x97\xce\x99\0" /* offset 1024 */ "\xe1\xbf\xbc\xce\xa9\xce\x99\x00\xe1\xbf\xbc\0" /* offset 1033 */ "\xe1\xbf\xb3\x00\xce\xa9\xce\x99\0" /* offset 1045 */ 
"\x00\xe1\xbe\xba\xce\x99\x00\xe1\xbe\xba\xcd\x85\0" /* offset 1054 */ "\x00\xce\x86\xce\x99\x00\xce\x86\xcd\x85\0" /* offset 1067 */ "\x00\xe1\xbf\x8a\xce\x99\x00\xe1\xbf\x8a\xcd\x85\0" /* offset 1078 */ "\x00\xce\x89\xce\x99\x00\xce\x89\xcd\x85\0" /* offset 1091 */ "\x00\xe1\xbf\xba\xce\x99\x00\xe1\xbf\xba\xcd\x85\0" /* offset 1102 */ "\x00\xce\x8f\xce\x99\x00\xce\x8f\xcd\x85\0" /* offset 1115 */ "\x00\xce\x91\xcd\x82\xce\x99\x00\xce\x91\xcd\x82\xcd\x85\0" /* offset 1126 */ "\x00\xce\x97\xcd\x82\xce\x99\x00\xce\x97\xcd\x82\xcd\x85\0" /* offset 1141 */ "\x00\xce\xa9\xcd\x82\xce\x99\x00\xce\xa9\xcd\x82\xcd\x85\0" /* offset 1156 */ }; /* Table of casefolding cases that can't be derived by lowercasing */ static const struct { uint16_t ch; char data[7]; } casefold_table[] = { { 0x00b5, "\xce\xbc" }, { 0x00df, "\x73\x73" }, { 0x0130, "\x69\xcc\x87" }, { 0x0149, "\xca\xbc\x6e" }, { 0x017f, "\x73" }, { 0x01f0, "\x6a\xcc\x8c" }, { 0x0345, "\xce\xb9" }, { 0x0390, "\xce\xb9\xcc\x88\xcc\x81" }, { 0x03b0, "\xcf\x85\xcc\x88\xcc\x81" }, { 0x03c2, "\xcf\x83" }, { 0x03d0, "\xce\xb2" }, { 0x03d1, "\xce\xb8" }, { 0x03d5, "\xcf\x86" }, { 0x03d6, "\xcf\x80" }, { 0x03f0, "\xce\xba" }, { 0x03f1, "\xcf\x81" }, { 0x03f5, "\xce\xb5" }, { 0x0587, "\xd5\xa5\xd6\x82" }, { 0x1e96, "\x68\xcc\xb1" }, { 0x1e97, "\x74\xcc\x88" }, { 0x1e98, "\x77\xcc\x8a" }, { 0x1e99, "\x79\xcc\x8a" }, { 0x1e9a, "\x61\xca\xbe" }, { 0x1e9b, "\xe1\xb9\xa1" }, { 0x1e9e, "\x73\x73" }, { 0x1f50, "\xcf\x85\xcc\x93" }, { 0x1f52, "\xcf\x85\xcc\x93\xcc\x80" }, { 0x1f54, "\xcf\x85\xcc\x93\xcc\x81" }, { 0x1f56, "\xcf\x85\xcc\x93\xcd\x82" }, { 0x1f80, "\xe1\xbc\x80\xce\xb9" }, { 0x1f81, "\xe1\xbc\x81\xce\xb9" }, { 0x1f82, "\xe1\xbc\x82\xce\xb9" }, { 0x1f83, "\xe1\xbc\x83\xce\xb9" }, { 0x1f84, "\xe1\xbc\x84\xce\xb9" }, { 0x1f85, "\xe1\xbc\x85\xce\xb9" }, { 0x1f86, "\xe1\xbc\x86\xce\xb9" }, { 0x1f87, "\xe1\xbc\x87\xce\xb9" }, { 0x1f88, "\xe1\xbc\x80\xce\xb9" }, { 0x1f89, "\xe1\xbc\x81\xce\xb9" }, { 0x1f8a, 
"\xe1\xbc\x82\xce\xb9" }, { 0x1f8b, "\xe1\xbc\x83\xce\xb9" }, { 0x1f8c, "\xe1\xbc\x84\xce\xb9" }, { 0x1f8d, "\xe1\xbc\x85\xce\xb9" }, { 0x1f8e, "\xe1\xbc\x86\xce\xb9" }, { 0x1f8f, "\xe1\xbc\x87\xce\xb9" }, { 0x1f90, "\xe1\xbc\xa0\xce\xb9" }, { 0x1f91, "\xe1\xbc\xa1\xce\xb9" }, { 0x1f92, "\xe1\xbc\xa2\xce\xb9" }, { 0x1f93, "\xe1\xbc\xa3\xce\xb9" }, { 0x1f94, "\xe1\xbc\xa4\xce\xb9" }, { 0x1f95, "\xe1\xbc\xa5\xce\xb9" }, { 0x1f96, "\xe1\xbc\xa6\xce\xb9" }, { 0x1f97, "\xe1\xbc\xa7\xce\xb9" }, { 0x1f98, "\xe1\xbc\xa0\xce\xb9" }, { 0x1f99, "\xe1\xbc\xa1\xce\xb9" }, { 0x1f9a, "\xe1\xbc\xa2\xce\xb9" }, { 0x1f9b, "\xe1\xbc\xa3\xce\xb9" }, { 0x1f9c, "\xe1\xbc\xa4\xce\xb9" }, { 0x1f9d, "\xe1\xbc\xa5\xce\xb9" }, { 0x1f9e, "\xe1\xbc\xa6\xce\xb9" }, { 0x1f9f, "\xe1\xbc\xa7\xce\xb9" }, { 0x1fa0, "\xe1\xbd\xa0\xce\xb9" }, { 0x1fa1, "\xe1\xbd\xa1\xce\xb9" }, { 0x1fa2, "\xe1\xbd\xa2\xce\xb9" }, { 0x1fa3, "\xe1\xbd\xa3\xce\xb9" }, { 0x1fa4, "\xe1\xbd\xa4\xce\xb9" }, { 0x1fa5, "\xe1\xbd\xa5\xce\xb9" }, { 0x1fa6, "\xe1\xbd\xa6\xce\xb9" }, { 0x1fa7, "\xe1\xbd\xa7\xce\xb9" }, { 0x1fa8, "\xe1\xbd\xa0\xce\xb9" }, { 0x1fa9, "\xe1\xbd\xa1\xce\xb9" }, { 0x1faa, "\xe1\xbd\xa2\xce\xb9" }, { 0x1fab, "\xe1\xbd\xa3\xce\xb9" }, { 0x1fac, "\xe1\xbd\xa4\xce\xb9" }, { 0x1fad, "\xe1\xbd\xa5\xce\xb9" }, { 0x1fae, "\xe1\xbd\xa6\xce\xb9" }, { 0x1faf, "\xe1\xbd\xa7\xce\xb9" }, { 0x1fb2, "\xe1\xbd\xb0\xce\xb9" }, { 0x1fb3, "\xce\xb1\xce\xb9" }, { 0x1fb4, "\xce\xac\xce\xb9" }, { 0x1fb6, "\xce\xb1\xcd\x82" }, { 0x1fb7, "\xce\xb1\xcd\x82\xce\xb9" }, { 0x1fbc, "\xce\xb1\xce\xb9" }, { 0x1fbe, "\xce\xb9" }, { 0x1fc2, "\xe1\xbd\xb4\xce\xb9" }, { 0x1fc3, "\xce\xb7\xce\xb9" }, { 0x1fc4, "\xce\xae\xce\xb9" }, { 0x1fc6, "\xce\xb7\xcd\x82" }, { 0x1fc7, "\xce\xb7\xcd\x82\xce\xb9" }, { 0x1fcc, "\xce\xb7\xce\xb9" }, { 0x1fd2, "\xce\xb9\xcc\x88\xcc\x80" }, { 0x1fd3, "\xce\xb9\xcc\x88\xcc\x81" }, { 0x1fd6, "\xce\xb9\xcd\x82" }, { 0x1fd7, "\xce\xb9\xcc\x88\xcd\x82" }, { 0x1fe2, "\xcf\x85\xcc\x88\xcc\x80" }, { 0x1fe3, 
"\xcf\x85\xcc\x88\xcc\x81" }, { 0x1fe4, "\xcf\x81\xcc\x93" }, { 0x1fe6, "\xcf\x85\xcd\x82" }, { 0x1fe7, "\xcf\x85\xcc\x88\xcd\x82" }, { 0x1ff2, "\xe1\xbd\xbc\xce\xb9" }, { 0x1ff3, "\xcf\x89\xce\xb9" }, { 0x1ff4, "\xcf\x8e\xce\xb9" }, { 0x1ff6, "\xcf\x89\xcd\x82" }, { 0x1ff7, "\xcf\x89\xcd\x82\xce\xb9" }, { 0x1ffc, "\xcf\x89\xce\xb9" }, { 0x2160, "\xe2\x85\xb0" }, { 0x2161, "\xe2\x85\xb1" }, { 0x2162, "\xe2\x85\xb2" }, { 0x2163, "\xe2\x85\xb3" }, { 0x2164, "\xe2\x85\xb4" }, { 0x2165, "\xe2\x85\xb5" }, { 0x2166, "\xe2\x85\xb6" }, { 0x2167, "\xe2\x85\xb7" }, { 0x2168, "\xe2\x85\xb8" }, { 0x2169, "\xe2\x85\xb9" }, { 0x216a, "\xe2\x85\xba" }, { 0x216b, "\xe2\x85\xbb" }, { 0x216c, "\xe2\x85\xbc" }, { 0x216d, "\xe2\x85\xbd" }, { 0x216e, "\xe2\x85\xbe" }, { 0x216f, "\xe2\x85\xbf" }, { 0x24b6, "\xe2\x93\x90" }, { 0x24b7, "\xe2\x93\x91" }, { 0x24b8, "\xe2\x93\x92" }, { 0x24b9, "\xe2\x93\x93" }, { 0x24ba, "\xe2\x93\x94" }, { 0x24bb, "\xe2\x93\x95" }, { 0x24bc, "\xe2\x93\x96" }, { 0x24bd, "\xe2\x93\x97" }, { 0x24be, "\xe2\x93\x98" }, { 0x24bf, "\xe2\x93\x99" }, { 0x24c0, "\xe2\x93\x9a" }, { 0x24c1, "\xe2\x93\x9b" }, { 0x24c2, "\xe2\x93\x9c" }, { 0x24c3, "\xe2\x93\x9d" }, { 0x24c4, "\xe2\x93\x9e" }, { 0x24c5, "\xe2\x93\x9f" }, { 0x24c6, "\xe2\x93\xa0" }, { 0x24c7, "\xe2\x93\xa1" }, { 0x24c8, "\xe2\x93\xa2" }, { 0x24c9, "\xe2\x93\xa3" }, { 0x24ca, "\xe2\x93\xa4" }, { 0x24cb, "\xe2\x93\xa5" }, { 0x24cc, "\xe2\x93\xa6" }, { 0x24cd, "\xe2\x93\xa7" }, { 0x24ce, "\xe2\x93\xa8" }, { 0x24cf, "\xe2\x93\xa9" }, { 0xfb00, "\x66\x66" }, { 0xfb01, "\x66\x69" }, { 0xfb02, "\x66\x6c" }, { 0xfb03, "\x66\x66\x69" }, { 0xfb04, "\x66\x66\x6c" }, { 0xfb05, "\x73\x74" }, { 0xfb06, "\x73\x74" }, { 0xfb13, "\xd5\xb4\xd5\xb6" }, { 0xfb14, "\xd5\xb4\xd5\xa5" }, { 0xfb15, "\xd5\xb4\xd5\xab" }, { 0xfb16, "\xd5\xbe\xd5\xb6" }, { 0xfb17, "\xd5\xb4\xd5\xad" }, }; #endif /* CHARTABLES_H */ 
LucenePlusPlus-rel_3.0.9/src/core/util/unicode/guniprop.cpp000066400000000000000000000435731456444476200240350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// /* Unicode character properties. * * Copyright (C) 1999 Tom Tromey * Copyright (C) 2000 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ // See COPYING file for licensing information. #include "LuceneInc.h" #include "guniprop.h" #define ATTR_TABLE(Page) (((Page) <= G_UNICODE_LAST_PAGE_PART1) \ ? attr_table_part1[Page] \ : attr_table_part2[(Page) - 0xe00]) #define ATTTABLE(Page, Char) \ ((ATTR_TABLE(Page) == G_UNICODE_MAX_TABLE_INDEX) ? 0 : (attr_data[ATTR_TABLE(Page)][Char])) #define TTYPE_PART1(Page, Char) \ ((type_table_part1[Page] >= G_UNICODE_MAX_TABLE_INDEX) \ ? (type_table_part1[Page] - G_UNICODE_MAX_TABLE_INDEX) \ : (type_data[type_table_part1[Page]][Char])) #define TTYPE_PART2(Page, Char) \ ((type_table_part2[Page] >= G_UNICODE_MAX_TABLE_INDEX) \ ? 
(type_table_part2[Page] - G_UNICODE_MAX_TABLE_INDEX) \ : (type_data[type_table_part2[Page]][Char])) #define TYPE(Char) \ (((Char) <= G_UNICODE_LAST_CHAR_PART1) \ ? TTYPE_PART1 ((Char) >> 8, (Char) & 0xff) \ : (((Char) >= 0xe0000 && (Char) <= G_UNICODE_LAST_CHAR) \ ? TTYPE_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff) \ : G_UNICODE_UNASSIGNED)) #define IS(Type, Class) (((guint)1 << (Type)) & (Class)) #define OR(Type, Rest) (((guint)1 << (Type)) | (Rest)) /* Count the number of elements in an array. The array must be defined * as such; using this with a dynamically allocated array will give * incorrect results. */ #define G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0])) #define ISALPHA(Type) IS ((Type), \ OR (G_UNICODE_LOWERCASE_LETTER, \ OR (G_UNICODE_UPPERCASE_LETTER, \ OR (G_UNICODE_TITLECASE_LETTER, \ OR (G_UNICODE_MODIFIER_LETTER, \ OR (G_UNICODE_OTHER_LETTER, 0)))))) #define ISALDIGIT(Type) IS ((Type), \ OR (G_UNICODE_DECIMAL_NUMBER, \ OR (G_UNICODE_LETTER_NUMBER, \ OR (G_UNICODE_OTHER_NUMBER, \ OR (G_UNICODE_LOWERCASE_LETTER, \ OR (G_UNICODE_UPPERCASE_LETTER, \ OR (G_UNICODE_TITLECASE_LETTER, \ OR (G_UNICODE_MODIFIER_LETTER, \ OR (G_UNICODE_OTHER_LETTER, 0))))))))) #define ISMARK(Type) IS ((Type), \ OR (G_UNICODE_NON_SPACING_MARK, \ OR (G_UNICODE_COMBINING_MARK, \ OR (G_UNICODE_ENCLOSING_MARK, 0)))) #define ISZEROWIDTHTYPE(Type) IS ((Type), \ OR (G_UNICODE_NON_SPACING_MARK, \ OR (G_UNICODE_ENCLOSING_MARK, \ OR (G_UNICODE_FORMAT, 0)))) #define UTF8_COMPUTE(Char, Mask, Len) \ if (Char < 128) \ { \ Len = 1; \ Mask = 0x7f; \ } \ else if ((Char & 0xe0) == 0xc0) \ { \ Len = 2; \ Mask = 0x1f; \ } \ else if ((Char & 0xf0) == 0xe0) \ { \ Len = 3; \ Mask = 0x0f; \ } \ else if ((Char & 0xf8) == 0xf0) \ { \ Len = 4; \ Mask = 0x07; \ } \ else if ((Char & 0xfc) == 0xf8) \ { \ Len = 5; \ Mask = 0x03; \ } \ else if ((Char & 0xfe) == 0xfc) \ { \ Len = 6; \ Mask = 0x01; \ } \ else \ Len = -1; #define UTF8_GET(Result, Chars, Count, Mask, Len) \ (Result) = (Chars)[0] & 
(Mask); \ for ((Count) = 1; (Count) < (Len); ++(Count)) \ { \ if (((Chars)[(Count)] & 0xc0) != 0x80) \ { \ (Result) = -1; \ break; \ } \ (Result) <<= 6; \ (Result) |= ((Chars)[(Count)] & 0x3f); \ } /** * g_utf8_get_char: * @p: a pointer to Unicode character encoded as UTF-8 * * Converts a sequence of bytes encoded as UTF-8 to a Unicode character. * If @p does not point to a valid UTF-8 encoded character, results are * undefined. If you are not sure that the bytes are complete * valid Unicode characters, you should use g_utf8_get_char_validated() * instead. * * Return value: the resulting character **/ gunichar g_utf8_get_char (const gchar* p) { int i, mask = 0, len; gunichar result; unsigned char c = (unsigned char) *p; UTF8_COMPUTE (c, mask, len); if (len == -1) { return (gunichar)-1; } UTF8_GET (result, p, i, mask, len); return result; } /** * g_unichar_isalnum: * @c: a Unicode character * * Determines whether a character is alphanumeric. * Given some UTF-8 text, obtain a character value * with g_utf8_get_char(). * * Return value: %TRUE if @c is an alphanumeric character **/ gboolean g_unichar_isalnum (gunichar c) { return ISALDIGIT (TYPE (c)) ? true : false; } /** * g_unichar_isalpha: * @c: a Unicode character * * Determines whether a character is alphabetic (i.e. a letter). * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). * * Return value: %TRUE if @c is an alphabetic character **/ gboolean g_unichar_isalpha (gunichar c) { return ISALPHA (TYPE (c)) ? true : false; } /** * g_unichar_iscntrl: * @c: a Unicode character * * Determines whether a character is a control character. * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). * * Return value: %TRUE if @c is a control character **/ gboolean g_unichar_iscntrl (gunichar c) { return TYPE (c) == G_UNICODE_CONTROL; } /** * g_unichar_isdigit: * @c: a Unicode character * * Determines whether a character is numeric (i.e. a digit). 
This * covers ASCII 0-9 and also digits in other languages/scripts. Given * some UTF-8 text, obtain a character value with g_utf8_get_char(). * * Return value: %TRUE if @c is a digit **/ gboolean g_unichar_isdigit (gunichar c) { return TYPE (c) == G_UNICODE_DECIMAL_NUMBER; } /** * g_unichar_isgraph: * @c: a Unicode character * * Determines whether a character is printable and not a space * (returns %FALSE for control characters, format characters, and * spaces). g_unichar_isprint() is similar, but returns %TRUE for * spaces. Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). * * Return value: %TRUE if @c is printable unless it's a space **/ gboolean g_unichar_isgraph (gunichar c) { return !IS (TYPE(c), OR (G_UNICODE_CONTROL, OR (G_UNICODE_FORMAT, OR (G_UNICODE_UNASSIGNED, OR (G_UNICODE_SURROGATE, OR (G_UNICODE_SPACE_SEPARATOR, 0)))))); } /** * g_unichar_islower: * @c: a Unicode character * * Determines whether a character is a lowercase letter. * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). * * Return value: %TRUE if @c is a lowercase letter **/ gboolean g_unichar_islower (gunichar c) { return TYPE (c) == G_UNICODE_LOWERCASE_LETTER; } /** * g_unichar_isprint: * @c: a Unicode character * * Determines whether a character is printable. * Unlike g_unichar_isgraph(), returns %TRUE for spaces. * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). * * Return value: %TRUE if @c is printable **/ gboolean g_unichar_isprint (gunichar c) { return !IS (TYPE(c), OR (G_UNICODE_CONTROL, OR (G_UNICODE_FORMAT, OR (G_UNICODE_UNASSIGNED, OR (G_UNICODE_SURROGATE, 0))))); } /** * g_unichar_ispunct: * @c: a Unicode character * * Determines whether a character is punctuation or a symbol. * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). 
* * Return value: %TRUE if @c is a punctuation or symbol character **/ gboolean g_unichar_ispunct (gunichar c) { return IS (TYPE(c), OR (G_UNICODE_CONNECT_PUNCTUATION, OR (G_UNICODE_DASH_PUNCTUATION, OR (G_UNICODE_CLOSE_PUNCTUATION, OR (G_UNICODE_FINAL_PUNCTUATION, OR (G_UNICODE_INITIAL_PUNCTUATION, OR (G_UNICODE_OTHER_PUNCTUATION, OR (G_UNICODE_OPEN_PUNCTUATION, OR (G_UNICODE_CURRENCY_SYMBOL, OR (G_UNICODE_MODIFIER_SYMBOL, OR (G_UNICODE_MATH_SYMBOL, OR (G_UNICODE_OTHER_SYMBOL, 0)))))))))))) ? true : false; } /** * g_unichar_isspace: * @c: a Unicode character * * Determines whether a character is a space, tab, or line separator * (newline, carriage return, etc.). Given some UTF-8 text, obtain a * character value with g_utf8_get_char(). * * (Note: don't use this to do word breaking; you have to use * Pango or equivalent to get word breaking right, the algorithm * is fairly complex.) * * Return value: %TRUE if @c is a space character **/ gboolean g_unichar_isspace (gunichar c) { switch (c) { /* special-case these since Unicode thinks they are not spaces */ case '\t': case '\n': case '\r': case '\f': return true; break; default: { return IS (TYPE(c), OR (G_UNICODE_SPACE_SEPARATOR, OR (G_UNICODE_LINE_SEPARATOR, OR (G_UNICODE_PARAGRAPH_SEPARATOR, 0)))) ? true : false; } break; } } /** * g_unichar_ismark: * @c: a Unicode character * * Determines whether a character is a mark (non-spacing mark, * combining mark, or enclosing mark in Unicode speak). * Given some UTF-8 text, obtain a character value * with g_utf8_get_char(). * * Note: in most cases where isalpha characters are allowed, * ismark characters should be allowed to as they are essential * for writing most European languages as well as many non-Latin * scripts. * * Return value: %TRUE if @c is a mark character * * Since: 2.14 **/ gboolean g_unichar_ismark (gunichar c) { return ISMARK (TYPE (c)) ? true : false; } /** * g_unichar_isupper: * @c: a Unicode character * * Determines if a character is uppercase. 
* * Return value: %TRUE if @c is an uppercase character **/ gboolean g_unichar_isupper (gunichar c) { return TYPE (c) == G_UNICODE_UPPERCASE_LETTER; } /** * g_unichar_istitle: * @c: a Unicode character * * Determines if a character is titlecase. Some characters in * Unicode which are composites, such as the DZ digraph * have three case variants instead of just two. The titlecase * form is used at the beginning of a word where only the * first letter is capitalized. The titlecase form of the DZ * digraph is U+01F2 LATIN CAPITAL LETTTER D WITH SMALL LETTER Z. * * Return value: %TRUE if the character is titlecase **/ gboolean g_unichar_istitle (gunichar c) { unsigned int i; for (i = 0; i < G_N_ELEMENTS (title_table); ++i) if (title_table[i][0] == c) { return true; } return false; } /** * g_unichar_isxdigit: * @c: a Unicode character. * * Determines if a character is a hexidecimal digit. * * Return value: %TRUE if the character is a hexadecimal digit **/ gboolean g_unichar_isxdigit (gunichar c) { return ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || (TYPE (c) == G_UNICODE_DECIMAL_NUMBER)); } /** * g_unichar_isdefined: * @c: a Unicode character * * Determines if a given character is assigned in the Unicode * standard. * * Return value: %TRUE if the character has an assigned value **/ gboolean g_unichar_isdefined (gunichar c) { return !IS (TYPE(c), OR (G_UNICODE_UNASSIGNED, OR (G_UNICODE_SURROGATE, 0))); } /** * g_unichar_toupper: * @c: a Unicode character * * Converts a character to uppercase. * * Return value: the result of converting @c to uppercase. * If @c is not an lowercase or titlecase character, * or has no upper case equivalent @c is returned unchanged. 
**/ gunichar g_unichar_toupper (gunichar c) { int t = TYPE (c); if (t == G_UNICODE_LOWERCASE_LETTER) { gunichar val = ATTTABLE (c >> 8, c & 0xff); if (val >= 0x1000000) { const gchar* p = special_case_table + val - 0x1000000; val = g_utf8_get_char (p); } /* Some lowercase letters, e.g., U+000AA, FEMININE ORDINAL INDICATOR, * do not have an uppercase equivalent, in which case val will be * zero. */ return val ? val : c; } else if (t == G_UNICODE_TITLECASE_LETTER) { unsigned int i; for (i = 0; i < G_N_ELEMENTS (title_table); ++i) { if (title_table[i][0] == c) { return title_table[i][1]; } } } return c; } /** * g_unichar_tolower: * @c: a Unicode character. * * Converts a character to lower case. * * Return value: the result of converting @c to lower case. * If @c is not an upperlower or titlecase character, * or has no lowercase equivalent @c is returned unchanged. **/ gunichar g_unichar_tolower (gunichar c) { int t = TYPE (c); if (t == G_UNICODE_UPPERCASE_LETTER) { gunichar val = ATTTABLE (c >> 8, c & 0xff); if (val >= 0x1000000) { const gchar* p = special_case_table + val - 0x1000000; return g_utf8_get_char (p); } else { /* Not all uppercase letters are guaranteed to have a lowercase * equivalent. If this is the case, val will be zero. */ return val ? val : c; } } else if (t == G_UNICODE_TITLECASE_LETTER) { unsigned int i; for (i = 0; i < G_N_ELEMENTS (title_table); ++i) { if (title_table[i][0] == c) { return title_table[i][2]; } } } return c; } /** * g_unichar_totitle: * @c: a Unicode character * * Converts a character to the titlecase. * * Return value: the result of converting @c to titlecase. * If @c is not an uppercase or lowercase character, * @c is returned unchanged. 
**/ gunichar g_unichar_totitle (gunichar c) { unsigned int i; for (i = 0; i < G_N_ELEMENTS (title_table); ++i) { if (title_table[i][0] == c || title_table[i][1] == c || title_table[i][2] == c) { return title_table[i][0]; } } if (TYPE (c) == G_UNICODE_LOWERCASE_LETTER) { return g_unichar_toupper (c); } return c; } /** * g_unichar_digit_value: * @c: a Unicode character * * Determines the numeric value of a character as a decimal * digit. * * Return value: If @c is a decimal digit (according to * g_unichar_isdigit()), its numeric value. Otherwise, -1. **/ int g_unichar_digit_value (gunichar c) { if (TYPE (c) == G_UNICODE_DECIMAL_NUMBER) { return ATTTABLE (c >> 8, c & 0xff); } return -1; } /** * g_unichar_xdigit_value: * @c: a Unicode character * * Determines the numeric value of a character as a hexadecimal * digit. * * Return value: If @c is a hex digit (according to * g_unichar_isxdigit()), its numeric value. Otherwise, -1. **/ int g_unichar_xdigit_value (gunichar c) { if (c >= 'A' && c <= 'F') { return c - 'A' + 10; } if (c >= 'a' && c <= 'f') { return c - 'a' + 10; } if (TYPE (c) == G_UNICODE_DECIMAL_NUMBER) { return ATTTABLE (c >> 8, c & 0xff); } return -1; } /** * g_unichar_type: * @c: a Unicode character * * Classifies a Unicode character by type. * * Return value: the type of the character. **/ GUnicodeType g_unichar_type (gunichar c) { return (GUnicodeType)TYPE (c); } LucenePlusPlus-rel_3.0.9/src/core/util/unicode/guniprop.h000066400000000000000000000217551456444476200235000ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// /* Unicode character properties. * * Copyright (C) 1999 Tom Tromey * Copyright (C) 2000 Red Hat, Inc. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ // See COPYING file for licensing information. #include "Lucene.h" typedef uint32_t gunichar; typedef uint16_t gunichar2; typedef uint32_t guint; typedef uint8_t guchar; typedef int32_t gint; typedef char gchar; typedef bool gboolean; typedef size_t gsize; typedef size_t gssize; /* These are the possible character classifications. 
* See http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values */ typedef enum { G_UNICODE_CONTROL, G_UNICODE_FORMAT, G_UNICODE_UNASSIGNED, G_UNICODE_PRIVATE_USE, G_UNICODE_SURROGATE, G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_TITLECASE_LETTER, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_COMBINING_MARK, G_UNICODE_ENCLOSING_MARK, G_UNICODE_NON_SPACING_MARK, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LINE_SEPARATOR, G_UNICODE_PARAGRAPH_SEPARATOR, G_UNICODE_SPACE_SEPARATOR } GUnicodeType; /* These are the possible line break classifications. * Note that new types may be added in the future. * Implementations may regard unknown values like G_UNICODE_BREAK_UNKNOWN * See http://www.unicode.org/unicode/reports/tr14/ */ typedef enum { G_UNICODE_BREAK_MANDATORY, G_UNICODE_BREAK_CARRIAGE_RETURN, G_UNICODE_BREAK_LINE_FEED, G_UNICODE_BREAK_COMBINING_MARK, G_UNICODE_BREAK_SURROGATE, G_UNICODE_BREAK_ZERO_WIDTH_SPACE, G_UNICODE_BREAK_INSEPARABLE, G_UNICODE_BREAK_NON_BREAKING_GLUE, G_UNICODE_BREAK_CONTINGENT, G_UNICODE_BREAK_SPACE, G_UNICODE_BREAK_AFTER, G_UNICODE_BREAK_BEFORE, G_UNICODE_BREAK_BEFORE_AND_AFTER, G_UNICODE_BREAK_HYPHEN, G_UNICODE_BREAK_NON_STARTER, G_UNICODE_BREAK_OPEN_PUNCTUATION, G_UNICODE_BREAK_CLOSE_PUNCTUATION, G_UNICODE_BREAK_QUOTATION, G_UNICODE_BREAK_EXCLAMATION, G_UNICODE_BREAK_IDEOGRAPHIC, G_UNICODE_BREAK_NUMERIC, G_UNICODE_BREAK_INFIX_SEPARATOR, G_UNICODE_BREAK_SYMBOL, G_UNICODE_BREAK_ALPHABETIC, G_UNICODE_BREAK_PREFIX, G_UNICODE_BREAK_POSTFIX, G_UNICODE_BREAK_COMPLEX_CONTEXT, G_UNICODE_BREAK_AMBIGUOUS, G_UNICODE_BREAK_UNKNOWN, G_UNICODE_BREAK_NEXT_LINE, 
G_UNICODE_BREAK_WORD_JOINER, G_UNICODE_BREAK_HANGUL_L_JAMO, G_UNICODE_BREAK_HANGUL_V_JAMO, G_UNICODE_BREAK_HANGUL_T_JAMO, G_UNICODE_BREAK_HANGUL_LV_SYLLABLE, G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE } GUnicodeBreakType; typedef enum { /* ISO 15924 code */ G_UNICODE_SCRIPT_INVALID_CODE = -1, G_UNICODE_SCRIPT_COMMON = 0, /* Zyyy */ G_UNICODE_SCRIPT_INHERITED, /* Qaai */ G_UNICODE_SCRIPT_ARABIC, /* Arab */ G_UNICODE_SCRIPT_ARMENIAN, /* Armn */ G_UNICODE_SCRIPT_BENGALI, /* Beng */ G_UNICODE_SCRIPT_BOPOMOFO, /* Bopo */ G_UNICODE_SCRIPT_CHEROKEE, /* Cher */ G_UNICODE_SCRIPT_COPTIC, /* Qaac */ G_UNICODE_SCRIPT_CYRILLIC, /* Cyrl (Cyrs) */ G_UNICODE_SCRIPT_DESERET, /* Dsrt */ G_UNICODE_SCRIPT_DEVANAGARI, /* Deva */ G_UNICODE_SCRIPT_ETHIOPIC, /* Ethi */ G_UNICODE_SCRIPT_GEORGIAN, /* Geor (Geon, Geoa) */ G_UNICODE_SCRIPT_GOTHIC, /* Goth */ G_UNICODE_SCRIPT_GREEK, /* Grek */ G_UNICODE_SCRIPT_GUJARATI, /* Gujr */ G_UNICODE_SCRIPT_GURMUKHI, /* Guru */ G_UNICODE_SCRIPT_HAN, /* Hani */ G_UNICODE_SCRIPT_HANGUL, /* Hang */ G_UNICODE_SCRIPT_HEBREW, /* Hebr */ G_UNICODE_SCRIPT_HIRAGANA, /* Hira */ G_UNICODE_SCRIPT_KANNADA, /* Knda */ G_UNICODE_SCRIPT_KATAKANA, /* Kana */ G_UNICODE_SCRIPT_KHMER, /* Khmr */ G_UNICODE_SCRIPT_LAO, /* Laoo */ G_UNICODE_SCRIPT_LATIN, /* Latn (Latf, Latg) */ G_UNICODE_SCRIPT_MALAYALAM, /* Mlym */ G_UNICODE_SCRIPT_MONGOLIAN, /* Mong */ G_UNICODE_SCRIPT_MYANMAR, /* Mymr */ G_UNICODE_SCRIPT_OGHAM, /* Ogam */ G_UNICODE_SCRIPT_OLD_ITALIC, /* Ital */ G_UNICODE_SCRIPT_ORIYA, /* Orya */ G_UNICODE_SCRIPT_RUNIC, /* Runr */ G_UNICODE_SCRIPT_SINHALA, /* Sinh */ G_UNICODE_SCRIPT_SYRIAC, /* Syrc (Syrj, Syrn, Syre) */ G_UNICODE_SCRIPT_TAMIL, /* Taml */ G_UNICODE_SCRIPT_TELUGU, /* Telu */ G_UNICODE_SCRIPT_THAANA, /* Thaa */ G_UNICODE_SCRIPT_THAI, /* Thai */ G_UNICODE_SCRIPT_TIBETAN, /* Tibt */ G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, /* Cans */ G_UNICODE_SCRIPT_YI, /* Yiii */ G_UNICODE_SCRIPT_TAGALOG, /* Tglg */ G_UNICODE_SCRIPT_HANUNOO, /* Hano */ G_UNICODE_SCRIPT_BUHID, /* Buhd 
*/ G_UNICODE_SCRIPT_TAGBANWA, /* Tagb */ /* Unicode-4.0 additions */ G_UNICODE_SCRIPT_BRAILLE, /* Brai */ G_UNICODE_SCRIPT_CYPRIOT, /* Cprt */ G_UNICODE_SCRIPT_LIMBU, /* Limb */ G_UNICODE_SCRIPT_OSMANYA, /* Osma */ G_UNICODE_SCRIPT_SHAVIAN, /* Shaw */ G_UNICODE_SCRIPT_LINEAR_B, /* Linb */ G_UNICODE_SCRIPT_TAI_LE, /* Tale */ G_UNICODE_SCRIPT_UGARITIC, /* Ugar */ /* Unicode-4.1 additions */ G_UNICODE_SCRIPT_NEW_TAI_LUE, /* Talu */ G_UNICODE_SCRIPT_BUGINESE, /* Bugi */ G_UNICODE_SCRIPT_GLAGOLITIC, /* Glag */ G_UNICODE_SCRIPT_TIFINAGH, /* Tfng */ G_UNICODE_SCRIPT_SYLOTI_NAGRI, /* Sylo */ G_UNICODE_SCRIPT_OLD_PERSIAN, /* Xpeo */ G_UNICODE_SCRIPT_KHAROSHTHI, /* Khar */ /* Unicode-5.0 additions */ G_UNICODE_SCRIPT_UNKNOWN, /* Zzzz */ G_UNICODE_SCRIPT_BALINESE, /* Bali */ G_UNICODE_SCRIPT_CUNEIFORM, /* Xsux */ G_UNICODE_SCRIPT_PHOENICIAN, /* Phnx */ G_UNICODE_SCRIPT_PHAGS_PA, /* Phag */ G_UNICODE_SCRIPT_NKO, /* Nkoo */ /* Unicode-5.1 additions */ G_UNICODE_SCRIPT_KAYAH_LI, /* Kali */ G_UNICODE_SCRIPT_LEPCHA, /* Lepc */ G_UNICODE_SCRIPT_REJANG, /* Rjng */ G_UNICODE_SCRIPT_SUNDANESE, /* Sund */ G_UNICODE_SCRIPT_SAURASHTRA, /* Saur */ G_UNICODE_SCRIPT_CHAM, /* Cham */ G_UNICODE_SCRIPT_OL_CHIKI, /* Olck */ G_UNICODE_SCRIPT_VAI, /* Vaii */ G_UNICODE_SCRIPT_CARIAN, /* Cari */ G_UNICODE_SCRIPT_LYCIAN, /* Lyci */ G_UNICODE_SCRIPT_LYDIAN /* Lydi */ } GUnicodeScript; #include "gunichartables.h" gboolean g_unichar_isalnum (gunichar c); gboolean g_unichar_isalpha (gunichar c); gboolean g_unichar_iscntrl (gunichar c); gboolean g_unichar_isdigit (gunichar c); gboolean g_unichar_isgraph (gunichar c); gboolean g_unichar_islower (gunichar c); gboolean g_unichar_isprint (gunichar c); gboolean g_unichar_ispunct (gunichar c); gboolean g_unichar_isspace (gunichar c); gboolean g_unichar_ismark (gunichar c); gboolean g_unichar_isupper (gunichar c); gboolean g_unichar_istitle (gunichar c); gboolean g_unichar_isxdigit (gunichar c); gboolean g_unichar_isdefined (gunichar c); gunichar 
g_unichar_toupper (gunichar c); gunichar g_unichar_tolower (gunichar c); gunichar g_unichar_totitle (gunichar c); int g_unichar_digit_value (gunichar c); int g_unichar_xdigit_value (gunichar c); GUnicodeType g_unichar_type (gunichar c); LucenePlusPlus-rel_3.0.9/src/demo/000077500000000000000000000000001456444476200170435ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/demo/CMakeLists.txt000066400000000000000000000001301456444476200215750ustar00rootroot00000000000000add_subdirectory(deletefiles) add_subdirectory(indexfiles) add_subdirectory(searchfiles)LucenePlusPlus-rel_3.0.9/src/demo/deletefiles/000077500000000000000000000000001456444476200213305ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/demo/deletefiles/CMakeLists.txt000066400000000000000000000020271456444476200240710ustar00rootroot00000000000000project(deletefiles) #################################### # src #################################### file(GLOB_RECURSE deletefiles_sources "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp") file(GLOB_RECURSE demo_headers "${deletefiles_SOURCE_DIR}/../include/*.h") #################################### # create executable target #################################### add_executable(deletefiles ${deletefiles_sources}) #################################### # include directories #################################### target_include_directories(deletefiles PRIVATE $ $ ${Boost_INCLUDE_DIRS}) #################################### # dependencies #################################### target_link_libraries(deletefiles PRIVATE Boost::boost Boost::date_time Boost::filesystem Boost::iostreams Boost::regex Boost::system Boost::thread ZLIB::ZLIB lucene++::lucene++ lucene++::lucene++-contrib) LucenePlusPlus-rel_3.0.9/src/demo/deletefiles/main.cpp000066400000000000000000000026401456444476200227620ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #ifndef NOMINMAX #define NOMINMAX #endif #include "targetver.h" #include #include "LuceneHeaders.h" using namespace Lucene; /// Deletes documents from an index that do not contain a term. int main(int argc, char* argv[]) { if (argc == 1) { std::wcout << L"Usage: deletefiles.exe \n"; return 1; } try { DirectoryPtr directory = FSDirectory::open(StringUtils::toUnicode(argv[1])); // we don't want read-only because we are about to delete IndexReaderPtr reader = IndexReader::open(directory, false); TermPtr term = newLucene(L"path", StringUtils::toUnicode(argv[2])); int32_t deleted = reader->deleteDocuments(term); std::wcout << L"Deleted " << deleted << L" documents containing " << term->toString() << L"\n"; reader->close(); directory->close(); } catch (LuceneException& e) { std::wcout << L"Exception: " << e.getError() << L"\n"; return 1; } return 0; } LucenePlusPlus-rel_3.0.9/src/demo/deletefiles/msvc/000077500000000000000000000000001456444476200223005ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/demo/deletefiles/msvc/deletefiles.vcproj000066400000000000000000000174031456444476200260170ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.9/src/demo/deletefiles/msvc/deletefiles.vcxproj000066400000000000000000000300711456444476200262030ustar00rootroot00000000000000 Debug DLL Win32 Debug Static Win32 Release DLL Win32 Release Static Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105} deletefiles Win32Proj Application Unicode true Application Unicode Application Unicode true Application Unicode <_ProjectFileVersion>10.0.40219.1 $(SolutionDir)$(Configuration)\ $(Configuration)\ true $(SolutionDir)$(Configuration)\ $(Configuration)\ false $(SolutionDir)$(Configuration)\ $(Configuration)\ true 
$(SolutionDir)$(Configuration)\ $(Configuration)\ false /Zm180 %(AdditionalOptions) Disabled ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;_DEBUG;_CONSOLE;LPP_HAVE_DLL;%(PreprocessorDefinitions) true Async EnableFastChecks MultiThreadedDebugDLL false Level3 EditAndContinue lucene++.lib;%(AdditionalDependencies) $(BOOST_ROOT)\lib32-msvc-10.0;..\..\..\..\lib;%(AdditionalLibraryDirectories) true Console MachineX86 copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." /Zm180 %(AdditionalOptions) MaxSpeed AnySuitable true Speed true ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) Async MultiThreadedDLL true false Level3 ProgramDatabase lucene++.lib;%(AdditionalDependencies) $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) true Console true true MachineX86 copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." /Zm180 %(AdditionalOptions) Disabled ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true Async EnableFastChecks MultiThreadedDebugDLL false Level3 EditAndContinue lucene++.lib;%(AdditionalDependencies) $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) true Console MachineX86 copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." /Zm180 %(AdditionalOptions) MaxSpeed AnySuitable true Speed true ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;NDEBUG;_CONSOLE;LPP_HAVE_DLL;%(PreprocessorDefinitions) Async MultiThreadedDLL true false Level3 ProgramDatabase lucene++.lib;%(AdditionalDependencies) $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) true Console true true MachineX86 copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." 
{46a95afd-95fd-4280-b22e-1b56f273144b} false {46a95afd-95fd-4280-b22e-1b56f273144a} false LucenePlusPlus-rel_3.0.9/src/demo/deletefiles/msvc/deletefiles.vcxproj.filters000066400000000000000000000007741456444476200276610ustar00rootroot00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx source files LucenePlusPlus-rel_3.0.9/src/demo/indexfiles/000077500000000000000000000000001456444476200211755ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/demo/indexfiles/CMakeLists.txt000066400000000000000000000020211456444476200237300ustar00rootroot00000000000000project(indexfiles) #################################### # src #################################### file(GLOB_RECURSE indexfiles_sources "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp") file(GLOB_RECURSE demo_headers "${indexfiles_SOURCE_DIR}/../include/*.h") #################################### # create executable target #################################### add_executable(indexfiles ${indexfiles_sources}) #################################### # include directories #################################### target_include_directories(indexfiles PRIVATE $ $ ${Boost_INCLUDE_DIRS}) #################################### # dependencies #################################### target_link_libraries(indexfiles PRIVATE Boost::boost Boost::date_time Boost::filesystem Boost::iostreams Boost::regex Boost::system Boost::thread ZLIB::ZLIB lucene++::lucene++ lucene++::lucene++-contrib) LucenePlusPlus-rel_3.0.9/src/demo/indexfiles/main.cpp000066400000000000000000000102121456444476200226210ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #ifndef NOMINMAX #define NOMINMAX #endif #include "targetver.h" #include #include "LuceneHeaders.h" #include "FileUtils.h" #include "MiscUtils.h" using namespace Lucene; int32_t docNumber = 0; DocumentPtr fileDocument(const String& docFile) { DocumentPtr doc = newLucene(); // Add the path of the file as a field named "path". Use a field that is indexed (ie. searchable), but // don't tokenize the field into words. doc->add(newLucene(L"path", docFile, Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); // Add the last modified date of the file a field named "modified". Use a field that is indexed (ie. searchable), // but don't tokenize the field into words. doc->add(newLucene(L"modified", DateTools::timeToString(FileUtils::fileModified(docFile), DateTools::RESOLUTION_MINUTE), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); // Add the contents of the file to a field named "contents". Specify a Reader, so that the text of the file is // tokenized and indexed, but not stored. Note that FileReader expects the file to be in the system's default // encoding. If that's not the case searching for special characters will fail. doc->add(newLucene(L"contents", newLucene(docFile))); return doc; } void indexDocs(const IndexWriterPtr& writer, const String& sourceDir) { HashSet dirList(HashSet::newInstance()); if (!FileUtils::listDirectory(sourceDir, false, dirList)) { return; } for (HashSet::iterator dirFile = dirList.begin(); dirFile != dirList.end(); ++dirFile) { String docFile(FileUtils::joinPath(sourceDir, *dirFile)); if (FileUtils::isDirectory(docFile)) { indexDocs(writer, docFile); } else { std::wcout << L"Adding [" << ++docNumber << L"]: " << *dirFile << L"\n"; try { writer->addDocument(fileDocument(docFile)); } catch (FileNotFoundException&) { } } } } /// Index all text files under a directory. 
int main(int argc, char* argv[]) { if (argc != 3) { std::wcout << L"Usage: indexfiles.exe \n"; return 1; } String sourceDir(StringUtils::toUnicode(argv[1])); String indexDir(StringUtils::toUnicode(argv[2])); if (!FileUtils::isDirectory(sourceDir)) { std::wcout << L"Source directory doesn't exist: " << sourceDir << L"\n"; return 1; } if (!FileUtils::isDirectory(indexDir)) { if (!FileUtils::createDirectory(indexDir)) { std::wcout << L"Unable to create directory: " << indexDir << L"\n"; return 1; } } uint64_t beginIndex = MiscUtils::currentTimeMillis(); try { IndexWriterPtr writer = newLucene(FSDirectory::open(indexDir), newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); std::wcout << L"Indexing to directory: " << indexDir << L"...\n"; indexDocs(writer, sourceDir); uint64_t endIndex = MiscUtils::currentTimeMillis(); uint64_t indexDuration = endIndex - beginIndex; std::wcout << L"Index time: " << indexDuration << L" milliseconds\n"; std::wcout << L"Optimizing...\n"; writer->optimize(); uint64_t optimizeDuration = MiscUtils::currentTimeMillis() - endIndex; std::wcout << L"Optimize time: " << optimizeDuration << L" milliseconds\n"; writer->close(); std::wcout << L"Total time: " << indexDuration + optimizeDuration << L" milliseconds\n"; } catch (LuceneException& e) { std::wcout << L"Exception: " << e.getError() << L"\n"; return 1; } return 0; } LucenePlusPlus-rel_3.0.9/src/demo/indexfiles/msvc/000077500000000000000000000000001456444476200221455ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/demo/indexfiles/msvc/indexfiles.vcproj000066400000000000000000000174011456444476200255270ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.9/src/demo/indexfiles/msvc/indexfiles.vcxproj000066400000000000000000000300701456444476200257140ustar00rootroot00000000000000 Debug DLL Win32 Debug Static Win32 Release DLL Win32 Release Static Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103} indexfiles Win32Proj Application Unicode true Application Unicode 
Application Unicode true Application Unicode <_ProjectFileVersion>10.0.40219.1 $(SolutionDir)$(Configuration)\ $(Configuration)\ true $(SolutionDir)$(Configuration)\ $(Configuration)\ false $(SolutionDir)$(Configuration)\ $(Configuration)\ true $(SolutionDir)$(Configuration)\ $(Configuration)\ false /Zm180 %(AdditionalOptions) Disabled ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;_DEBUG;_CONSOLE;LPP_HAVE_DLL;%(PreprocessorDefinitions) true Async EnableFastChecks MultiThreadedDebugDLL false Level3 EditAndContinue lucene++.lib;%(AdditionalDependencies) $(BOOST_ROOT)\lib32-msvc-10.0;..\..\..\..\lib;%(AdditionalLibraryDirectories) true Console MachineX86 copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." /Zm180 %(AdditionalOptions) MaxSpeed AnySuitable true Speed true ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) Async MultiThreadedDLL true false Level3 ProgramDatabase lucene++.lib;%(AdditionalDependencies) $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) true Console true true MachineX86 copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." /Zm180 %(AdditionalOptions) Disabled ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true Async EnableFastChecks MultiThreadedDebugDLL false Level3 EditAndContinue lucene++.lib;%(AdditionalDependencies) $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) true Console MachineX86 copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." 
/Zm180 %(AdditionalOptions) MaxSpeed AnySuitable true Speed true ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;NDEBUG;_CONSOLE;LPP_HAVE_DLL;%(PreprocessorDefinitions) Async MultiThreadedDLL true false Level3 ProgramDatabase lucene++.lib;%(AdditionalDependencies) $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) true Console true true MachineX86 copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." {46a95afd-95fd-4280-b22e-1b56f273144b} false {46a95afd-95fd-4280-b22e-1b56f273144a} false LucenePlusPlus-rel_3.0.9/src/demo/indexfiles/msvc/indexfiles.vcxproj.filters000066400000000000000000000007741456444476200273730ustar00rootroot00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx source files LucenePlusPlus-rel_3.0.9/src/demo/searchfiles/000077500000000000000000000000001456444476200213335ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/demo/searchfiles/CMakeLists.txt000066400000000000000000000020271456444476200240740ustar00rootroot00000000000000project(searchfiles) #################################### # src #################################### file(GLOB_RECURSE searchfiles_sources "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp") file(GLOB_RECURSE demo_headers "${searchfiles_SOURCE_DIR}/../include/*.h") #################################### # create executable target #################################### add_executable(searchfiles ${searchfiles_sources}) #################################### # include directories #################################### target_include_directories(searchfiles PRIVATE $ $ ${Boost_INCLUDE_DIRS}) #################################### # dependencies #################################### target_link_libraries(searchfiles PRIVATE Boost::boost Boost::date_time Boost::filesystem Boost::iostreams Boost::regex Boost::system Boost::thread ZLIB::ZLIB lucene++::lucene++ lucene++::lucene++-contrib) 
LucenePlusPlus-rel_3.0.9/src/demo/searchfiles/main.cpp000066400000000000000000000250141456444476200227650ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #ifndef NOMINMAX #define NOMINMAX #endif #include "targetver.h" #include #include #include "LuceneHeaders.h" #include "FilterIndexReader.h" #include "MiscUtils.h" using namespace Lucene; /// Use the norms from one field for all fields. Norms are read into memory, using a byte of memory /// per document per searched field. This can cause search of large collections with a large number /// of fields to run out of memory. If all of the fields contain only a single token, then the norms /// are all identical, then single norm vector may be shared. class OneNormsReader : public FilterIndexReader { public: OneNormsReader(const IndexReaderPtr& in, const String& field) : FilterIndexReader(in) { this->field = field; } virtual ~OneNormsReader() { } protected: String field; public: virtual ByteArray norms(const String& field) { return in->norms(this->field); } }; /// This demonstrates a typical paging search scenario, where the search engine presents pages of size n /// to the user. The user can then go to the next page if interested in the next hits. /// /// When the query is executed for the first time, then only enough results are collected to fill 5 result /// pages. If the user wants to page beyond this limit, then the query is executed another time and all /// hits are collected. 
static void doPagingSearch(const SearcherPtr& searcher, const QueryPtr& query, int32_t hitsPerPage, bool raw, bool interactive) { // Collect enough docs to show 5 pages TopScoreDocCollectorPtr collector = TopScoreDocCollector::create(5 * hitsPerPage, false); searcher->search(query, collector); Collection hits = collector->topDocs()->scoreDocs; int32_t numTotalHits = collector->getTotalHits(); std::wcout << numTotalHits << L" total matching documents\n"; int32_t start = 0; int32_t end = std::min(numTotalHits, hitsPerPage); while (true) { if (end > hits.size()) { std::wcout << L"Only results 1 - " << hits.size() << L" of " << numTotalHits << L" total matching documents collected.\n"; std::wcout << L"Collect more (y/n) ?"; String line; std::wcin >> line; boost::trim(line); if (line.empty() || boost::starts_with(line, L"n")) { break; } collector = TopScoreDocCollector::create(numTotalHits, false); searcher->search(query, collector); hits = collector->topDocs()->scoreDocs; } end = std::min(hits.size(), start + hitsPerPage); for (int32_t i = start; i < end; ++i) { if (raw) { // output raw format std::wcout << L"doc=" << hits[i]->doc << L" score=" << hits[i]->score << L"\n"; continue; } DocumentPtr doc = searcher->doc(hits[i]->doc); String path = doc->get(L"path"); if (!path.empty()) { std::wcout << StringUtils::toString(i + 1) + L". " << path << L"\n"; String title = doc->get(L"title"); if (!title.empty()) { std::wcout << L" Title: " << doc->get(L"title") << L"\n"; } } else { std::wcout << StringUtils::toString(i + 1) + L". 
No path for this document\n"; } } if (!interactive) { break; } if (numTotalHits >= end) { bool quit = false; while (true) { std::wcout << L"Press "; if (start - hitsPerPage >= 0) { std::wcout << L"(p)revious page, "; } if (start + hitsPerPage < numTotalHits) { std::wcout << L"(n)ext page, "; } std::wcout << L"(q)uit or enter number to jump to a page: "; String line; std::wcin >> line; boost::trim(line); if (line.empty() || boost::starts_with(line, L"q")) { quit = true; break; } if (boost::starts_with(line, L"p")) { start = std::max((int32_t)0, start - hitsPerPage); break; } else if (boost::starts_with(line, L"n")) { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { int32_t page = 0; try { page = StringUtils::toInt(line); } catch (NumberFormatException&) { } if ((page - 1) * hitsPerPage < numTotalHits) { start = std::max((int32_t)0, (page - 1) * hitsPerPage); break; } else { std::wcout << L"No such page\n"; } } } if (quit) { break; } end = std::min(numTotalHits, start + hitsPerPage); } } } class StreamingHitCollector : public Collector { public: StreamingHitCollector() { docBase = 0; } virtual ~StreamingHitCollector() { } protected: ScorerPtr scorer; int32_t docBase; public: /// simply print docId and score of every matching document virtual void collect(int32_t doc) { std::wcout << L"doc=" << (doc + docBase) << L" score=" << scorer->score(); } virtual bool acceptsDocsOutOfOrder() { return true; } virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase) { this->docBase = docBase; } virtual void setScorer(const ScorerPtr& scorer) { this->scorer = scorer; } }; /// This method uses a custom HitCollector implementation which simply prints out the docId and score of /// every matching document. /// /// This simulates the streaming search use case, where all hits are supposed to be processed, regardless /// of their relevance. 
static void doStreamingSearch(const SearcherPtr& searcher, const QueryPtr& query) { searcher->search(query, newLucene()); } /// Simple command-line based search demo. int main(int argc, char* argv[]) { if (argc == 1 || strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "-help") == 0) { std::wcout << L"Usage: searchfiles.exe [-index dir] [-field f] [-repeat n] [-queries file] [-raw] "; std::wcout << L"[-norms field] [-paging hitsPerPage]\n\n"; std::wcout << L"Specify 'false' for hitsPerPage to use streaming instead of paging search.\n"; return 1; } try { String index = L"index"; String field = L"contents"; String queries; int32_t repeat = 0; bool raw = false; String normsField; bool paging = true; int32_t hitsPerPage = 10; for (int32_t i = 0; i < argc; ++i) { if (strcmp(argv[i], "-index") == 0) { index = StringUtils::toUnicode(argv[i + 1]); ++i; } else if (strcmp(argv[i], "-field") == 0) { field = StringUtils::toUnicode(argv[i + 1]); ++i; } else if (strcmp(argv[i], "-queries") == 0) { queries = StringUtils::toUnicode(argv[i + 1]); ++i; } else if (strcmp(argv[i], "-repeat") == 0) { repeat = StringUtils::toInt(StringUtils::toUnicode(argv[i + 1])); ++i; } else if (strcmp(argv[i], "-raw") == 0) { raw = true; } else if (strcmp(argv[i], "-norms") == 0) { normsField = StringUtils::toUnicode(argv[i + 1]); ++i; } else if (strcmp(argv[i], "-paging") == 0) { if (strcmp(argv[i + 1], "false") == 0) { paging = false; } else { hitsPerPage = StringUtils::toInt(StringUtils::toUnicode(argv[i + 1])); if (hitsPerPage == 0) { paging = false; } } ++i; } } // only searching, so read-only=true IndexReaderPtr reader = IndexReader::open(FSDirectory::open(index), true); if (!normsField.empty()) { reader = newLucene(reader, normsField); } SearcherPtr searcher = newLucene(reader); AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); QueryParserPtr parser = newLucene(LuceneVersion::LUCENE_CURRENT, field, analyzer); ReaderPtr in; if (!queries.empty()) { in = newLucene(queries); } while 
(true) { String line; if (!queries.empty()) { wchar_t c = in->read(); while (c != L'\n' && c != L'\r' && c != Reader::READER_EOF) { line += c; c = in->read(); } } else { std::wcout << L"Enter query: "; getline(std::wcin, line); } boost::trim(line); if (line.empty()) { break; } QueryPtr query = parser->parse(line); std::wcout << L"Searching for: " << query->toString(field) << L"\n"; if (repeat > 0) { // repeat and time as benchmark int64_t start = MiscUtils::currentTimeMillis(); for (int32_t i = 0; i < repeat; ++i) { searcher->search(query, FilterPtr(), 100); } std::wcout << L"Time: " << (MiscUtils::currentTimeMillis() - start) << L"ms\n"; } if (paging) { doPagingSearch(searcher, query, hitsPerPage, raw, queries.empty()); } else { doStreamingSearch(searcher, query); } } reader->close(); } catch (LuceneException& e) { std::wcout << L"Exception: " << e.getError() << L"\n"; return 1; } return 0; } LucenePlusPlus-rel_3.0.9/src/demo/searchfiles/msvc/000077500000000000000000000000001456444476200223035ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/demo/searchfiles/msvc/searchfiles.vcproj000066400000000000000000000174031456444476200260250ustar00rootroot00000000000000 LucenePlusPlus-rel_3.0.9/src/demo/searchfiles/msvc/searchfiles.vcxproj000066400000000000000000000300711456444476200262110ustar00rootroot00000000000000 Debug DLL Win32 Debug Static Win32 Release DLL Win32 Release Static Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104} searchfiles Win32Proj Application Unicode true Application Unicode Application Unicode true Application Unicode <_ProjectFileVersion>10.0.40219.1 $(SolutionDir)$(Configuration)\ $(Configuration)\ true $(SolutionDir)$(Configuration)\ $(Configuration)\ false $(SolutionDir)$(Configuration)\ $(Configuration)\ true $(SolutionDir)$(Configuration)\ $(Configuration)\ false /Zm180 %(AdditionalOptions) Disabled ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;_DEBUG;_CONSOLE;LPP_HAVE_DLL;%(PreprocessorDefinitions) true Async 
EnableFastChecks MultiThreadedDebugDLL false Level3 EditAndContinue lucene++.lib;%(AdditionalDependencies) $(BOOST_ROOT)\lib32-msvc-10.0;..\..\..\..\lib;%(AdditionalLibraryDirectories) true Console MachineX86 copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." /Zm180 %(AdditionalOptions) MaxSpeed AnySuitable true Speed true ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) Async MultiThreadedDLL true false Level3 ProgramDatabase lucene++.lib;%(AdditionalDependencies) $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) true Console true true MachineX86 copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." /Zm180 %(AdditionalOptions) Disabled ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true Async EnableFastChecks MultiThreadedDebugDLL false Level3 EditAndContinue lucene++.lib;%(AdditionalDependencies) $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) true Console MachineX86 copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." /Zm180 %(AdditionalOptions) MaxSpeed AnySuitable true Speed true ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) WIN32;NDEBUG;_CONSOLE;LPP_HAVE_DLL;%(PreprocessorDefinitions) Async MultiThreadedDLL true false Level3 ProgramDatabase lucene++.lib;%(AdditionalDependencies) $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) true Console true true MachineX86 copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." 
{46a95afd-95fd-4280-b22e-1b56f273144b} false {46a95afd-95fd-4280-b22e-1b56f273144a} false LucenePlusPlus-rel_3.0.9/src/demo/searchfiles/msvc/searchfiles.vcxproj.filters000066400000000000000000000007741456444476200276670ustar00rootroot00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx source files LucenePlusPlus-rel_3.0.9/src/msvc/000077500000000000000000000000001456444476200170675ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/msvc/lucene++.sln000066400000000000000000000137261456444476200212170ustar00rootroot00000000000000 Microsoft Visual Studio Solution File, Format Version 11.00 # Visual Studio 2010 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "demos", "demos", "{E9344A66-4CC8-4E5B-83BC-8061E8962B46}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lucene_tester", "..\test\msvc\lucene_tester.vcxproj", "{6D684870-1124-49E1-8F96-7DE7B6114BEA}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "indexfiles", "..\demo\indexfiles\msvc\indexfiles.vcxproj", "{688A6720-739F-4EA3-AC5B-AA67A0965103}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "deletefiles", "..\demo\deletefiles\msvc\deletefiles.vcxproj", "{688A6720-739F-4EA3-AC5B-AA67A0965105}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "searchfiles", "..\demo\searchfiles\msvc\searchfiles.vcxproj", "{688A6720-739F-4EA3-AC5B-AA67A0965104}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lucene_contrib", "..\contrib\msvc\lucene_contrib.vcxproj", "{46A95AFD-95FD-4280-B22E-1B56F273144B}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lucene++", "..\core\msvc\lucene++.vcxproj", "{46A95AFD-95FD-4280-B22E-1B56F273144A}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug DLL|Win32 = Debug DLL|Win32 Debug Static|Win32 = Debug Static|Win32 Release DLL|Win32 = Release DLL|Win32 Release Static|Win32 = Release Static|Win32 EndGlobalSection 
GlobalSection(ProjectConfigurationPlatforms) = postSolution {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Debug Static|Win32.Build.0 = Debug Static|Win32 {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Release Static|Win32.ActiveCfg = Release Static|Win32 {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Release Static|Win32.Build.0 = Release Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug Static|Win32.Build.0 = Debug Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release DLL|Win32.Build.0 = Release DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release Static|Win32.ActiveCfg = Release Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release Static|Win32.Build.0 = Release Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug Static|Win32.Build.0 = Debug Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release DLL|Win32.Build.0 = Release DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release Static|Win32.ActiveCfg = Release Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release Static|Win32.Build.0 = Release Static|Win32 
{688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug Static|Win32.Build.0 = Debug Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release DLL|Win32.Build.0 = Release DLL|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release Static|Win32.ActiveCfg = Release Static|Win32 {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release Static|Win32.Build.0 = Release Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug Static|Win32.Build.0 = Debug Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release DLL|Win32.Build.0 = Release DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release Static|Win32.ActiveCfg = Release Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release Static|Win32.Build.0 = Release Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug Static|Win32.Build.0 = Debug Static|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release DLL|Win32.Build.0 = Release DLL|Win32 {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release Static|Win32.ActiveCfg = Release Static|Win32 
{46A95AFD-95FD-4280-B22E-1B56F273144A}.Release Static|Win32.Build.0 = Release Static|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(NestedProjects) = preSolution {688A6720-739F-4EA3-AC5B-AA67A0965103} = {E9344A66-4CC8-4E5B-83BC-8061E8962B46} {688A6720-739F-4EA3-AC5B-AA67A0965105} = {E9344A66-4CC8-4E5B-83BC-8061E8962B46} {688A6720-739F-4EA3-AC5B-AA67A0965104} = {E9344A66-4CC8-4E5B-83BC-8061E8962B46} EndGlobalSection EndGlobal LucenePlusPlus-rel_3.0.9/src/test/000077500000000000000000000000001456444476200170765ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/CMakeLists.txt000066400000000000000000000034171456444476200216430ustar00rootroot00000000000000project(tester) #################################### # configure GTest #################################### if(MSVC) set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) endif() add_subdirectory(gtest) #################################### # src #################################### file(GLOB_RECURSE tester_sources "analysis/*.cpp" "contrib/*.cpp" "document/*.cpp" "index/*.cpp" "main/*.cpp" "queryparser/*.cpp" "search/*.cpp" "store/*.cpp" "util/*.cpp") file(GLOB_RECURSE test_headers "${lucene++-tester_SOURCE_DIR}/include/*.h") #################################### # create test bin target #################################### add_executable(lucene++-tester ${tester_sources}) #################################### # include directories #################################### target_include_directories(lucene++-tester PUBLIC $ $ $ $ $ $ $) #################################### # dependencies #################################### target_link_libraries(lucene++-tester PRIVATE Boost::boost Boost::date_time Boost::filesystem Boost::iostreams Boost::regex Boost::system Boost::thread ZLIB::ZLIB gtest_main gtest lucene++::lucene++ lucene++::lucene++-contrib) #################################### # link args #################################### 
target_compile_options(lucene++-tester PRIVATE -DLPP_EXPOSE_INTERNAL) cotire(lucene++-tester) LucenePlusPlus-rel_3.0.9/src/test/analysis/000077500000000000000000000000001456444476200207215ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/analysis/AnalyzersTest.cpp000066400000000000000000000077211456444476200242440ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "SimpleAnalyzer.h" #include "WhitespaceAnalyzer.h" #include "StopAnalyzer.h" #include "TokenFilter.h" #include "WhitespaceTokenizer.h" #include "StringReader.h" #include "PayloadAttribute.h" #include "Payload.h" using namespace Lucene; typedef BaseTokenStreamFixture AnalyzersTest; static void verifyPayload(const TokenStreamPtr& ts) { PayloadAttributePtr payloadAtt = ts->getAttribute(); for (uint8_t b = 1; ; ++b) { bool hasNext = ts->incrementToken(); if (!hasNext) { break; } EXPECT_EQ(b, payloadAtt->getPayload()->toByteArray()[0]); } } TEST_F(AnalyzersTest, testSimple) { AnalyzerPtr a = newLucene(); checkAnalyzesTo(a, L"foo bar FOO BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); checkAnalyzesTo(a, L"foo bar . 
FOO <> BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); checkAnalyzesTo(a, L"foo.bar.FOO.BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); checkAnalyzesTo(a, L"U.S.A.", newCollection(L"u", L"s", L"a")); checkAnalyzesTo(a, L"C++", newCollection(L"c")); checkAnalyzesTo(a, L"B2B", newCollection(L"b", L"b")); checkAnalyzesTo(a, L"2B", newCollection(L"b")); checkAnalyzesTo(a, L"\"QUOTED\" word", newCollection(L"quoted", L"word")); } TEST_F(AnalyzersTest, testNull) { AnalyzerPtr a = newLucene(); checkAnalyzesTo(a, L"foo bar FOO BAR", newCollection(L"foo", L"bar", L"FOO", L"BAR")); checkAnalyzesTo(a, L"foo bar . FOO <> BAR", newCollection(L"foo", L"bar", L".", L"FOO", L"<>", L"BAR")); checkAnalyzesTo(a, L"foo.bar.FOO.BAR", newCollection(L"foo.bar.FOO.BAR")); checkAnalyzesTo(a, L"U.S.A.", newCollection(L"U.S.A.")); checkAnalyzesTo(a, L"C++", newCollection(L"C++")); checkAnalyzesTo(a, L"B2B", newCollection(L"B2B")); checkAnalyzesTo(a, L"2B", newCollection(L"2B")); checkAnalyzesTo(a, L"\"QUOTED\" word", newCollection(L"\"QUOTED\"", L"word")); } TEST_F(AnalyzersTest, testStop) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(a, L"foo bar FOO BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); checkAnalyzesTo(a, L"foo a bar such FOO THESE BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); } namespace TestPayloadCopy { DECLARE_SHARED_PTR(PayloadSetter) class PayloadSetter : public TokenFilter { public: PayloadSetter(const TokenStreamPtr& input) : TokenFilter(input) { payloadAtt = addAttribute(); data = ByteArray::newInstance(1); data[0] = 0; p = newLucene(data, 0, 1); } virtual ~PayloadSetter() { } public: PayloadAttributePtr payloadAtt; ByteArray data; PayloadPtr p; public: virtual bool incrementToken() { bool hasNext = input->incrementToken(); if (!hasNext) { return false; } payloadAtt->setPayload(p); // reuse the payload / byte[] data[0]++; return true; } }; } /// Make sure old style next() calls result in a new copy of payloads 
TEST_F(AnalyzersTest, testPayloadCopy) { String s = L"how now brown cow"; TokenStreamPtr ts = newLucene(newLucene(s)); ts = newLucene(ts); verifyPayload(ts); ts = newLucene(newLucene(s)); ts = newLucene(ts); verifyPayload(ts); } LucenePlusPlus-rel_3.0.9/src/test/analysis/BaseTokenStreamFixture.cpp000066400000000000000000000253121456444476200260260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "TokenStream.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "TypeAttribute.h" #include "PositionIncrementAttribute.h" #include "Analyzer.h" #include "StringReader.h" namespace Lucene { CheckClearAttributesAttribute::CheckClearAttributesAttribute() { clearCalled = false; } CheckClearAttributesAttribute::~CheckClearAttributesAttribute() { } bool CheckClearAttributesAttribute::getAndResetClearCalled() { bool _clearCalled = clearCalled; clearCalled = false; return _clearCalled; } void CheckClearAttributesAttribute::clear() { clearCalled = true; } bool CheckClearAttributesAttribute::equals(const LuceneObjectPtr& other) { if (Attribute::equals(other)) { return true; } CheckClearAttributesAttributePtr otherAttribute(boost::dynamic_pointer_cast(other)); if (otherAttribute) { return (otherAttribute->clearCalled == clearCalled); } return false; } int32_t CheckClearAttributesAttribute::hashCode() { return 76137213 ^ (clearCalled ? 
1231 : 1237); } void CheckClearAttributesAttribute::copyTo(const AttributePtr& target) { CheckClearAttributesAttributePtr clearAttribute(boost::dynamic_pointer_cast(target)); clearAttribute->clear(); } LuceneObjectPtr CheckClearAttributesAttribute::clone(const LuceneObjectPtr& other) { LuceneObjectPtr clone = other ? other : newLucene(); CheckClearAttributesAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); cloneAttribute->clearCalled = clearCalled; return cloneAttribute; } BaseTokenStreamFixture::~BaseTokenStreamFixture() { } void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output, Collection startOffsets, Collection endOffsets, Collection types, Collection posIncrements, int32_t finalOffset) { EXPECT_TRUE(output); CheckClearAttributesAttributePtr checkClearAtt = ts->addAttribute(); EXPECT_TRUE(ts->hasAttribute()); TermAttributePtr termAtt = ts->getAttribute(); OffsetAttributePtr offsetAtt; if (startOffsets || endOffsets || finalOffset != -1) { EXPECT_TRUE(ts->hasAttribute()); offsetAtt = ts->getAttribute(); } TypeAttributePtr typeAtt; if (types) { EXPECT_TRUE(ts->hasAttribute()); typeAtt = ts->getAttribute(); } PositionIncrementAttributePtr posIncrAtt; if (posIncrements) { EXPECT_TRUE(ts->hasAttribute()); posIncrAtt = ts->getAttribute(); } ts->reset(); for (int32_t i = 0; i < output.size(); ++i) { // extra safety to enforce, that the state is not preserved and also assign bogus values ts->clearAttributes(); termAtt->setTermBuffer(L"bogusTerm"); if (offsetAtt) { offsetAtt->setOffset(14584724, 24683243); } if (typeAtt) { typeAtt->setType(L"bogusType"); } if (posIncrAtt) { posIncrAtt->setPositionIncrement(45987657); } checkClearAtt->getAndResetClearCalled(); // reset it, because we called clearAttribute() before EXPECT_TRUE(ts->incrementToken()); EXPECT_TRUE(checkClearAtt->getAndResetClearCalled()); EXPECT_EQ(output[i], termAtt->term()); if (startOffsets) { EXPECT_EQ(startOffsets[i], 
offsetAtt->startOffset()); } if (endOffsets) { EXPECT_EQ(endOffsets[i], offsetAtt->endOffset()); } if (types) { EXPECT_EQ(types[i], typeAtt->type()); } if (posIncrements) { EXPECT_EQ(posIncrements[i], posIncrAtt->getPositionIncrement()); } } EXPECT_TRUE(!ts->incrementToken()); ts->end(); if (finalOffset != -1) { EXPECT_EQ(finalOffset, offsetAtt->endOffset()); } ts->close(); } void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output) { checkTokenStreamContents(ts, output, Collection(), Collection(), Collection(), Collection(), -1); } void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output, Collection types) { checkTokenStreamContents(ts, output, Collection(), Collection(), types, Collection(), -1); } void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output, Collection posIncrements) { checkTokenStreamContents(ts, output, Collection(), Collection(), Collection(), posIncrements, -1); } void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output, Collection startOffsets, Collection endOffsets) { checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), Collection(), -1); } void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output, Collection startOffsets, Collection endOffsets, int32_t finalOffset) { checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), Collection(), finalOffset); } void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements) { checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), posIncrements, -1); } void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements, int32_t 
finalOffset) { checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), posIncrements, finalOffset); } void BaseTokenStreamFixture::checkAnalyzesTo(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection types, Collection posIncrements) { checkTokenStreamContents(analyzer->tokenStream(L"dummy", newLucene(input)), output, startOffsets, endOffsets, types, posIncrements, (int32_t)input.length()); } void BaseTokenStreamFixture::checkAnalyzesTo(const AnalyzerPtr& analyzer, const String& input, Collection output) { checkAnalyzesTo(analyzer, input, output, Collection(), Collection(), Collection(), Collection()); } void BaseTokenStreamFixture::checkAnalyzesTo(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection types) { checkAnalyzesTo(analyzer, input, output, Collection(), Collection(), types, Collection()); } void BaseTokenStreamFixture::checkAnalyzesTo(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection posIncrements) { checkAnalyzesTo(analyzer, input, output, Collection(), Collection(), Collection(), posIncrements); } void BaseTokenStreamFixture::checkAnalyzesTo(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets) { checkAnalyzesTo(analyzer, input, output, startOffsets, endOffsets, Collection(), Collection()); } void BaseTokenStreamFixture::checkAnalyzesTo(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements) { checkAnalyzesTo(analyzer, input, output, startOffsets, endOffsets, Collection(), posIncrements); } void BaseTokenStreamFixture::checkAnalyzesToReuse(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection types, Collection posIncrements) { 
checkTokenStreamContents(analyzer->reusableTokenStream(L"dummy", newLucene(input)), output, startOffsets, endOffsets, types, posIncrements, (int32_t)input.length()); } void BaseTokenStreamFixture::checkAnalyzesToReuse(const AnalyzerPtr& analyzer, const String& input, Collection output) { checkAnalyzesToReuse(analyzer, input, output, Collection(), Collection(), Collection(), Collection()); } void BaseTokenStreamFixture::checkAnalyzesToReuse(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection types) { checkAnalyzesToReuse(analyzer, input, output, Collection(), Collection(), types, Collection()); } void BaseTokenStreamFixture::checkAnalyzesToReuse(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection posIncrements) { checkAnalyzesToReuse(analyzer, input, output, Collection(), Collection(), Collection(), posIncrements); } void BaseTokenStreamFixture::checkAnalyzesToReuse(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets) { checkAnalyzesToReuse(analyzer, input, output, startOffsets, endOffsets, Collection(), Collection()); } void BaseTokenStreamFixture::checkAnalyzesToReuse(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements) { checkAnalyzesToReuse(analyzer, input, output, startOffsets, endOffsets, Collection(), posIncrements); } void BaseTokenStreamFixture::checkOneTerm(const AnalyzerPtr& analyzer, const String& input, const String& expected) { checkAnalyzesTo(analyzer, input, newCollection(expected)); } void BaseTokenStreamFixture::checkOneTermReuse(const AnalyzerPtr& analyzer, const String& input, const String& expected) { checkAnalyzesToReuse(analyzer, input, newCollection(expected)); } } 
LucenePlusPlus-rel_3.0.9/src/test/analysis/CachingTokenFilterTest.cpp000066400000000000000000000070101456444476200257660ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "TokenStream.h" #include "TermAttribute.h" #include "OffsetAttribute.h" #include "CachingTokenFilter.h" #include "IndexReader.h" #include "TermPositions.h" #include "Term.h" using namespace Lucene; typedef BaseTokenStreamFixture CachingTokenFilterTest; static Collection tokens = newCollection(L"term1", L"term2", L"term3", L"term2"); static void checkTokens(const TokenStreamPtr& stream) { int32_t count = 0; TermAttributePtr termAtt = stream->getAttribute(); EXPECT_TRUE(termAtt); while (stream->incrementToken()) { EXPECT_TRUE(count < tokens.size()); EXPECT_EQ(tokens[count], termAtt->term()); ++count; } EXPECT_EQ(tokens.size(), count); } namespace TestCaching { class TestableTokenStream : public TokenStream { public: TestableTokenStream() { index = 0; termAtt = addAttribute(); offsetAtt = addAttribute(); } virtual ~TestableTokenStream() { } protected: int32_t index; TermAttributePtr termAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken() { if (index == tokens.size()) { return false; } else { clearAttributes(); termAtt->setTermBuffer(tokens[index++]); offsetAtt->setOffset(0, 0); return true; } } }; } TEST_F(CachingTokenFilterTest, testCaching) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = 
newLucene(); TokenStreamPtr stream = newLucene(newLucene()); doc->add(newLucene(L"preanalyzed", stream, Field::TERM_VECTOR_NO)); // 1) we consume all tokens twice before we add the doc to the index checkTokens(stream); stream->reset(); checkTokens(stream); // 2) now add the document to the index and verify if all tokens are indexed don't reset the stream here, the // DocumentWriter should do that implicitly writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); TermPositionsPtr termPositions = reader->termPositions(newLucene(L"preanalyzed", L"term1")); EXPECT_TRUE(termPositions->next()); EXPECT_EQ(1, termPositions->freq()); EXPECT_EQ(0, termPositions->nextPosition()); termPositions->seek(newLucene(L"preanalyzed", L"term2")); EXPECT_TRUE(termPositions->next()); EXPECT_EQ(2, termPositions->freq()); EXPECT_EQ(1, termPositions->nextPosition()); EXPECT_EQ(3, termPositions->nextPosition()); termPositions->seek(newLucene(L"preanalyzed", L"term3")); EXPECT_TRUE(termPositions->next()); EXPECT_EQ(1, termPositions->freq()); EXPECT_EQ(2, termPositions->nextPosition()); reader->close(); // 3) reset stream and consume tokens again stream->reset(); checkTokens(stream); } LucenePlusPlus-rel_3.0.9/src/test/analysis/CharFilterTest.cpp000066400000000000000000000034151456444476200243130ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "CharFilter.h" #include "StringReader.h" #include "CharReader.h" using namespace Lucene; typedef LuceneTestFixture CharFilterTest; class CharFilter1 : public CharFilter { public: CharFilter1(const CharStreamPtr& in) : CharFilter(in) { } virtual ~CharFilter1() { } protected: virtual int32_t correct(int32_t currentOff) { return currentOff + 1; } }; class CharFilter2 : public CharFilter { public: CharFilter2(const CharStreamPtr& in) : CharFilter(in) { } virtual ~CharFilter2() { } protected: virtual int32_t correct(int32_t currentOff) { return currentOff + 2; } }; TEST_F(CharFilterTest, testCharFilter1) { CharStreamPtr cs = newLucene(CharReader::get(newLucene(L""))); EXPECT_EQ(1, cs->correctOffset(0)); } TEST_F(CharFilterTest, testCharFilter2) { CharStreamPtr cs = newLucene(CharReader::get(newLucene(L""))); EXPECT_EQ(2, cs->correctOffset(0)); } TEST_F(CharFilterTest, testCharFilter12) { CharStreamPtr cs = newLucene(newLucene(CharReader::get(newLucene(L"")))); EXPECT_EQ(3, cs->correctOffset(0)); } TEST_F(CharFilterTest, testCharFilter11) { CharStreamPtr cs = newLucene(newLucene(CharReader::get(newLucene(L"")))); EXPECT_EQ(2, cs->correctOffset(0)); } LucenePlusPlus-rel_3.0.9/src/test/analysis/KeywordAnalyzerTest.cpp000066400000000000000000000066311456444476200254250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "RAMDirectory.h" #include "IndexSearcher.h" #include "IndexWriter.h" #include "SimpleAnalyzer.h" #include "Document.h" #include "Field.h" #include "PerFieldAnalyzerWrapper.h" #include "KeywordAnalyzer.h" #include "QueryParser.h" #include "Query.h" #include "ScoreDoc.h" #include "TopDocs.h" #include "TermDocs.h" #include "Term.h" #include "TokenStream.h" #include "OffsetAttribute.h" #include "IndexReader.h" #include "StringReader.h" using namespace Lucene; class KeywordAnalyzerTest : public BaseTokenStreamFixture { public: KeywordAnalyzerTest() { directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"partnum", L"Q36", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"description", L"Illidium Space Modulator", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->close(); searcher = newLucene(directory, true); } virtual ~KeywordAnalyzerTest() { } protected: RAMDirectoryPtr directory; IndexSearcherPtr searcher; }; TEST_F(KeywordAnalyzerTest, testPerFieldAnalyzer) { PerFieldAnalyzerWrapperPtr analyzer = newLucene(newLucene()); analyzer->addAnalyzer(L"partnum", newLucene()); QueryParserPtr queryParser = newLucene(LuceneVersion::LUCENE_CURRENT, L"description", analyzer); QueryPtr query = queryParser->parse(L"partnum:Q36 AND SPACE"); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; EXPECT_EQ(L"+partnum:Q36 +space", query->toString(L"description")); EXPECT_EQ(1, hits.size()); } TEST_F(KeywordAnalyzerTest, testMutipleDocument) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); doc->add(newLucene(L"partnum", L"Q36", Field::STORE_YES, 
Field::INDEX_ANALYZED)); writer->addDocument(doc); doc = newLucene(); doc->add(newLucene(L"partnum", L"Q37", Field::STORE_YES, Field::INDEX_ANALYZED)); writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); TermDocsPtr td = reader->termDocs(newLucene(L"partnum", L"Q36")); EXPECT_TRUE(td->next()); td = reader->termDocs(newLucene(L"partnum", L"Q37")); EXPECT_TRUE(td->next()); } TEST_F(KeywordAnalyzerTest, testOffsets) { TokenStreamPtr stream = newLucene()->tokenStream(L"field", newLucene(L"abcd")); OffsetAttributePtr offsetAtt = stream->addAttribute(); EXPECT_TRUE(stream->incrementToken()); EXPECT_EQ(0, offsetAtt->startOffset()); EXPECT_EQ(4, offsetAtt->endOffset()); } LucenePlusPlus-rel_3.0.9/src/test/analysis/LengthFilterTest.cpp000066400000000000000000000023061456444476200246550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "WhitespaceTokenizer.h" #include "TokenStream.h" #include "StringReader.h" #include "LengthFilter.h" #include "TermAttribute.h" using namespace Lucene; typedef BaseTokenStreamFixture LengthFilterTest; TEST_F(LengthFilterTest, testFilter) { TokenStreamPtr stream = newLucene(newLucene(L"short toolong evenmuchlongertext a ab toolong foo")); LengthFilterPtr filter = newLucene(stream, 2, 6); TermAttributePtr termAtt = filter->getAttribute(); EXPECT_TRUE(filter->incrementToken()); EXPECT_EQ(L"short", termAtt->term()); EXPECT_TRUE(filter->incrementToken()); EXPECT_EQ(L"ab", termAtt->term()); EXPECT_TRUE(filter->incrementToken()); EXPECT_EQ(L"foo", termAtt->term()); EXPECT_TRUE(!filter->incrementToken()); } LucenePlusPlus-rel_3.0.9/src/test/analysis/MappingCharFilterTest.cpp000066400000000000000000000134751456444476200256360ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "NormalizeCharMap.h" #include "CharStream.h" #include "MappingCharFilter.h" #include "StringReader.h" #include "WhitespaceTokenizer.h" #include "CharReader.h" using namespace Lucene; class MappingCharFilterTest : public BaseTokenStreamFixture { public: MappingCharFilterTest() { normMap = newLucene(); normMap->add(L"aa", L"a"); normMap->add(L"bbb", L"b"); normMap->add(L"cccc", L"cc"); normMap->add(L"h", L"i"); normMap->add(L"j", L"jj"); normMap->add(L"k", L"kkk"); normMap->add(L"ll", L"llll"); normMap->add(L"empty", L""); } virtual ~MappingCharFilterTest() { } public: NormalizeCharMapPtr normMap; }; TEST_F(MappingCharFilterTest, testReaderReset) { CharStreamPtr cs = newLucene(normMap, newLucene(L"x")); CharArray buf = CharArray::newInstance(10); int32_t len = cs->read(buf.get(), 0, 10); EXPECT_EQ(1, len); EXPECT_EQ(L'x', buf[0]) ; len = cs->read(buf.get(), 0, 10); EXPECT_EQ(-1, len); // rewind cs->reset(); len = cs->read(buf.get(), 0, 10); EXPECT_EQ(1, len); EXPECT_EQ(L'x', buf[0]) ; } TEST_F(MappingCharFilterTest, testNothingChange) { CharStreamPtr cs = newLucene(normMap, newLucene(L"x")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"x"), newCollection(0), newCollection(1)); } TEST_F(MappingCharFilterTest, test1to1) { CharStreamPtr cs = newLucene(normMap, newLucene(L"h")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"i"), newCollection(0), newCollection(1)); } TEST_F(MappingCharFilterTest, test1to2) { CharStreamPtr cs = newLucene(normMap, newLucene(L"j")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"jj"), newCollection(0), newCollection(1)); } TEST_F(MappingCharFilterTest, test1to3) { CharStreamPtr cs = newLucene(normMap, newLucene(L"k")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, 
newCollection(L"kkk"), newCollection(0), newCollection(1)); } TEST_F(MappingCharFilterTest, test2to4) { CharStreamPtr cs = newLucene(normMap, newLucene(L"ll")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"llll"), newCollection(0), newCollection(2)); } TEST_F(MappingCharFilterTest, test2to1) { CharStreamPtr cs = newLucene(normMap, newLucene(L"aa")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"a"), newCollection(0), newCollection(2)); } TEST_F(MappingCharFilterTest, test3to1) { CharStreamPtr cs = newLucene(normMap, newLucene(L"bbb")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"b"), newCollection(0), newCollection(3)); } TEST_F(MappingCharFilterTest, test4to2) { CharStreamPtr cs = newLucene(normMap, newLucene(L"cccc")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"cc"), newCollection(0), newCollection(4)); } TEST_F(MappingCharFilterTest, test5to0) { CharStreamPtr cs = newLucene(normMap, newLucene(L"empty")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, Collection::newInstance()); } // // 1111111111222 // 01234567890123456789012 //(in) h i j k ll cccc bbb aa // // 1111111111222 // 01234567890123456789012 //(out) i i jj kkk llll cc b a // // h, 0, 1 => i, 0, 1 // i, 2, 3 => i, 2, 3 // j, 4, 5 => jj, 4, 5 // k, 6, 7 => kkk, 6, 7 // ll, 8,10 => llll, 8,10 // cccc,11,15 => cc,11,15 // bbb,16,19 => b,16,19 // aa,20,22 => a,20,22 TEST_F(MappingCharFilterTest, testTokenStream) { CharStreamPtr cs = newLucene(normMap, CharReader::get(newLucene(L"h i j k ll cccc bbb aa"))); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"i", L"i", L"jj", L"kkk", L"llll", L"cc", L"b", L"a"), newCollection(0, 2, 4, 6, 8, 11, 16, 20), newCollection(1, 3, 5, 7, 10, 15, 19, 22)); } // // // 0123456789 //(in) aaaa ll h //(out-1) aa llll i //(out-2) a llllllll i // // aaaa,0,4 => a,0,4 // ll,5,7 => llllllll,5,7 
// h,8,9 => i,8,9 TEST_F(MappingCharFilterTest, testChained) { CharStreamPtr cs = newLucene(normMap, (CharStreamPtr)newLucene(normMap, CharReader::get(newLucene(L"aaaa ll h")))); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"a", L"llllllll", L"i"), newCollection(0, 5, 8), newCollection(4, 7, 9)); } LucenePlusPlus-rel_3.0.9/src/test/analysis/NumericTokenStreamTest.cpp000066400000000000000000000051501456444476200260450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "NumericTokenStream.h" #include "TermAttribute.h" #include "TypeAttribute.h" #include "NumericUtils.h" using namespace Lucene; typedef BaseTokenStreamFixture NumericTokenStreamTest; static int64_t lvalue = 4573245871874382LL; static int32_t ivalue = 123456; TEST_F(NumericTokenStreamTest, testLongStream) { NumericTokenStreamPtr stream = newLucene()->setLongValue(lvalue); // use getAttribute to test if attributes really exist, if not an IAE will be thrown TermAttributePtr termAtt = stream->getAttribute(); TypeAttributePtr typeAtt = stream->getAttribute(); for (int32_t shift = 0; shift < 64; shift += NumericUtils::PRECISION_STEP_DEFAULT) { EXPECT_TRUE(stream->incrementToken()); EXPECT_EQ(NumericUtils::longToPrefixCoded(lvalue, shift), termAtt->term()); EXPECT_EQ(shift == 0 ? 
NumericTokenStream::TOKEN_TYPE_FULL_PREC() : NumericTokenStream::TOKEN_TYPE_LOWER_PREC(), typeAtt->type()); } EXPECT_TRUE(!stream->incrementToken()); } TEST_F(NumericTokenStreamTest, testIntStream) { NumericTokenStreamPtr stream = newLucene()->setIntValue(ivalue); // use getAttribute to test if attributes really exist, if not an IAE will be thrown TermAttributePtr termAtt = stream->getAttribute(); TypeAttributePtr typeAtt = stream->getAttribute(); for (int32_t shift = 0; shift < 32; shift += NumericUtils::PRECISION_STEP_DEFAULT) { EXPECT_TRUE(stream->incrementToken()); EXPECT_EQ(NumericUtils::intToPrefixCoded(ivalue, shift), termAtt->term()); EXPECT_EQ(shift == 0 ? NumericTokenStream::TOKEN_TYPE_FULL_PREC() : NumericTokenStream::TOKEN_TYPE_LOWER_PREC(), typeAtt->type()); } EXPECT_TRUE(!stream->incrementToken()); } TEST_F(NumericTokenStreamTest, testNotInitialized) { NumericTokenStreamPtr stream = newLucene(); try { stream->reset(); } catch (IllegalStateException& e) { EXPECT_TRUE(check_exception(LuceneException::IllegalState)(e)); } try { stream->incrementToken(); } catch (IllegalStateException& e) { EXPECT_TRUE(check_exception(LuceneException::IllegalState)(e)); } } LucenePlusPlus-rel_3.0.9/src/test/analysis/PerFieldAnalzyerWrapperTest.cpp000066400000000000000000000026131456444476200270300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "PerFieldAnalyzerWrapper.h" #include "WhitespaceAnalyzer.h" #include "SimpleAnalyzer.h" #include "TokenStream.h" #include "StringReader.h" #include "TermAttribute.h" using namespace Lucene; typedef BaseTokenStreamFixture PerFieldAnalzyerWrapperTest; TEST_F(PerFieldAnalzyerWrapperTest, testPerField) { String text = L"Qwerty"; PerFieldAnalyzerWrapperPtr analyzer = newLucene(newLucene()); analyzer->addAnalyzer(L"special", newLucene()); TokenStreamPtr tokenStream = analyzer->tokenStream(L"field", newLucene(text)); TermAttributePtr termAtt = tokenStream->getAttribute(); EXPECT_TRUE(tokenStream->incrementToken()); EXPECT_EQ(L"Qwerty", termAtt->term()); tokenStream = analyzer->tokenStream(L"special", newLucene(text)); termAtt = tokenStream->getAttribute(); EXPECT_TRUE(tokenStream->incrementToken()); EXPECT_EQ(L"qwerty", termAtt->term()); } LucenePlusPlus-rel_3.0.9/src/test/analysis/StopAnalyzerTest.cpp000066400000000000000000000066351456444476200247320ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "StopAnalyzer.h" #include "StringReader.h" #include "TokenStream.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" using namespace Lucene; class StopAnalyzerTest : public BaseTokenStreamFixture { public: StopAnalyzerTest() { stop = newLucene(LuceneVersion::LUCENE_CURRENT); inValidTokens = HashSet::newInstance(); for (HashSet::iterator word = StopAnalyzer::ENGLISH_STOP_WORDS_SET().begin(); word != StopAnalyzer::ENGLISH_STOP_WORDS_SET().end(); ++word) { inValidTokens.add(*word); } } virtual ~StopAnalyzerTest() { } protected: StopAnalyzerPtr stop; HashSet inValidTokens; }; TEST_F(StopAnalyzerTest, testDefaults) { EXPECT_TRUE(stop); StringReaderPtr reader = newLucene(L"This is a test of the english stop analyzer"); TokenStreamPtr stream = stop->tokenStream(L"test", reader); EXPECT_TRUE(stream); TermAttributePtr termAtt = stream->getAttribute(); while (stream->incrementToken()) { EXPECT_TRUE(!inValidTokens.contains(termAtt->term())); } } TEST_F(StopAnalyzerTest, testStopList) { HashSet stopWordsSet = HashSet::newInstance(); stopWordsSet.add(L"good"); stopWordsSet.add(L"test"); stopWordsSet.add(L"analyzer"); StopAnalyzerPtr newStop = newLucene(LuceneVersion::LUCENE_24, stopWordsSet); StringReaderPtr reader = newLucene(L"This is a good test of the english stop analyzer"); TokenStreamPtr stream = newStop->tokenStream(L"test", reader); EXPECT_TRUE(stream); TermAttributePtr termAtt = stream->getAttribute(); PositionIncrementAttributePtr posIncrAtt = stream->addAttribute(); while (stream->incrementToken()) { String text = termAtt->term(); EXPECT_TRUE(!stopWordsSet.contains(text)); EXPECT_EQ(1, posIncrAtt->getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments. 
} } TEST_F(StopAnalyzerTest, testStopListPositions) { HashSet stopWordsSet = HashSet::newInstance(); stopWordsSet.add(L"good"); stopWordsSet.add(L"test"); stopWordsSet.add(L"analyzer"); StopAnalyzerPtr newStop = newLucene(LuceneVersion::LUCENE_CURRENT, stopWordsSet); StringReaderPtr reader = newLucene(L"This is a good test of the english stop analyzer with positions"); Collection expectedIncr = newCollection(1, 1, 1, 3, 1, 1, 1, 2, 1); TokenStreamPtr stream = newStop->tokenStream(L"test", reader); EXPECT_TRUE(stream); int32_t i = 0; TermAttributePtr termAtt = stream->getAttribute(); PositionIncrementAttributePtr posIncrAtt = stream->addAttribute(); while (stream->incrementToken()) { String text = termAtt->term(); EXPECT_TRUE(!stopWordsSet.contains(text)); EXPECT_EQ(expectedIncr[i++], posIncrAtt->getPositionIncrement()); } } LucenePlusPlus-rel_3.0.9/src/test/analysis/StopFilterTest.cpp000066400000000000000000000103271456444476200243630ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "TestUtils.h" #include "StringReader.h" #include "TokenStream.h" #include "StopFilter.h" #include "WhitespaceTokenizer.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" using namespace Lucene; typedef BaseTokenStreamFixture StopFilterTest; static void doTestStopPositons(const StopFilterPtr& stpf, bool enableIcrements) { stpf->setEnablePositionIncrements(enableIcrements); TermAttributePtr termAtt = stpf->getAttribute(); PositionIncrementAttributePtr posIncrAtt = stpf->getAttribute(); for (int32_t i = 0; i < 20; i += 3) { EXPECT_TRUE(stpf->incrementToken()); String w = intToEnglish(i); EXPECT_EQ(w, termAtt->term()); EXPECT_EQ(enableIcrements ? (i == 0 ? 1 : 3) : 1, posIncrAtt->getPositionIncrement()); } EXPECT_TRUE(!stpf->incrementToken()); } TEST_F(StopFilterTest, testExactCase) { StringReaderPtr reader = newLucene(L"Now is The Time"); HashSet stopWords = HashSet::newInstance(); stopWords.add(L"is"); stopWords.add(L"the"); stopWords.add(L"Time"); TokenStreamPtr stream = newLucene(false, newLucene(reader), stopWords, false); TermAttributePtr termAtt = stream->getAttribute(); EXPECT_TRUE(stream->incrementToken()); EXPECT_EQ(L"Now", termAtt->term()); EXPECT_TRUE(stream->incrementToken()); EXPECT_EQ(L"The", termAtt->term()); EXPECT_TRUE(!stream->incrementToken()); } TEST_F(StopFilterTest, testIgnoreCase) { StringReaderPtr reader = newLucene(L"Now is The Time"); HashSet stopWords = HashSet::newInstance(); stopWords.add(L"is"); stopWords.add(L"the"); stopWords.add(L"Time"); TokenStreamPtr stream = newLucene(false, newLucene(reader), stopWords, true); TermAttributePtr termAtt = stream->getAttribute(); EXPECT_TRUE(stream->incrementToken()); EXPECT_EQ(L"Now", termAtt->term()); EXPECT_TRUE(!stream->incrementToken()); } TEST_F(StopFilterTest, testStopPositons) { StringStream buf; Collection stopWords = 
Collection::newInstance(); for (int32_t i = 0; i < 20; ++i) { String w = intToEnglish(i); buf << w << L" "; if (i % 3 != 0) { stopWords.add(w); } } HashSet stopSet = HashSet::newInstance(stopWords.begin(), stopWords.end()); // with increments StringReaderPtr reader = newLucene(buf.str()); StopFilterPtr stpf = newLucene(false, newLucene(reader), stopSet); doTestStopPositons(stpf, true); // without increments reader = newLucene(buf.str()); stpf = newLucene(false, newLucene(reader), stopSet); doTestStopPositons(stpf, false); // with increments, concatenating two stop filters Collection stopWords0 = Collection::newInstance(); Collection stopWords1 = Collection::newInstance(); for (int32_t i = 0; i < stopWords.size(); ++i) { if (i % 2 == 0) { stopWords0.add(stopWords[i]); } else { stopWords1.add(stopWords[i]); } } HashSet stopSet0 = HashSet::newInstance(stopWords0.begin(), stopWords0.end()); HashSet stopSet1 = HashSet::newInstance(stopWords1.begin(), stopWords1.end()); reader = newLucene(buf.str()); StopFilterPtr stpf0 = newLucene(false, newLucene(reader), stopSet0); // first part of the set stpf0->setEnablePositionIncrements(true); StopFilterPtr stpf01 = newLucene(false, stpf0, stopSet1); // two stop filters concatenated! doTestStopPositons(stpf01, true); } LucenePlusPlus-rel_3.0.9/src/test/analysis/TeeSinkTokenFilterTest.cpp000066400000000000000000000224751456444476200260100ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "BaseTokenStreamFixture.h" #include "TestUtils.h" #include "TeeSinkTokenFilter.h" #include "WhitespaceTokenizer.h" #include "TokenStream.h" #include "TermAttribute.h" #include "StringReader.h" #include "CachingTokenFilter.h" #include "LowerCaseFilter.h" #include "StandardFilter.h" #include "StandardTokenizer.h" #include "PositionIncrementAttribute.h" #include "MiscUtils.h" using namespace Lucene; class TheSinkFilter : public SinkFilter { public: virtual ~TheSinkFilter() { } public: virtual bool accept(const AttributeSourcePtr& source) { TermAttributePtr termAtt = source->getAttribute(); return boost::iequals(termAtt->term(), L"The"); } }; class DogSinkFilter : public SinkFilter { public: virtual ~DogSinkFilter() { } public: virtual bool accept(const AttributeSourcePtr& source) { TermAttributePtr termAtt = source->getAttribute(); return boost::iequals(termAtt->term(), L"Dogs"); } }; class TeeSinkTokenFilterTest : public BaseTokenStreamFixture { public: TeeSinkTokenFilterTest() { tokens1 = newCollection(L"The", L"quick", L"Burgundy", L"Fox", L"jumped", L"over", L"the", L"lazy", L"Red", L"Dogs"); tokens2 = newCollection(L"The", L"Lazy", L"Dogs", L"should", L"stay", L"on", L"the", L"porch"); for (int32_t i = 0; i < tokens1.size(); ++i) { buffer1 << tokens1[i] << L" "; } for (int32_t i = 0; i < tokens2.size(); ++i) { buffer2 << tokens2[i] << L" "; } theFilter = newLucene(); dogFilter = newLucene(); } virtual ~TeeSinkTokenFilterTest() { } protected: StringStream buffer1; StringStream buffer2; Collection tokens1; Collection tokens2; SinkFilterPtr theFilter; SinkFilterPtr dogFilter; }; TEST_F(TeeSinkTokenFilterTest, testGeneral) { TeeSinkTokenFilterPtr source = newLucene(newLucene(newLucene(buffer1.str()))); TokenStreamPtr sink1 = source->newSinkTokenStream(); TokenStreamPtr sink2 = source->newSinkTokenStream(theFilter); source->addAttribute(); 
sink1->addAttribute(); sink2->addAttribute(); checkTokenStreamContents(source, tokens1); checkTokenStreamContents(sink1, tokens1); checkTokenStreamContents(sink2, newCollection(L"The", L"the")); } TEST_F(TeeSinkTokenFilterTest, testMultipleSources) { TeeSinkTokenFilterPtr tee1 = newLucene(newLucene(newLucene(buffer1.str()))); SinkTokenStreamPtr dogDetector = tee1->newSinkTokenStream(dogFilter); SinkTokenStreamPtr theDetector = tee1->newSinkTokenStream(theFilter); TokenStreamPtr source1 = newLucene(tee1); tee1->addAttribute(); dogDetector->addAttribute(); theDetector->addAttribute(); TeeSinkTokenFilterPtr tee2 = newLucene(newLucene(newLucene(buffer2.str()))); tee2->addSinkTokenStream(dogDetector); tee2->addSinkTokenStream(theDetector); TokenStreamPtr source2 = tee2; checkTokenStreamContents(source1, tokens1); checkTokenStreamContents(source2, tokens2); checkTokenStreamContents(theDetector, newCollection(L"The", L"the", L"The", L"the")); checkTokenStreamContents(dogDetector, newCollection(L"Dogs", L"Dogs")); source1->reset(); TokenStreamPtr lowerCasing = newLucene(source1); Collection lowerCaseTokens = Collection::newInstance(tokens1.size()); for (int32_t i = 0; i < tokens1.size(); ++i) { lowerCaseTokens[i] = StringUtils::toLower((const String&)tokens1[i]); } checkTokenStreamContents(lowerCasing, lowerCaseTokens); } namespace TestPerformance { class ModuloTokenFilter : public TokenFilter { public: ModuloTokenFilter(const TokenStreamPtr& input, int32_t mc) : TokenFilter(input) { modCount = mc; count = 0; } virtual ~ModuloTokenFilter() { } public: int32_t modCount; int32_t count; public: // return every 100 tokens virtual bool incrementToken() { bool hasNext = false; for (hasNext = input->incrementToken(); hasNext && count % modCount != 0; hasNext = input->incrementToken()) { ++count; } ++count; return hasNext; } }; class ModuloSinkFilter : public SinkFilter { public: ModuloSinkFilter(int32_t mc) { modCount = mc; count = 0; } virtual ~ModuloSinkFilter() { } public: 
int32_t modCount; int32_t count; public: virtual bool accept(const AttributeSourcePtr& source) { bool b = (source && count % modCount == 0); ++count; return b; } }; } /// Not an explicit test, just useful to print out some info on performance TEST_F(TeeSinkTokenFilterTest, testPerformance) { Collection tokCount = newCollection(100, 500, 1000, 2000, 5000, 10000); Collection modCounts = newCollection(1, 2, 5, 10, 20, 50, 100, 200, 500); for (int32_t k = 0; k < tokCount.size(); ++k) { StringStream buffer; // std::cout << "-----Tokens: " << tokCount[k] << "-----"; for (int32_t i = 0; i < tokCount[k]; ++i) { buffer << StringUtils::toUpper(intToEnglish(i)) << L" "; } // make sure we produce the same tokens TeeSinkTokenFilterPtr teeStream = newLucene(newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str())))); TokenStreamPtr sink = teeStream->newSinkTokenStream(newLucene(100)); teeStream->consumeAllTokens(); TokenStreamPtr stream = newLucene(newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str()))), 100); TermAttributePtr tfTok = stream->addAttribute(); TermAttributePtr sinkTok = sink->addAttribute(); for (int32_t i = 0; stream->incrementToken(); ++i) { EXPECT_TRUE(sink->incrementToken()); EXPECT_TRUE(tfTok->equals(sinkTok)); } // simulate two fields, each being analyzed once, for 20 documents for (int32_t j = 0; j < modCounts.size(); ++j) { int32_t tfPos = 0; int64_t start = MiscUtils::currentTimeMillis(); for (int32_t i = 0; i < 20; ++i) { stream = newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str()))); PositionIncrementAttributePtr posIncrAtt = stream->getAttribute(); while (stream->incrementToken()) { tfPos += posIncrAtt->getPositionIncrement(); } stream = newLucene(newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str()))), modCounts[j]); posIncrAtt = stream->getAttribute(); while (stream->incrementToken()) { tfPos += posIncrAtt->getPositionIncrement(); } } int64_t finish = 
MiscUtils::currentTimeMillis(); // std::cout << "ModCount: " << modCounts[j] << " Two fields took " << (finish - start) << " ms"; int32_t sinkPos = 0; // simulate one field with one sink start = MiscUtils::currentTimeMillis(); for (int32_t i = 0; i < 20; ++i) { teeStream = newLucene(newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str())))); sink = teeStream->newSinkTokenStream(newLucene(modCounts[j])); PositionIncrementAttributePtr posIncrAtt = teeStream->getAttribute(); while (teeStream->incrementToken()) { sinkPos += posIncrAtt->getPositionIncrement(); } posIncrAtt = sink->getAttribute(); while (sink->incrementToken()) { sinkPos += posIncrAtt->getPositionIncrement(); } } finish = MiscUtils::currentTimeMillis(); // std::cout << "ModCount: " << modCounts[j] << " Tee fields took " << (finish - start) << " ms"; EXPECT_EQ(sinkPos, tfPos); } // std::cout << "- End Tokens: " << tokCount[k] << "-----"; } } LucenePlusPlus-rel_3.0.9/src/test/analysis/TokenTest.cpp000066400000000000000000000131111456444476200233420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Token.h" #include "Payload.h" using namespace Lucene; typedef LuceneTestFixture TokenTest; static AttributePtr checkCloneIsEqual(const AttributePtr& att) { AttributePtr clone = boost::dynamic_pointer_cast(att->clone()); EXPECT_TRUE(att->equals(clone)); EXPECT_EQ(att->hashCode(), clone->hashCode()); return clone; } template static AttributePtr checkCopyIsEqual(const AttributePtr& att) { AttributePtr copy = newLucene(); att->copyTo(copy); EXPECT_TRUE(att->equals(copy)); EXPECT_EQ(att->hashCode(), copy->hashCode()); return copy; } TEST_F(TokenTest, testCtor) { TokenPtr t = newLucene(); t->setTermBuffer(L"hello"); EXPECT_EQ(L"hello", t->term()); EXPECT_EQ(L"word", t->type()); EXPECT_EQ(0, t->getFlags()); t = newLucene(6, 22); t->setTermBuffer(L"hello"); EXPECT_EQ(L"hello", t->term()); EXPECT_EQ(L"(hello,6,22)", t->toString()); EXPECT_EQ(L"word", t->type()); EXPECT_EQ(0, t->getFlags()); t = newLucene(6, 22, 7); t->setTermBuffer(L"hello"); EXPECT_EQ(L"hello", t->term()); EXPECT_EQ(L"(hello,6,22)", t->toString()); EXPECT_EQ(7, t->getFlags()); t = newLucene(6, 22, L"junk"); t->setTermBuffer(L"hello"); EXPECT_EQ(L"hello", t->term()); EXPECT_EQ(L"(hello,6,22,type=junk)", t->toString()); EXPECT_EQ(0, t->getFlags()); } TEST_F(TokenTest, testResize) { TokenPtr t = newLucene(); t->setTermBuffer(L"hello"); for (int32_t i = 0; i < 2000; ++i) { t->resizeTermBuffer(i); EXPECT_TRUE(i <= t->termBuffer().size()); EXPECT_EQ(L"hello", t->term()); } } TEST_F(TokenTest, testGrow) { TokenPtr t = newLucene(); StringStream buf; buf << L"ab"; for (int32_t i = 0; i < 20; ++i) { String content = buf.str(); t->setTermBuffer(content); EXPECT_EQ(content.length(), t->termLength()); EXPECT_EQ(content, t->term()); buf << content; } EXPECT_EQ(1048576, t->termLength()); EXPECT_EQ(1179654, t->termBuffer().size()); // Test for slow growth to a long term t = 
newLucene(); buf.str(L""); buf << L"a"; for (int32_t i = 0; i < 20000; ++i) { String content = buf.str(); t->setTermBuffer(content); EXPECT_EQ(content.length(), t->termLength()); EXPECT_EQ(content, t->term()); buf << L"a"; } EXPECT_EQ(20000, t->termLength()); EXPECT_EQ(20167, t->termBuffer().size()); } TEST_F(TokenTest, testToString) { TokenPtr t = newLucene(L"", 0, 5); t->setTermBuffer(L"aloha"); EXPECT_EQ(L"(aloha,0,5)", t->toString()); t->setTermBuffer(L"hi there"); EXPECT_EQ(L"(hi there,0,5)", t->toString()); } TEST_F(TokenTest, testTermBufferEquals) { TokenPtr t1a = newLucene(); t1a->setTermBuffer(L"hello"); TokenPtr t1b = newLucene(); t1b->setTermBuffer(L"hello"); TokenPtr t2 = newLucene(); t2->setTermBuffer(L"hello2"); EXPECT_TRUE(t1a->equals(t1b)); EXPECT_TRUE(!t1a->equals(t2)); EXPECT_TRUE(!t2->equals(t1b)); } TEST_F(TokenTest, testMixedStringArray) { TokenPtr t = newLucene(); t->setTermBuffer(L"hello"); EXPECT_EQ(t->termLength(), 5); EXPECT_EQ(t->term(), L"hello"); t->setTermBuffer(L"hello2"); EXPECT_EQ(t->termLength(), 6); EXPECT_EQ(t->term(), L"hello2"); CharArray test = CharArray::newInstance(6); test[0] = L'h'; test[1] = L'e'; test[2] = L'l'; test[3] = L'l'; test[4] = L'o'; test[5] = L'3'; t->setTermBuffer(test.get(), 0, 6); EXPECT_EQ(t->term(), L"hello3"); CharArray buffer = t->termBuffer(); buffer[1] = L'o'; EXPECT_EQ(t->term(), L"hollo3"); } TEST_F(TokenTest, testClone) { TokenPtr t = newLucene(); t->setTermBuffer(L"hello"); CharArray buf = t->termBuffer(); TokenPtr clone = boost::dynamic_pointer_cast(checkCloneIsEqual(t)); EXPECT_EQ(t->term(), clone->term()); EXPECT_TRUE(buf != clone->termBuffer()); ByteArray payload = ByteArray::newInstance(4); payload[0] = 1; payload[1] = 2; payload[2] = 3; payload[3] = 4; PayloadPtr pl = newLucene(payload); t->setPayload(pl); clone = boost::dynamic_pointer_cast(checkCloneIsEqual(t)); EXPECT_TRUE(pl->equals(clone->getPayload())); EXPECT_NE(pl, clone->getPayload()); } TEST_F(TokenTest, testCopyTo) { TokenPtr t = 
newLucene(); TokenPtr copy = boost::dynamic_pointer_cast(checkCopyIsEqual(t)); EXPECT_EQ(L"", t->term()); EXPECT_EQ(L"", copy->term()); t = newLucene(); t->setTermBuffer(L"hello"); CharArray buf = t->termBuffer(); copy = boost::dynamic_pointer_cast(checkCopyIsEqual(t)); EXPECT_EQ(t->term(), copy->term()); EXPECT_TRUE(buf != copy->termBuffer()); ByteArray payload = ByteArray::newInstance(4); payload[0] = 1; payload[1] = 2; payload[2] = 3; payload[3] = 4; PayloadPtr pl = newLucene(payload); t->setPayload(pl); copy = boost::dynamic_pointer_cast(checkCloneIsEqual(t)); EXPECT_TRUE(pl->equals(copy->getPayload())); EXPECT_NE(pl, copy->getPayload()); } LucenePlusPlus-rel_3.0.9/src/test/analysis/standard/000077500000000000000000000000001456444476200225215ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/analysis/standard/StandardAnalyzerTest.cpp000066400000000000000000000240551456444476200273410ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "StandardAnalyzer.h" using namespace Lucene; typedef BaseTokenStreamFixture StandardAnalyzerTest; TEST_F(StandardAnalyzerTest, testMaxTermLength) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); sa->setMaxTokenLength(5); checkAnalyzesTo(sa, L"ab cd toolong xy z", newCollection(L"ab", L"cd", L"xy", L"z")); } TEST_F(StandardAnalyzerTest, testMaxTermLength2) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"ab cd toolong xy z", newCollection(L"ab", L"cd", L"toolong", L"xy", L"z")); sa->setMaxTokenLength(5); checkAnalyzesTo(sa, L"ab cd toolong xy z", newCollection(L"ab", L"cd", L"xy", L"z"), newCollection(1, 1, 2, 1)); } TEST_F(StandardAnalyzerTest, testMaxTermLength3) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); String longTerm(255, L'a'); checkAnalyzesTo(sa, L"ab cd " + longTerm + L" xy z", newCollection(L"ab", L"cd", longTerm, L"xy", L"z")); checkAnalyzesTo(sa, L"ab cd " + longTerm + L"a xy z", newCollection(L"ab", L"cd", L"xy", L"z")); } TEST_F(StandardAnalyzerTest, testAlphanumeric) { // alphanumeric tokens StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"B2B", newCollection(L"b2b")); checkAnalyzesTo(sa, L"2B", newCollection(L"2b")); } TEST_F(StandardAnalyzerTest, testUnderscores) { // underscores are delimiters, but not in email addresses (below) StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"word_having_underscore", newCollection(L"word", L"having", L"underscore")); checkAnalyzesTo(sa, L"word_with_underscore_and_stopwords", newCollection(L"word", L"underscore", L"stopwords")); } TEST_F(StandardAnalyzerTest, testDelimiters) { // other delimiters: "-", "/", "," StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, 
L"some-dashed-phrase", newCollection(L"some", L"dashed", L"phrase")); checkAnalyzesTo(sa, L"dogs,chase,cats", newCollection(L"dogs", L"chase", L"cats")); checkAnalyzesTo(sa, L"ac/dc", newCollection(L"ac", L"dc")); } TEST_F(StandardAnalyzerTest, testApostrophes) { // internal apostrophes: O'Reilly, you're, O'Reilly's possessives are actually removed by StardardFilter, not the tokenizer StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"O'Reilly", newCollection(L"o'reilly")); checkAnalyzesTo(sa, L"you're", newCollection(L"you're")); checkAnalyzesTo(sa, L"she's", newCollection(L"she")); checkAnalyzesTo(sa, L"Jim's", newCollection(L"jim")); checkAnalyzesTo(sa, L"don't", newCollection(L"don't")); checkAnalyzesTo(sa, L"O'Reilly's", newCollection(L"o'reilly")); } TEST_F(StandardAnalyzerTest, testTSADash) { // t and s had been stopwords in Lucene <= 2.0, which made it impossible to correctly search for these terms StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"s-class", newCollection(L"s", L"class")); checkAnalyzesTo(sa, L"t-com", newCollection(L"t", L"com")); // 'a' is still a stopword checkAnalyzesTo(sa, L"a-class", newCollection(L"class")); } TEST_F(StandardAnalyzerTest, testCompanyNames) { // internal apostrophes: O'Reilly, you're, O'Reilly's possessives are actually removed by StardardFilter, not the tokenizer StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"AT&T", newCollection(L"at&t")); checkAnalyzesTo(sa, L"Excite@Home", newCollection(L"excite@home")); } TEST_F(StandardAnalyzerTest, testDomainNames) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); // domain names checkAnalyzesTo(sa, L"www.nutch.org", newCollection(L"www.nutch.org")); // the following should be recognized as HOST EXPECT_NO_THROW(checkAnalyzesTo(sa, L"www.nutch.org.", newCollection(L"www.nutch.org"), newCollection(L""))); // 2.3 should show the bug sa = 
newLucene(LuceneVersion::LUCENE_23); EXPECT_NO_THROW(checkAnalyzesTo(sa, L"www.nutch.org.", newCollection(L"wwwnutchorg"), newCollection(L""))); // 2.4 should not show the bug sa = newLucene(LuceneVersion::LUCENE_24); EXPECT_NO_THROW(checkAnalyzesTo(sa, L"www.nutch.org.", newCollection(L"www.nutch.org"), newCollection(L""))); } TEST_F(StandardAnalyzerTest, testEMailAddresses) { // email addresses, possibly with underscores, periods, etc StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"test@example.com", newCollection(L"test@example.com")); checkAnalyzesTo(sa, L"first.lastname@example.com", newCollection(L"first.lastname@example.com")); checkAnalyzesTo(sa, L"first_lastname@example.com", newCollection(L"first_lastname@example.com")); } TEST_F(StandardAnalyzerTest, testNumeric) { // floating point, serial, model numbers, ip addresses, etc. // every other segment must have at least one digit StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"21.35", newCollection(L"21.35")); checkAnalyzesTo(sa, L"216.239.63.104", newCollection(L"216.239.63.104")); checkAnalyzesTo(sa, L"1-2-3", newCollection(L"1-2-3")); checkAnalyzesTo(sa, L"a1-b2-c3", newCollection(L"a1-b2-c3")); checkAnalyzesTo(sa, L"a1-b-c3", newCollection(L"a1-b-c3")); checkAnalyzesTo(sa, L"R2D2 C3PO", newCollection(L"r2d2", L"c3po")); } TEST_F(StandardAnalyzerTest, testTextWithNumbers) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"David has 5000 bones", newCollection(L"david", L"has", L"5000", L"bones")); } TEST_F(StandardAnalyzerTest, testVariousText) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"C embedded developers wanted", newCollection(L"c", L"embedded", L"developers", L"wanted")); checkAnalyzesTo(sa, L"foo bar FOO BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); checkAnalyzesTo(sa, L"foo bar . 
FOO <> BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); checkAnalyzesTo(sa, L"\"QUOTED\" word", newCollection(L"quoted", L"word")); } TEST_F(StandardAnalyzerTest, testAcronyms) { // acronyms have their dots stripped StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"U.S.A.", newCollection(L"usa")); } TEST_F(StandardAnalyzerTest, testCPlusPlusHash) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"C++", newCollection(L"c")); checkAnalyzesTo(sa, L"C#", newCollection(L"c")); } TEST_F(StandardAnalyzerTest, testComplianceFileName) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"2004.jpg", newCollection(L"2004.jpg"), newCollection(L"")); } TEST_F(StandardAnalyzerTest, testComplianceNumericIncorrect) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"62.46", newCollection(L"62.46"), newCollection(L"")); } TEST_F(StandardAnalyzerTest, testComplianceNumericLong) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"978-0-94045043-1", newCollection(L"978-0-94045043-1"), newCollection(L"")); } TEST_F(StandardAnalyzerTest, testComplianceNumericFile) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"78academyawards/rules/rule02.html", newCollection(L"78academyawards/rules/rule02.html"), newCollection(L"")); } TEST_F(StandardAnalyzerTest, testComplianceNumericWithUnderscores) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs", newCollection(L"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs"), newCollection(L"")); } TEST_F(StandardAnalyzerTest, testComplianceNumericWithDash) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"mid-20th", newCollection(L"mid-20th"), newCollection(L"")); } 
TEST_F(StandardAnalyzerTest, testComplianceManyTokens) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"/money.cnn.com/magazines/fortune/fortune_archive/2007/03/19/8402357/index.htm safari-0-sheikh-zayed-grand-mosque.jpg", newCollection(L"money.cnn.com", L"magazines", L"fortune", L"fortune", L"archive/2007/03/19/8402357", L"index.htm", L"safari-0-sheikh", L"zayed", L"grand", L"mosque.jpg"), newCollection(L"", L"", L"", L"", L"", L"", L"", L"", L"", L"")); } LucenePlusPlus-rel_3.0.9/src/test/analysis/tokenattributes/000077500000000000000000000000001456444476200241505ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/analysis/tokenattributes/SimpleAttributeTest.cpp000066400000000000000000000113401456444476200306300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "FlagsAttribute.h" #include "PositionIncrementAttribute.h" #include "TypeAttribute.h" #include "PayloadAttribute.h" #include "Payload.h" #include "OffsetAttribute.h" using namespace Lucene; typedef LuceneTestFixture SimpleAttributeTest; static AttributePtr checkCloneIsEqual(const AttributePtr& att) { AttributePtr clone = boost::dynamic_pointer_cast(att->clone()); EXPECT_TRUE(att->equals(clone)); EXPECT_EQ(att->hashCode(), clone->hashCode()); return clone; } template static AttributePtr checkCopyIsEqual(const AttributePtr& att) { AttributePtr copy = newLucene(); att->copyTo(copy); EXPECT_TRUE(att->equals(copy)); EXPECT_EQ(att->hashCode(), copy->hashCode()); return copy; } TEST_F(SimpleAttributeTest, testFlagsAttribute) { FlagsAttributePtr att = newLucene(); EXPECT_EQ(0, att->getFlags()); att->setFlags(1234); EXPECT_EQ(L"flags=1234", att->toString()); FlagsAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); EXPECT_EQ(1234, att2->getFlags()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); EXPECT_EQ(1234, att2->getFlags()); att->clear(); EXPECT_EQ(0, att->getFlags()); } TEST_F(SimpleAttributeTest, testPositionIncrementAttribute) { PositionIncrementAttributePtr att = newLucene(); EXPECT_EQ(1, att->getPositionIncrement()); att->setPositionIncrement(1234); EXPECT_EQ(L"positionIncrement=1234", att->toString()); PositionIncrementAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); EXPECT_EQ(1234, att2->getPositionIncrement()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); EXPECT_EQ(1234, att2->getPositionIncrement()); att->clear(); EXPECT_EQ(1, att->getPositionIncrement()); } namespace TestTypeAttribute { class TestableTypeAttribute : public TypeAttribute { public: virtual ~TestableTypeAttribute() { } LUCENE_CLASS(TestableTypeAttribute); public: 
using TypeAttribute::DEFAULT_TYPE; }; } TEST_F(SimpleAttributeTest, testTypeAttribute) { TypeAttributePtr att = newLucene(); EXPECT_EQ(TestTypeAttribute::TestableTypeAttribute::DEFAULT_TYPE(), att->type()); att->setType(L"hello"); EXPECT_EQ(L"type=hello", att->toString()); TypeAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); EXPECT_EQ(L"hello", att2->type()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); EXPECT_EQ(L"hello", att2->type()); att->clear(); EXPECT_EQ(TestTypeAttribute::TestableTypeAttribute::DEFAULT_TYPE(), att->type()); } TEST_F(SimpleAttributeTest, testPayloadAttribute) { PayloadAttributePtr att = newLucene(); EXPECT_TRUE(!att->getPayload()); ByteArray payload = ByteArray::newInstance(4); payload[0] = 1; payload[1] = 2; payload[2] = 3; payload[3] = 4; PayloadPtr pl = newLucene(payload); att->setPayload(pl); PayloadAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); EXPECT_TRUE(pl->equals(att2->getPayload())); EXPECT_NE(pl, att2->getPayload()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); EXPECT_TRUE(pl->equals(att2->getPayload())); EXPECT_NE(pl, att2->getPayload()); att->clear(); EXPECT_TRUE(!att->getPayload()); } TEST_F(SimpleAttributeTest, testOffsetAttribute) { OffsetAttributePtr att = newLucene(); EXPECT_EQ(0, att->startOffset()); EXPECT_EQ(0, att->endOffset()); att->setOffset(12, 34); // no string test here, because order unknown OffsetAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); EXPECT_EQ(12, att2->startOffset()); EXPECT_EQ(34, att2->endOffset()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); EXPECT_EQ(12, att2->startOffset()); EXPECT_EQ(34, att2->endOffset()); att->clear(); EXPECT_EQ(0, att->startOffset()); EXPECT_EQ(0, att->endOffset()); } 
LucenePlusPlus-rel_3.0.9/src/test/analysis/tokenattributes/TermAttributeTest.cpp000066400000000000000000000106631456444476200303150ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "TestUtils.h" #include "TermAttribute.h" using namespace Lucene; typedef LuceneTestFixture TermAttributeTest; static AttributePtr checkCloneIsEqual(const AttributePtr& att) { AttributePtr clone = boost::dynamic_pointer_cast(att->clone()); EXPECT_TRUE(att->equals(clone)); EXPECT_EQ(att->hashCode(), clone->hashCode()); return clone; } template static AttributePtr checkCopyIsEqual(const AttributePtr& att) { AttributePtr copy = newLucene(); att->copyTo(copy); EXPECT_TRUE(att->equals(copy)); EXPECT_EQ(att->hashCode(), copy->hashCode()); return copy; } TEST_F(TermAttributeTest, testResize) { TermAttributePtr t = newLucene(); t->setTermBuffer(L"hello"); for (int32_t i = 0; i < 2000; ++i) { t->resizeTermBuffer(i); EXPECT_TRUE(i <= t->termBuffer().size()); EXPECT_EQ(L"hello", t->term()); } } TEST_F(TermAttributeTest, testGrow) { TermAttributePtr t = newLucene(); StringStream buf; buf << L"ab"; for (int32_t i = 0; i < 20; ++i) { String content = buf.str(); t->setTermBuffer(content); EXPECT_EQ(content.length(), t->termLength()); EXPECT_EQ(content, t->term()); buf << content; } EXPECT_EQ(1048576, t->termLength()); EXPECT_EQ(1179654, t->termBuffer().size()); // Test for slow growth to a long term t = newLucene(); buf.str(L""); buf << L"a"; for (int32_t i = 0; i < 20000; ++i) { String content = buf.str(); t->setTermBuffer(content); EXPECT_EQ(content.length(), t->termLength()); EXPECT_EQ(content, t->term()); buf << L"a"; } 
EXPECT_EQ(20000, t->termLength()); EXPECT_EQ(20167, t->termBuffer().size()); } TEST_F(TermAttributeTest, testToString) { TermAttributePtr t = newLucene(); t->setTermBuffer(L"aloha"); EXPECT_EQ(L"term=aloha", t->toString()); t->setTermBuffer(L"hi there"); EXPECT_EQ(L"term=hi there", t->toString()); } TEST_F(TermAttributeTest, testMixedStringArray) { TermAttributePtr t = newLucene(); t->setTermBuffer(L"hello"); EXPECT_EQ(t->termLength(), 5); EXPECT_EQ(t->term(), L"hello"); t->setTermBuffer(L"hello2"); EXPECT_EQ(t->termLength(), 6); EXPECT_EQ(t->term(), L"hello2"); CharArray test = CharArray::newInstance(6); test[0] = L'h'; test[1] = L'e'; test[2] = L'l'; test[3] = L'l'; test[4] = L'o'; test[5] = L'3'; t->setTermBuffer(test.get(), 0, 6); EXPECT_EQ(t->term(), L"hello3"); // Make sure if we get the buffer and change a character that term() reflects the change CharArray buffer = t->termBuffer(); buffer[1] = L'o'; EXPECT_EQ(t->term(), L"hollo3"); } TEST_F(TermAttributeTest, testClone) { TermAttributePtr t = newLucene(); t->setTermBuffer(L"hello"); CharArray buf = t->termBuffer(); TermAttributePtr clone = boost::dynamic_pointer_cast(checkCloneIsEqual(t)); EXPECT_EQ(t->term(), clone->term()); EXPECT_TRUE(buf != clone->termBuffer()); } TEST_F(TermAttributeTest, testEquals) { TermAttributePtr t1a = newLucene(); t1a->setTermBuffer(L"hello"); TermAttributePtr t1b = newLucene(); t1b->setTermBuffer(L"hello"); TermAttributePtr t2 = newLucene(); t2->setTermBuffer(L"hello2"); EXPECT_TRUE(t1a->equals(t1b)); EXPECT_TRUE(!t1a->equals(t2)); EXPECT_TRUE(!t2->equals(t1b)); } TEST_F(TermAttributeTest, testCopyTo) { TermAttributePtr t = newLucene(); TermAttributePtr copy = boost::dynamic_pointer_cast(checkCopyIsEqual(t)); EXPECT_EQ(L"", t->term()); EXPECT_EQ(L"", copy->term()); t = newLucene(); t->setTermBuffer(L"hello"); CharArray buf = t->termBuffer(); copy = boost::dynamic_pointer_cast(checkCopyIsEqual(t)); EXPECT_EQ(t->term(), copy->term()); EXPECT_TRUE(buf != copy->termBuffer()); } 
LucenePlusPlus-rel_3.0.9/src/test/contrib/000077500000000000000000000000001456444476200205365ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/000077500000000000000000000000001456444476200225465ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/000077500000000000000000000000001456444476200240365ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/000077500000000000000000000000001456444476200256615ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/ar/000077500000000000000000000000001456444476200262635ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/ar/ArabicAnalyzerTest.cpp000066400000000000000000000152271456444476200325250ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "ArabicAnalyzer.h" using namespace Lucene; typedef BaseTokenStreamFixture ArabicAnalyzerTest; /// Some simple tests showing some features of the analyzer, how some regular forms will conflate TEST_F(ArabicAnalyzerTest, testBasicFeatures1) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } TEST_F(ArabicAnalyzerTest, testBasicFeatures2) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1, 0xd8, 0xa9}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } TEST_F(ArabicAnalyzerTest, testBasicFeatures3) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd8, 0xb4, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8}; const uint8_t second[] = {0xd9, 0x85, 0xd8, 0xb4, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } TEST_F(ArabicAnalyzerTest, testBasicFeatures4) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd8, 0xb4, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xaa}; const uint8_t second[] = {0xd9, 0x85, 0xd8, 0xb4, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } TEST_F(ArabicAnalyzerTest, testBasicFeatures5) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa3, 0xd9, 0x85, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 
0x83, 0xd9, 0x8a, 0xd9, 0x8a, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x83}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } TEST_F(ArabicAnalyzerTest, testBasicFeatures6) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 0x8a}; const uint8_t second[] = {0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x83}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } TEST_F(ArabicAnalyzerTest, testBasicFeatures7) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } TEST_F(ArabicAnalyzerTest, testBasicFeatures8) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } TEST_F(ArabicAnalyzerTest, testBasicFeatures9) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd8, 0xa7, 0x20, 0xd9, 0x85, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa, 0x20, 0xd8, 0xa3, 0xd9, 0x8a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x85}; const uint8_t second[] = {0xd9, 0x85, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa}; const uint8_t third[] = {0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x85}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second), UTF8_TO_STRING(third))); } TEST_F(ArabicAnalyzerTest, testBasicFeatures10) { ArabicAnalyzerPtr a = 
newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xb0, 0xd9, 0x8a, 0xd9, 0x86, 0x20, 0xd9, 0x85, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa, 0x20, 0xd8, 0xa3, 0xd9, 0x8a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x85}; const uint8_t second[] = {0xd9, 0x85, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa}; const uint8_t third[] = {0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x85}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second), UTF8_TO_STRING(third))); } /// Simple tests to show things are getting reset correctly, etc. TEST_F(ArabicAnalyzerTest, testReusableTokenStream1) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; checkAnalyzesToReuse(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } TEST_F(ArabicAnalyzerTest, testReusableTokenStream2) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1, 0xd8, 0xa9}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; checkAnalyzesToReuse(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// Non-arabic text gets treated in a similar way as SimpleAnalyzer. TEST_F(ArabicAnalyzerTest, testEnglishInput) { checkAnalyzesTo(newLucene(LuceneVersion::LUCENE_CURRENT), L"English text.", newCollection(L"english", L"text")); } /// Test that custom stopwords work, and are not case-sensitive. 
TEST_F(ArabicAnalyzerTest, testCustomStopwords) { Collection stopWords = newCollection(L"the", L"and", L"a"); ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, HashSet::newInstance(stopWords.begin(), stopWords.end())); checkAnalyzesTo(a, L"The quick brown fox.", newCollection(L"quick", L"brown", L"fox")); } ArabicNormalizationFilterTest.cpp000066400000000000000000000115131456444476200346470ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/ar///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "ArabicLetterTokenizer.h" #include "ArabicNormalizationFilter.h" #include "StringReader.h" using namespace Lucene; class ArabicNormalizationFilterTest : public BaseTokenStreamFixture { public: virtual ~ArabicNormalizationFilterTest() { } public: void check(const String& input, const String& expected) { ArabicLetterTokenizerPtr tokenStream = newLucene(newLucene(input)); ArabicNormalizationFilterPtr filter = newLucene(tokenStream); checkTokenStreamContents(filter, newCollection(expected)); } }; TEST_F(ArabicNormalizationFilterTest, testAlifMadda) { const uint8_t first[] = {0xd8, 0xa2, 0xd8, 0xac, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xa7, 0xd8, 0xac, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicNormalizationFilterTest, testAlifHamzaAbove) { const uint8_t first[] = {0xd8, 0xa3, 0xd8, 0xad, 0xd9, 0x85, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xa7, 0xd8, 0xad, 0xd9, 0x85, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicNormalizationFilterTest, testAlifHamzaBelow) { const uint8_t first[] = {0xd8, 
0xa5, 0xd8, 0xb9, 0xd8, 0xa7, 0xd8, 0xb0}; const uint8_t second[] = {0xd8, 0xa7, 0xd8, 0xb9, 0xd8, 0xa7, 0xd8, 0xb0}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicNormalizationFilterTest, testAlifMaksura) { const uint8_t first[] = {0xd8, 0xa8, 0xd9, 0x86, 0xd9, 0x89}; const uint8_t second[] = {0xd8, 0xa8, 0xd9, 0x86, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicNormalizationFilterTest, testTehMarbuta) { const uint8_t first[] = {0xd9, 0x81, 0xd8, 0xa7, 0xd8, 0xb7, 0xd9, 0x85, 0xd8, 0xa9}; const uint8_t second[] = {0xd9, 0x81, 0xd8, 0xa7, 0xd8, 0xb7, 0xd9, 0x85, 0xd9, 0x87}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicNormalizationFilterTest, testTatweel) { const uint8_t first[] = {0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0xd8, 0xb1, 0xd9, 0x80, 0xd9, 0x80, 0xd9, 0x80, 0xd9, 0x80, 0xd9, 0x80, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xaa}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicNormalizationFilterTest, testFatha) { const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8e, 0xd8, 0xa8, 0xd9, 0x86, 0xd8, 0xa7}; const uint8_t second[] = {0xd9, 0x85, 0xd8, 0xa8, 0xd9, 0x86, 0xd8, 0xa7}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicNormalizationFilterTest, testKasra) { const uint8_t first[] = {0xd8, 0xb9, 0xd9, 0x84, 0xd9, 0x90, 0xd9, 0x8a}; const uint8_t second[] = {0xd8, 0xb9, 0xd9, 0x84, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicNormalizationFilterTest, testDamma) { const uint8_t first[] = {0xd8, 0xa8, 0xd9, 0x8f, 0xd9, 0x88, 0xd8, 0xa7, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xa7, 0xd8, 0xaa}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicNormalizationFilterTest, testFathatan) { const uint8_t first[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x8b}; const uint8_t second[] = {0xd9, 0x88, 
0xd9, 0x84, 0xd8, 0xaf, 0xd8, 0xa7}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicNormalizationFilterTest, testKasratan) { const uint8_t first[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf, 0xd9, 0x8d}; const uint8_t second[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicNormalizationFilterTest, testDammatan) { const uint8_t first[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf, 0xd9, 0x8c}; const uint8_t second[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicNormalizationFilterTest, testSukun) { const uint8_t first[] = {0xd9, 0x86, 0xd9, 0x84, 0xd9, 0x92, 0xd8, 0xb3, 0xd9, 0x88, 0xd9, 0x86}; const uint8_t second[] = {0xd9, 0x86, 0xd9, 0x84, 0xd8, 0xb3, 0xd9, 0x88, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicNormalizationFilterTest, testShaddah) { const uint8_t first[] = {0xd9, 0x87, 0xd8, 0xaa, 0xd9, 0x85, 0xd9, 0x8a, 0xd9, 0x91}; const uint8_t second[] = {0xd9, 0x87, 0xd8, 0xaa, 0xd9, 0x85, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/ar/ArabicStemFilterTest.cpp000066400000000000000000000146771456444476200330260ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "ArabicStemFilter.h" #include "ArabicLetterTokenizer.h" #include "StringReader.h" using namespace Lucene; class ArabicStemFilterTest : public BaseTokenStreamFixture { public: virtual ~ArabicStemFilterTest() { } public: void check(const String& input, const String& expected) { ArabicLetterTokenizerPtr tokenStream = newLucene(newLucene(input)); ArabicStemFilterPtr filter = newLucene(tokenStream); checkTokenStreamContents(filter, newCollection(expected)); } }; TEST_F(ArabicStemFilterTest, testAlPrefix) { const uint8_t first[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testWalPrefix) { const uint8_t first[] = {0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testBalPrefix) { const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testKalPrefix) { const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testFalPrefix) { const uint8_t first[] = {0xd9, 0x81, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testLlPrefix) { const uint8_t first[] = {0xd9, 0x84, 0xd9, 
0x84, 0xd8, 0xa7, 0xd8, 0xae, 0xd8, 0xb1}; const uint8_t second[] = {0xd8, 0xa7, 0xd8, 0xae, 0xd8, 0xb1}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testWaPrefix) { const uint8_t first[] = {0xd9, 0x88, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testAhSuffix) { const uint8_t first[] = {0xd8, 0xb2, 0xd9, 0x88, 0xd8, 0xac, 0xd9, 0x87, 0xd8, 0xa7}; const uint8_t second[] = {0xd8, 0xb2, 0xd9, 0x88, 0xd8, 0xac}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testAnSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testAtSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testWnSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x88, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testYnSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x8a, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testYhSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x8a, 0xd9, 0x87}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), 
UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testYpSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xa9}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testHSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x87}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testPSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd8, 0xa9}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testYSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x8a}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testComboPrefSuf) { const uint8_t first[] = {0xd9, 0x88, 0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x88, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testComboSuf) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x87, 0xd8, 0xa7, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testShouldntStem) { const uint8_t first[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x88}; const uint8_t second[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x88}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(ArabicStemFilterTest, testNonArabic) { check(L"English", L"English"); } 
LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/br/000077500000000000000000000000001456444476200262645ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/br/BrazilianStemmerTest.cpp000066400000000000000000000210171456444476200331010ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "BrazilianAnalyzer.h" using namespace Lucene; class BrazilianStemmerTest : public BaseTokenStreamFixture { public: virtual ~BrazilianStemmerTest() { } public: void check(const String& input, const String& expected) { checkOneTerm(newLucene(LuceneVersion::LUCENE_CURRENT), input, expected); } void checkReuse(const AnalyzerPtr& a, const String& input, const String& expected) { checkOneTermReuse(a, input, expected); } }; /// Test the Brazilian Stem Filter, which only modifies the term text. /// It is very similar to the snowball Portuguese algorithm but not exactly the same. 
TEST_F(BrazilianStemmerTest, testWithSnowballExamples) { check(L"boa", L"boa"); check(L"boainain", L"boainain"); check(L"boas", L"boas"); const uint8_t boas[] = {0x62, 0xc3, 0xb4, 0x61, 0x73}; check(UTF8_TO_STRING(boas), L"boas"); // removes diacritic: different from snowball Portuguese check(L"boassu", L"boassu"); check(L"boataria", L"boat"); check(L"boate", L"boat"); check(L"boates", L"boat"); check(L"boatos", L"boat"); check(L"bob", L"bob"); check(L"boba", L"bob"); check(L"bobagem", L"bobag"); check(L"bobagens", L"bobagens"); const uint8_t bobalho[] = {0x62, 0x6f, 0x62, 0x61, 0x6c, 0x68, 0xc3, 0xb5, 0x65, 0x73}; check(UTF8_TO_STRING(bobalho), L"bobalho"); // removes diacritic: different from snowball Portuguese check(L"bobear", L"bob"); check(L"bobeira", L"bobeir"); check(L"bobinho", L"bobinh"); check(L"bobinhos", L"bobinh"); check(L"bobo", L"bob"); check(L"bobs", L"bobs"); check(L"boca", L"boc"); check(L"bocadas", L"boc"); check(L"bocadinho", L"bocadinh"); check(L"bocado", L"boc"); const uint8_t bocaiuv[] = {0x62, 0x6f, 0x63, 0x61, 0x69, 0xc3, 0xba, 0x76, 0x61}; check(UTF8_TO_STRING(bocaiuv), L"bocaiuv"); // removes diacritic: different from snowball Portuguese const uint8_t bocal[] = {0x62, 0x6f, 0xc3, 0xa7, 0x61, 0x6c}; check(UTF8_TO_STRING(bocal), L"bocal"); // removes diacritic: different from snowball Portuguese check(L"bocarra", L"bocarr"); check(L"bocas", L"boc"); check(L"bode", L"bod"); check(L"bodoque", L"bodoqu"); check(L"body", L"body"); check(L"boeing", L"boeing"); check(L"boem", L"boem"); check(L"boemia", L"boem"); const uint8_t boemi[] = {0x62, 0x6f, 0xc3, 0xaa, 0x6d, 0x69, 0x6f}; check(UTF8_TO_STRING(boemi), L"boemi"); // removes diacritic: different from snowball Portuguese const uint8_t bogot[] = {0x62, 0x6f, 0x67, 0x6f, 0x74, 0xc3, 0xa1}; check(UTF8_TO_STRING(bogot), L"bogot"); // removes diacritic: different from snowball Portuguese check(L"boi", L"boi"); const uint8_t boi[] = {0x62, 0xc3, 0xb3, 0x69, 0x61}; check(UTF8_TO_STRING(boi), 
L"boi"); // removes diacritic: different from snowball Portuguese check(L"boiando", L"boi"); check(L"quiabo", L"quiab"); check(L"quicaram", L"quic"); check(L"quickly", L"quickly"); check(L"quieto", L"quiet"); check(L"quietos", L"quiet"); check(L"quilate", L"quilat"); check(L"quilates", L"quilat"); check(L"quilinhos", L"quilinh"); check(L"quilo", L"quil"); check(L"quilombo", L"quilomb"); const uint8_t quilometricas[] = {0x71, 0x75, 0x69, 0x6c, 0x6f, 0x6d, 0xc3, 0xa9, 0x74, 0x72, 0x69, 0x63, 0x61, 0x73}; check(UTF8_TO_STRING(quilometricas), L"quilometr"); // removes diacritic: different from snowball Portuguese const uint8_t quilometricos[] = {0x71, 0x75, 0x69, 0x6c, 0x6f, 0x6d, 0xc3, 0xa9, 0x74, 0x72, 0x69, 0x63, 0x6f, 0x73}; check(UTF8_TO_STRING(quilometricos), L"quilometr"); // removes diacritic: different from snowball Portuguese const uint8_t quilometro[] = {0x71, 0x75, 0x69, 0x6c, 0xc3, 0xb4, 0x6d, 0x65, 0x74, 0x72, 0x6f}; check(UTF8_TO_STRING(quilometro), L"quilometr"); // removes diacritic: different from snowball Portuguese const uint8_t quilometros[] = {0x71, 0x75, 0x69, 0x6c, 0xc3, 0xb4, 0x6d, 0x65, 0x74, 0x72, 0x6f, 0x73}; check(UTF8_TO_STRING(quilometros), L"quilometr"); // removes diacritic: different from snowball Portuguese check(L"quilos", L"quil"); check(L"quimica", L"quimic"); check(L"quilos", L"quil"); check(L"quimica", L"quimic"); check(L"quimicas", L"quimic"); check(L"quimico", L"quimic"); check(L"quimicos", L"quimic"); check(L"quimioterapia", L"quimioterap"); const uint8_t quimioterap[] = {0x71, 0x75, 0x69, 0x6d, 0x69, 0x6f, 0x74, 0x65, 0x72, 0xc3, 0xa1, 0x70, 0x69, 0x63, 0x6f, 0x73}; check(UTF8_TO_STRING(quimioterap), L"quimioterap"); // removes diacritic: different from snowball Portuguese check(L"quimono", L"quimon"); check(L"quincas", L"quinc"); const uint8_t quinha[] = {0x71, 0x75, 0x69, 0x6e, 0x68, 0xc3, 0xa3, 0x6f}; check(UTF8_TO_STRING(quinha), L"quinha"); // removes diacritic: different from snowball Portuguese check(L"quinhentos", 
L"quinhent"); check(L"quinn", L"quinn"); check(L"quino", L"quin"); check(L"quinta", L"quint"); check(L"quintal", L"quintal"); check(L"quintana", L"quintan"); check(L"quintanilha", L"quintanilh"); const uint8_t quinta[] = {0x71, 0x75, 0x69, 0x6e, 0x74, 0xc3, 0xa3, 0x6f}; check(UTF8_TO_STRING(quinta), L"quinta"); // removes diacritic: different from snowball Portuguese const uint8_t quintessente[] = {0x71, 0x75, 0x69, 0x6e, 0x74, 0x65, 0x73, 0x73, 0xc3, 0xaa, 0x6e, 0x63, 0x69, 0x61}; check(UTF8_TO_STRING(quintessente), L"quintessente"); // removes diacritic: different from snowball Portuguese check(L"quintino", L"quintin"); check(L"quinto", L"quint"); check(L"quintos", L"quint"); check(L"quintuplicou", L"quintuplic"); check(L"quinze", L"quinz"); check(L"quinzena", L"quinzen"); check(L"quiosque", L"quiosqu"); } TEST_F(BrazilianStemmerTest, testNormalization) { check(L"Brasil", L"brasil"); // lowercase by default const uint8_t brasil[] = {0x42, 0x72, 0x61, 0x73, 0xc3, 0xad, 0x6c, 0x69, 0x61}; check(UTF8_TO_STRING(brasil), L"brasil"); // remove diacritics const uint8_t quimio5terapicos[] = {0x71, 0x75, 0x69, 0x6d, 0x69, 0x6f, 0x35, 0x74, 0x65, 0x72, 0xc3, 0xa1, 0x70, 0x69, 0x63, 0x6f, 0x73}; check(UTF8_TO_STRING(quimio5terapicos), L"quimio5terapicos"); // contains non-letter, diacritic will still be removed const uint8_t aa[] = {0xc3, 0xa1, 0xc3, 0xa1}; check(UTF8_TO_STRING(aa), UTF8_TO_STRING(aa)); // token is too short: diacritics are not removed const uint8_t aaa[] = {0xc3, 0xa1, 0xc3, 0xa1, 0xc3, 0xa1}; check(UTF8_TO_STRING(aaa), L"aaa"); // normally, diacritics are removed } TEST_F(BrazilianStemmerTest, testReusableTokenStream) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"boa", L"boa"); checkReuse(a, L"boainain", L"boainain"); checkReuse(a, L"boas", L"boas"); const uint8_t boas[] = {0x62, 0xc3, 0xb4, 0x61, 0x73}; checkReuse(a, UTF8_TO_STRING(boas), L"boas"); // removes diacritic: different from snowball Portuguese } 
TEST_F(BrazilianStemmerTest, testStemExclusionTable) { BrazilianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); HashSet exclusions = HashSet::newInstance(); const uint8_t quintessencia[] = {0x71, 0x75, 0x69, 0x6e, 0x74, 0x65, 0x73, 0x73, 0xc3, 0xaa, 0x6e, 0x63, 0x69, 0x61}; exclusions.add(UTF8_TO_STRING(quintessencia)); a->setStemExclusionTable(exclusions); checkReuse(a, UTF8_TO_STRING(quintessencia), UTF8_TO_STRING(quintessencia)); // excluded words will be completely unchanged } /// Test that changes to the exclusion table are applied immediately when using reusable token streams. TEST_F(BrazilianStemmerTest, testExclusionTableReuse) { BrazilianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t quintessencia[] = {0x71, 0x75, 0x69, 0x6e, 0x74, 0x65, 0x73, 0x73, 0xc3, 0xaa, 0x6e, 0x63, 0x69, 0x61}; checkReuse(a, UTF8_TO_STRING(quintessencia), L"quintessente"); HashSet exclusions = HashSet::newInstance(); exclusions.add(UTF8_TO_STRING(quintessencia)); a->setStemExclusionTable(exclusions); checkReuse(a, UTF8_TO_STRING(quintessencia), UTF8_TO_STRING(quintessencia)); } LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/cjk/000077500000000000000000000000001456444476200264305ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/cjk/CJKTokenizerTest.cpp000066400000000000000000000554521456444476200323110ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "CJKTokenizer.h" #include "CJKAnalyzer.h" using namespace Lucene; class CJKTokenizerTest : public BaseTokenStreamFixture { public: virtual ~CJKTokenizerTest() { } public: struct TestToken { TestToken(const String& termText = L"", int32_t start = 0, int32_t end = 0, int32_t type = 0) { this->termText = termText; this->start = start; this->end = end; this->type = CJKTokenizer::TOKEN_TYPE_NAMES[type]; } String termText; int32_t start; int32_t end; String type; }; void checkCJKToken(const String& str, Collection out_tokens) { AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); Collection terms = Collection::newInstance(out_tokens.size()); Collection startOffsets = Collection::newInstance(out_tokens.size()); Collection endOffsets = Collection::newInstance(out_tokens.size()); Collection types = Collection::newInstance(out_tokens.size()); for (int32_t i = 0; i < out_tokens.size(); ++i) { terms[i] = out_tokens[i].termText; startOffsets[i] = out_tokens[i].start; endOffsets[i] = out_tokens[i].end; types[i] = out_tokens[i].type; } checkAnalyzesTo(analyzer, str, terms, startOffsets, endOffsets, types, Collection()); } void checkCJKTokenReusable(const AnalyzerPtr& analyzer, const String& str, Collection out_tokens) { Collection terms = Collection::newInstance(out_tokens.size()); Collection startOffsets = Collection::newInstance(out_tokens.size()); Collection endOffsets = Collection::newInstance(out_tokens.size()); Collection types = Collection::newInstance(out_tokens.size()); for (int32_t i = 0; i < out_tokens.size(); ++i) { terms[i] = out_tokens[i].termText; startOffsets[i] = out_tokens[i].start; endOffsets[i] = out_tokens[i].end; types[i] = out_tokens[i].type; } checkAnalyzesToReuse(analyzer, str, terms, startOffsets, endOffsets, types, Collection()); } }; TEST_F(CJKTokenizerTest, testJa1) { const uint8_t str[] = {0xe4, 
0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0xe5, 0x9b, 0x9b, 0xe4, 0xba, 0x94, 0xe5, 0x85, 0xad, 0xe4, 0xb8, 0x83, 0xe5, 0x85, 0xab, 0xe4, 0xb9, 0x9d, 0xe5, 0x8d, 0x81 }; const uint8_t token1[] = {0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c}; const uint8_t token2[] = {0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89}; const uint8_t token3[] = {0xe4, 0xb8, 0x89, 0xe5, 0x9b, 0x9b}; const uint8_t token4[] = {0xe5, 0x9b, 0x9b, 0xe4, 0xba, 0x94}; const uint8_t token5[] = {0xe4, 0xba, 0x94, 0xe5, 0x85, 0xad}; const uint8_t token6[] = {0xe5, 0x85, 0xad, 0xe4, 0xb8, 0x83}; const uint8_t token7[] = {0xe4, 0xb8, 0x83, 0xe5, 0x85, 0xab}; const uint8_t token8[] = {0xe5, 0x85, 0xab, 0xe4, 0xb9, 0x9d}; const uint8_t token9[] = {0xe4, 0xb9, 0x9d, 0xe5, 0x8d, 0x81}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token5), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token6), 5, 7, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token7), 6, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token8), 7, 9, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token9), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } TEST_F(CJKTokenizerTest, testJa2) { const uint8_t str[] = {0xe4, 0xb8, 0x80, 0x20, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0xe5, 0x9b, 0x9b, 0x20, 0xe4, 0xba, 0x94, 0xe5, 0x85, 0xad, 0xe4, 0xb8, 0x83, 0xe5, 0x85, 0xab, 0xe4, 0xb9, 0x9d, 0x20, 0xe5, 0x8d, 0x81 }; const uint8_t token1[] = {0xe4, 0xb8, 0x80}; const uint8_t token2[] = {0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89}; const uint8_t token3[] = {0xe4, 0xb8, 0x89, 0xe5, 0x9b, 0x9b}; const uint8_t token4[] = {0xe4, 0xba, 0x94, 0xe5, 0x85, 0xad}; const 
uint8_t token5[] = {0xe5, 0x85, 0xad, 0xe4, 0xb8, 0x83}; const uint8_t token6[] = {0xe4, 0xb8, 0x83, 0xe5, 0x85, 0xab}; const uint8_t token7[] = {0xe5, 0x85, 0xab, 0xe4, 0xb9, 0x9d}; const uint8_t token8[] = {0xe5, 0x8d, 0x81}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(token1), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token2), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token3), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token4), 6, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token5), 7, 9, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token8), 12, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } TEST_F(CJKTokenizerTest, testC) { String str = L"abc defgh ijklmn opqrstu vwxy z"; Collection out_tokens = newCollection( TestToken(L"abc", 0, 3, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(L"defgh", 4, 9, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(L"ijklmn", 10, 16, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(L"opqrstu", 17, 24, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(L"vwxy", 25, 29, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(L"z", 30, 31, CJKTokenizer::SINGLE_TOKEN_TYPE) ); checkCJKToken(str, out_tokens); } TEST_F(CJKTokenizerTest, testMix) { const uint8_t str[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a, 0x61, 0x62, 0x63, 0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91, 0xe3, 0x81, 0x93 }; const uint8_t token1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; const uint8_t token2[] = {0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86}; const uint8_t token3[] = {0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88}; const uint8_t token4[] = {0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a}; const uint8_t token6[] = {0xe3, 0x81, 0x8b, 0xe3, 
0x81, 0x8d}; const uint8_t token7[] = {0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f}; const uint8_t token8[] = {0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; const uint8_t token9[] = {0xe3, 0x81, 0x91, 0xe3, 0x81, 0x93}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(L"abc", 5, 8, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token8), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token9), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } TEST_F(CJKTokenizerTest, testMix2) { const uint8_t str[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a, 0x61, 0x62, 0xe3, 0x82, 0x93, 0x63, 0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91 }; const uint8_t token1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; const uint8_t token2[] = {0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86}; const uint8_t token3[] = {0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88}; const uint8_t token4[] = {0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a}; const uint8_t token6[] = {0xe3, 0x82, 0x93}; const uint8_t token8[] = {0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d}; const uint8_t token9[] = {0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f}; const uint8_t token10[] = {0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token4), 3, 5, 
CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(L"ab", 5, 7, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token6), 7, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(L"c", 8, 9, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token8), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token9), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token10), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } TEST_F(CJKTokenizerTest, testSingleChar) { const uint8_t str[] = {0xe4, 0xb8, 0x80}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(str), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } /// Full-width text is normalized to half-width TEST_F(CJKTokenizerTest, testFullWidth) { const uint8_t str[] = {0xef, 0xbc, 0xb4, 0xef, 0xbd, 0x85, 0xef, 0xbd, 0x93, 0xef, 0xbd, 0x94, 0x20, 0xef, 0xbc, 0x91, 0xef, 0xbc, 0x92, 0xef, 0xbc, 0x93, 0xef, 0xbc, 0x94 }; Collection out_tokens = newCollection( TestToken(L"test", 0, 4, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(L"1234", 5, 9, CJKTokenizer::SINGLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } /// Non-english text (not just CJK) is treated the same as CJK: C1C2 C2C3 TEST_F(CJKTokenizerTest, testNonIdeographic) { const uint8_t str[] = {0xe4, 0xb8, 0x80, 0x20, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xb1 }; const uint8_t token1[] = {0xe4, 0xb8, 0x80}; const uint8_t token2[] = {0xd8, 0xb1, 0xd9, 0x88}; const uint8_t token3[] = {0xd9, 0x88, 0xd8, 0xa8}; const uint8_t token4[] = {0xd8, 0xa8, 0xd8, 0xb1}; const uint8_t token5[] = {0xd8, 0xb1, 0xd8, 0xaa}; const uint8_t token6[] = {0xd9, 0x85, 0xd9, 0x88}; const uint8_t token7[] = {0xd9, 0x88, 0xd9, 0x8a}; const uint8_t token8[] = {0xd9, 0x8a, 0xd8, 0xb1}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(token1), 0, 1, 
CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token2), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token3), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token4), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token5), 5, 7, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token8), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } /// Non-english text with non-letters (non-spacing marks,etc) is treated as C1C2 C2C3, /// except for words are split around non-letters. TEST_F(CJKTokenizerTest, testNonIdeographicNonLetter) { const uint8_t str[] = {0xe4, 0xb8, 0x80, 0x20, 0xd8, 0xb1, 0xd9, 0x8f, 0xd9, 0x88, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xb1 }; const uint8_t token1[] = {0xe4, 0xb8, 0x80}; const uint8_t token2[] = {0xd8, 0xb1}; const uint8_t token3[] = {0xd9, 0x88, 0xd8, 0xa8}; const uint8_t token4[] = {0xd8, 0xa8, 0xd8, 0xb1}; const uint8_t token5[] = {0xd8, 0xb1, 0xd8, 0xaa}; const uint8_t token6[] = {0xd9, 0x85, 0xd9, 0x88}; const uint8_t token7[] = {0xd9, 0x88, 0xd9, 0x8a}; const uint8_t token8[] = {0xd9, 0x8a, 0xd8, 0xb1}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(token1), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token2), 2, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token3), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token4), 5, 7, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token5), 6, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token6), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token7), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token8), 11, 13, 
CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } TEST_F(CJKTokenizerTest, testTokenStream) { AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t token1[] = {0xe4, 0xb8, 0x80, 0xe4, 0xb8, 0x81, 0xe4, 0xb8, 0x82}; const uint8_t token2[] = {0xe4, 0xb8, 0x80, 0xe4, 0xb8, 0x81}; const uint8_t token3[] = {0xe4, 0xb8, 0x81, 0xe4, 0xb8, 0x82}; checkAnalyzesTo(analyzer, UTF8_TO_STRING(token1), newCollection(UTF8_TO_STRING(token2), UTF8_TO_STRING(token3))); } TEST_F(CJKTokenizerTest, testReusableTokenStream) { AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a, 0x61, 0x62, 0x63, 0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91, 0xe3, 0x81, 0x93 }; const uint8_t firstToken1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; const uint8_t firstToken2[] = {0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86}; const uint8_t firstToken3[] = {0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88}; const uint8_t firstToken4[] = {0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a}; const uint8_t firstToken6[] = {0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d}; const uint8_t firstToken7[] = {0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f}; const uint8_t firstToken8[] = {0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; const uint8_t firstToken9[] = {0xe3, 0x81, 0x91, 0xe3, 0x81, 0x93}; Collection out_tokens = newCollection( TestToken(UTF8_TO_STRING(firstToken1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(firstToken2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(firstToken3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(firstToken4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(L"abc", 5, 8, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(firstToken6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(firstToken7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), 
TestToken(UTF8_TO_STRING(firstToken8), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(firstToken9), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKTokenReusable(analyzer, UTF8_TO_STRING(first), out_tokens); const uint8_t second[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a, 0x61, 0x62, 0xe3, 0x82, 0x93, 0x63, 0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91 }; const uint8_t secondToken1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; const uint8_t secondToken2[] = {0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86}; const uint8_t secondToken3[] = {0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88}; const uint8_t secondToken4[] = {0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a}; const uint8_t secondToken6[] = {0xe3, 0x82, 0x93}; const uint8_t secondToken8[] = {0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d}; const uint8_t secondToken9[] = {0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f}; const uint8_t secondToken10[] = {0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; Collection out_tokens2 = newCollection( TestToken(UTF8_TO_STRING(secondToken1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(secondToken2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(secondToken3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(secondToken4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(L"ab", 5, 7, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(secondToken6), 7, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(L"c", 8, 9, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(secondToken8), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(secondToken9), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(secondToken10), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) ); checkCJKTokenReusable(analyzer, UTF8_TO_STRING(second), out_tokens2); } TEST_F(CJKTokenizerTest, testFinalOffset) { const uint8_t token1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; 
checkCJKToken(UTF8_TO_STRING(token1), newCollection(TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE))); const uint8_t token2[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0x20, 0x20, 0x20}; checkCJKToken(UTF8_TO_STRING(token2), newCollection(TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE))); checkCJKToken(L"test", newCollection(TestToken(L"test", 0, 4, CJKTokenizer::SINGLE_TOKEN_TYPE))); checkCJKToken(L"test ", newCollection(TestToken(L"test", 0, 4, CJKTokenizer::SINGLE_TOKEN_TYPE))); const uint8_t token3[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0x74, 0x65, 0x73, 0x74}; checkCJKToken(UTF8_TO_STRING(token3), newCollection( TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), TestToken(L"test", 2, 6, CJKTokenizer::SINGLE_TOKEN_TYPE))); const uint8_t token4[] = {0x74, 0x65, 0x73, 0x74, 0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0x20, 0x20, 0x20, 0x20}; checkCJKToken(UTF8_TO_STRING(token4), newCollection( TestToken(L"test", 0, 4, CJKTokenizer::SINGLE_TOKEN_TYPE), TestToken(UTF8_TO_STRING(token1), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE))); } LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/cn/000077500000000000000000000000001456444476200262615ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/cn/ChineseTokenizerTest.cpp000066400000000000000000000130341456444476200330770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "ChineseTokenizer.h" #include "ChineseFilter.h" #include "ChineseAnalyzer.h" #include "StringReader.h" #include "OffsetAttribute.h" #include "WhitespaceTokenizer.h" using namespace Lucene; /// Analyzer that just uses ChineseTokenizer, not ChineseFilter. /// Convenience to show the behaviour of the tokenizer class JustChineseTokenizerAnalyzer : public Analyzer { public: virtual ~JustChineseTokenizerAnalyzer() { } public: virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader) { return newLucene(reader); } }; /// Analyzer that just uses ChineseFilter, not ChineseTokenizer. /// Convenience to show the behavior of the filter. class JustChineseFilterAnalyzer : public Analyzer { public: virtual ~JustChineseFilterAnalyzer() { } public: virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader) { return newLucene(newLucene(reader)); } }; typedef BaseTokenStreamFixture ChineseTokenizerTest; TEST_F(ChineseTokenizerTest, testOtherLetterOffset) { const uint8_t token[] = {0x61, 0xe5, 0xa4, 0xa9, 0x62}; ChineseTokenizerPtr tokenizer = newLucene(newLucene(UTF8_TO_STRING(token))); int32_t correctStartOffset = 0; int32_t correctEndOffset = 1; OffsetAttributePtr offsetAtt = tokenizer->getAttribute(); while (tokenizer->incrementToken()) { EXPECT_EQ(correctStartOffset, offsetAtt->startOffset()); EXPECT_EQ(correctEndOffset, offsetAtt->endOffset()); ++correctStartOffset; ++correctEndOffset; } } TEST_F(ChineseTokenizerTest, testReusableTokenStream1) { AnalyzerPtr a = newLucene(); const uint8_t input[] = {0xe4, 0xb8, 0xad, 0xe5, 0x8d, 0x8e, 0xe4, 0xba, 0xba, 0xe6, 0xb0, 0x91, 0xe5, 0x85, 0xb1, 0xe5, 0x92, 0x8c, 0xe5, 0x9b, 0xbd }; const uint8_t token1[] = {0xe4, 0xb8, 0xad}; const uint8_t token2[] = {0xe5, 0x8d, 0x8e}; const uint8_t token3[] = {0xe4, 0xba, 0xba}; const uint8_t token4[] = 
{0xe6, 0xb0, 0x91}; const uint8_t token5[] = {0xe5, 0x85, 0xb1}; const uint8_t token6[] = {0xe5, 0x92, 0x8c}; const uint8_t token7[] = {0xe5, 0x9b, 0xbd}; checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3), UTF8_TO_STRING(token4), UTF8_TO_STRING(token5), UTF8_TO_STRING(token6), UTF8_TO_STRING(token7) ), newCollection(0, 1, 2, 3, 4, 5, 6), newCollection(1, 2, 3, 4, 5, 6, 7)); } TEST_F(ChineseTokenizerTest, testReusableTokenStream2) { AnalyzerPtr a = newLucene(); const uint8_t input[] = {0xe5, 0x8c, 0x97, 0xe4, 0xba, 0xac, 0xe5, 0xb8, 0x82}; const uint8_t token1[] = {0xe5, 0x8c, 0x97}; const uint8_t token2[] = {0xe4, 0xba, 0xac}; const uint8_t token3[] = {0xe5, 0xb8, 0x82}; checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3) ), newCollection(0, 1, 2), newCollection(1, 2, 3)); } /// ChineseTokenizer tokenizes numbers as one token, but they are filtered by ChineseFilter TEST_F(ChineseTokenizerTest, testNumerics) { AnalyzerPtr justTokenizer = newLucene(); const uint8_t input[] = {0xe4, 0xb8, 0xad, 0x31, 0x32, 0x33, 0x34}; const uint8_t token1[] = {0xe4, 0xb8, 0xad}; checkAnalyzesTo(justTokenizer, UTF8_TO_STRING(input), newCollection(UTF8_TO_STRING(token1), L"1234")); // in this case the ChineseAnalyzer (which applies ChineseFilter) will not remove the numeric token. AnalyzerPtr a = newLucene(); checkAnalyzesTo(a, UTF8_TO_STRING(input), newCollection(UTF8_TO_STRING(token1), L"1234")); } /// ChineseTokenizer tokenizes english similar to SimpleAnalyzer. /// It will lowercase terms automatically. /// /// ChineseFilter has an english stopword list, it also removes any single character tokens. /// The stopword list is case-sensitive. TEST_F(ChineseTokenizerTest, testEnglish) { AnalyzerPtr chinese = newLucene(); checkAnalyzesTo(chinese, L"This is a Test. 
b c d", newCollection(L"test")); AnalyzerPtr justTokenizer = newLucene(); checkAnalyzesTo(justTokenizer, L"This is a Test. b c d", newCollection(L"this", L"is", L"a", L"test", L"b", L"c", L"d")); AnalyzerPtr justFilter = newLucene(); checkAnalyzesTo(justFilter, L"This is a Test. b c d", newCollection(L"This", L"Test.")); } LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/cz/000077500000000000000000000000001456444476200262755ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/cz/CzechAnalyzerTest.cpp000066400000000000000000000031201456444476200323770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "CzechAnalyzer.h" using namespace Lucene; typedef BaseTokenStreamFixture CzechAnalyzerTest; TEST_F(CzechAnalyzerTest, testStopWord) { CzechAnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(analyzer, L"Pokud mluvime o volnem", newCollection(L"mluvime", L"volnem")); } TEST_F(CzechAnalyzerTest, testReusableTokenStream1) { CzechAnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesToReuse(analyzer, L"Pokud mluvime o volnem", newCollection(L"mluvime", L"volnem")); } TEST_F(CzechAnalyzerTest, testReusableTokenStream2) { CzechAnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = {0xc4, 0x8c, 0x65, 0x73, 0x6b, 0xc3, 0xa1, 0x20, 0x52, 0x65, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x6b, 0x61 }; const uint8_t token1[] = {0xc4, 0x8d, 0x65, 0x73, 0x6b, 0xc3, 0xa1}; const uint8_t token2[] = {0x72, 0x65, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x6b, 0x61}; 
checkAnalyzesToReuse(analyzer, UTF8_TO_STRING(input), newCollection(UTF8_TO_STRING(token1), UTF8_TO_STRING(token2))); } LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/de/000077500000000000000000000000001456444476200262515ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/de/GermanStemFilterTest.cpp000066400000000000000000000110101456444476200330160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "GermanAnalyzer.h" #include "WhitespaceTokenizer.h" using namespace Lucene; class GermanStemFilterTest : public BaseTokenStreamFixture { public: virtual ~GermanStemFilterTest() { } public: void check(const String& input, const String& expected) { checkOneTerm(newLucene(LuceneVersion::LUCENE_CURRENT), input, expected); } void checkReuse(const AnalyzerPtr& a, const String& input, const String& expected) { checkOneTermReuse(a, input, expected); } }; /// Test the German stemmer. The stemming algorithm is known to work less than perfect, as it doesn't /// use any word lists with exceptions. We also check some of the cases where the algorithm is wrong. 
TEST_F(GermanStemFilterTest, testStemming) { const uint8_t haufig[] = {0x68, 0xc3, 0xa4, 0x75, 0x66, 0x69, 0x67}; check(UTF8_TO_STRING(haufig), L"haufig"); // German special characters are replaced const uint8_t abschliess1[] = {0x61, 0x62, 0x73, 0x63, 0x68, 0x6c, 0x69, 0x65, 0xc3, 0x9f, 0x65, 0x6e}; check(UTF8_TO_STRING(abschliess1), L"abschliess"); // here the stemmer works okay, it maps related words to the same stem const uint8_t abschliess2[] = {0x61, 0x62, 0x73, 0x63, 0x68, 0x6c, 0x69, 0x65, 0xc3, 0x9f, 0x65, 0x6e, 0x64, 0x65, 0x72}; check(UTF8_TO_STRING(abschliess2), L"abschliess"); // here the stemmer works okay, it maps related words to the same stem const uint8_t abschliess3[] = {0x61, 0x62, 0x73, 0x63, 0x68, 0x6c, 0x69, 0x65, 0xc3, 0x9f, 0x65, 0x6e, 0x64, 0x65, 0x73}; check(UTF8_TO_STRING(abschliess3), L"abschliess"); // here the stemmer works okay, it maps related words to the same stem const uint8_t abschliess4[] = {0x61, 0x62, 0x73, 0x63, 0x68, 0x6c, 0x69, 0x65, 0xc3, 0x9f, 0x65, 0x6e, 0x64, 0x65, 0x6e}; check(UTF8_TO_STRING(abschliess4), L"abschliess"); // here the stemmer works okay, it maps related words to the same stem check(L"Tisch", L"tisch"); check(L"Tische", L"tisch"); check(L"Tischen", L"tisch"); check(L"Haus", L"hau"); check(L"Hauses", L"hau"); const uint8_t hau1[] = {0x48, 0xc3, 0xa4, 0x75, 0x73, 0x65, 0x72}; check(UTF8_TO_STRING(hau1), L"hau"); const uint8_t hau2[] = {0x48, 0xc3, 0xa4, 0x75, 0x73, 0x65, 0x72, 0x6e}; check(UTF8_TO_STRING(hau2), L"hau"); // Here's a case where overstemming occurs, ie. a word is mapped to the same stem as unrelated words check(L"hauen", L"hau"); // Here's a case where understemming occurs, i.e. two related words are not mapped to the same stem. 
// This is the case with basically all irregular forms check(L"Drama", L"drama"); check(L"Dramen", L"dram"); const uint8_t ausmass[] = {0x41, 0x75, 0x73, 0x6d, 0x61, 0xc3, 0x9f}; check(UTF8_TO_STRING(ausmass), L"ausmass"); // Fake words to test if suffixes are cut off check(L"xxxxxe", L"xxxxx"); check(L"xxxxxs", L"xxxxx"); check(L"xxxxxn", L"xxxxx"); check(L"xxxxxt", L"xxxxx"); check(L"xxxxxem", L"xxxxx"); check(L"xxxxxet", L"xxxxx"); check(L"xxxxxnd", L"xxxxx"); // The suffixes are also removed when combined check(L"xxxxxetende", L"xxxxx"); // Words that are shorter than four charcters are not changed check(L"xxe", L"xxe"); // -em and -er are not removed from words shorter than five characters check(L"xxem", L"xxem"); check(L"xxer", L"xxer"); // -nd is not removed from words shorter than six characters check(L"xxxnd", L"xxxnd"); } TEST_F(GermanStemFilterTest, testReusableTokenStream) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"Tisch", L"tisch"); checkReuse(a, L"Tische", L"tisch"); checkReuse(a, L"Tischen", L"tisch"); } /// Test that changes to the exclusion table are applied immediately when using reusable token streams. TEST_F(GermanStemFilterTest, testExclusionTableReuse) { GermanAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"tischen", L"tisch"); HashSet exclusions = HashSet::newInstance(); exclusions.add(L"tischen"); a->setStemExclusionTable(exclusions); checkReuse(a, L"tischen", L"tischen"); } LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/el/000077500000000000000000000000001456444476200262615ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/el/GreekAnalyzerTest.cpp000066400000000000000000000277151456444476200324040ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "GreekAnalyzer.h" using namespace Lucene; typedef BaseTokenStreamFixture GreekAnalyzerTest; TEST_F(GreekAnalyzerTest, testAnalyzer1) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xce, 0x9c, 0xce, 0xaf, 0xce, 0xb1, 0x20, 0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xac, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xae, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xbf, 0xcf, 0x8d, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1, 0x20, 0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xac, 0x20, 0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, 0xce, 0xae, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd, 0x20, 0xcf, 0x84, 0xce, 0xb7, 0xcf, 0x82, 0x20, 0xce, 0x95, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xae, 0xcf, 0x82, 0x20, 0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x8e, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x82 }; const uint8_t token1[] = {0xce, 0xbc, 0xce, 0xb9, 0xce, 0xb1}; const uint8_t token2[] = {0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xb1 }; const uint8_t token3[] = {0xce, 0xba, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xb7}; const uint8_t token4[] = {0xcf, 0x80, 0xce, 0xbb, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1}; const uint8_t token5[] = {0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb1}; const uint8_t token6[] = {0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, 0xce, 0xb7, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd }; const uint8_t token7[] = {0xce, 0xb5, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 
0xb9, 0xce, 0xba, 0xce, 0xb7, 0xcf, 0x83 }; const uint8_t token8[] = {0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x89, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x83}; // Verify the correct analysis of capitals and small accented letters checkAnalyzesTo(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3), UTF8_TO_STRING(token4), UTF8_TO_STRING(token5), UTF8_TO_STRING(token6), UTF8_TO_STRING(token7), UTF8_TO_STRING(token8) )); } TEST_F(GreekAnalyzerTest, testAnalyzer2) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xce, 0xa0, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x8a, 0xcf, 0x8c, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1, 0x20, 0x28, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x29, 0x20, 0x20, 0x20, 0x20, 0x20, 0x5b, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xad, 0xcf, 0x82, 0x5d, 0x09, 0x2d, 0x09, 0xce, 0x91, 0xce, 0x9d, 0xce, 0x91, 0xce, 0x93, 0xce, 0x9a, 0xce, 0x95, 0xce, 0xa3 }; const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1 }; const uint8_t token2[] = {0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xb5, 0xcf, 0x83 }; const uint8_t token3[] = {0xce, 0xb1, 0xce, 0xbd, 0xce, 0xb1, 0xce, 0xb3, 0xce, 0xba, 0xce, 0xb5, 0xcf, 0x83 }; // Verify the correct analysis of small letters with diaeresis and the elimination of punctuation marks checkAnalyzesTo(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3) )); } TEST_F(GreekAnalyzerTest, testAnalyzer3) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xce, 0xa0, 0xce, 0xa1, 0xce, 0x9f, 0xce, 0xab, 0xce, 0xa0, 0xce, 0x9f, 0xce, 0x98, 0xce, 0x95, 0xce, 0xa3, 0xce, 0x95, 0xce, 0x99, 0xce, 0xa3, 0x20, 0x20, 0xce, 0x86, 0xcf, 0x88, 0xce, 0xbf, 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x82, 0x2c, 0x20, 0xce, 0xbf, 
0x20, 0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x83, 0xcf, 0x84, 0xcf, 0x8c, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xce, 0xbf, 0xce, 0xb9, 0x20, 0xce, 0xac, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9 }; const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb8, 0xce, 0xb5, 0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83 }; const uint8_t token2[] = {0xce, 0xb1, 0xcf, 0x88, 0xce, 0xbf, 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x83}; const uint8_t token3[] = {0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x83}; const uint8_t token4[] = {0xce, 0xb1, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9}; // Verify the correct analysis of capital accented letters and capital letters with diaeresis, // as well as the elimination of stop words checkAnalyzesTo(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3), UTF8_TO_STRING(token4) )); } TEST_F(GreekAnalyzerTest, testReusableTokenStream1) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xce, 0x9c, 0xce, 0xaf, 0xce, 0xb1, 0x20, 0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xac, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xae, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xbf, 0xcf, 0x8d, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1, 0x20, 0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xac, 0x20, 0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, 0xce, 0xae, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd, 0x20, 0xcf, 0x84, 0xce, 0xb7, 0xcf, 0x82, 0x20, 0xce, 0x95, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xae, 0xcf, 0x82, 0x20, 0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x8e, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x82 }; const uint8_t token1[] = {0xce, 0xbc, 0xce, 0xb9, 0xce, 0xb1}; const uint8_t token2[] = {0xce, 0xb5, 
0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xb1 }; const uint8_t token3[] = {0xce, 0xba, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xb7}; const uint8_t token4[] = {0xcf, 0x80, 0xce, 0xbb, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1}; const uint8_t token5[] = {0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb1}; const uint8_t token6[] = {0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, 0xce, 0xb7, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd }; const uint8_t token7[] = {0xce, 0xb5, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xb7, 0xcf, 0x83 }; const uint8_t token8[] = {0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x89, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x83}; // Verify the correct analysis of capitals and small accented letters checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3), UTF8_TO_STRING(token4), UTF8_TO_STRING(token5), UTF8_TO_STRING(token6), UTF8_TO_STRING(token7), UTF8_TO_STRING(token8) )); } TEST_F(GreekAnalyzerTest, testReusableTokenStream2) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xce, 0xa0, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x8a, 0xcf, 0x8c, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1, 0x20, 0x28, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x29, 0x20, 0x20, 0x20, 0x20, 0x20, 0x5b, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xad, 0xcf, 0x82, 0x5d, 0x09, 0x2d, 0x09, 0xce, 0x91, 0xce, 0x9d, 0xce, 0x91, 0xce, 0x93, 0xce, 0x9a, 0xce, 0x95, 0xce, 0xa3 }; const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1 }; const uint8_t token2[] = {0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xb5, 0xcf, 0x83 }; const uint8_t token3[] = {0xce, 0xb1, 0xce, 0xbd, 0xce, 0xb1, 0xce, 0xb3, 0xce, 0xba, 
0xce, 0xb5, 0xcf, 0x83 }; // Verify the correct analysis of small letters with diaeresis and the elimination of punctuation marks checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3) )); } TEST_F(GreekAnalyzerTest, testReusableTokenStream3) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xce, 0xa0, 0xce, 0xa1, 0xce, 0x9f, 0xce, 0xab, 0xce, 0xa0, 0xce, 0x9f, 0xce, 0x98, 0xce, 0x95, 0xce, 0xa3, 0xce, 0x95, 0xce, 0x99, 0xce, 0xa3, 0x20, 0x20, 0xce, 0x86, 0xcf, 0x88, 0xce, 0xbf, 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x82, 0x2c, 0x20, 0xce, 0xbf, 0x20, 0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x83, 0xcf, 0x84, 0xcf, 0x8c, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xce, 0xbf, 0xce, 0xb9, 0x20, 0xce, 0xac, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9 }; const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb8, 0xce, 0xb5, 0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83 }; const uint8_t token2[] = {0xce, 0xb1, 0xcf, 0x88, 0xce, 0xbf, 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x83}; const uint8_t token3[] = {0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x83}; const uint8_t token4[] = {0xce, 0xb1, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9}; // Verify the correct analysis of capital accented letters and capital letters with diaeresis, // as well as the elimination of stop words checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3), UTF8_TO_STRING(token4) )); } 
LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/fa/000077500000000000000000000000001456444476200262475ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/fa/PersianAnalyzerTest.cpp000066400000000000000000001152161456444476200327300ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "PersianAnalyzer.h" using namespace Lucene; typedef BaseTokenStreamFixture PersianAnalyzerTest; /// These tests show how the combination of tokenization (breaking on zero-width /// non-joiner), normalization (such as treating arabic YEH and farsi YEH the /// same), and stopwords creates a light-stemming effect for verbs. 
/// /// These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar /// active present indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs1) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs2) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective preterite indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs3) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active future indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs4) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active present progressive indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs5) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd9, 
0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite progressive indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs6) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active perfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs7) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective perfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs8) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active pluperfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs9) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf }; const uint8_t 
second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective pluperfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs10) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbs11) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective preterite subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbs12) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active pluperfect subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbs13) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t 
second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective pluperfect subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbs14) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs15) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs16) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective preterite indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs17) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = 
{0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive perfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs18) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective perfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs19) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive pluperfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs20) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective pluperfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs21) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 
0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive future indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs22) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present progressive indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs23) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite progressive indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbs24) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbs25) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const 
uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbs26) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective preterite subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbs27) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive pluperfect subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbs28) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective pluperfect subjunctive TEST_F(PersianAnalyzerTest, 
testBehaviorVerbs29) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active present subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbs30) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// This test shows how the combination of tokenization and stopwords creates a /// light-stemming effect for verbs. /// /// In this case, these forms are presented with alternative orthography, using /// arabic yeh and whitespace. This yeh phenomenon is common for legacy text /// due to some previous bugs in Microsoft Windows. 
/// /// These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar /// active present subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective1) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective2) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective preterite indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective3) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active future indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective4) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active present progressive indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective5) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 
0xd8, 0xaf, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite progressive indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective6) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active perfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective7) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective perfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective8) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active pluperfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective9) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf }; const uint8_t 
second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective pluperfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective10) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective11) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective preterite subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective12) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active pluperfect subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective13) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; 
const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective pluperfect subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective14) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective15) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective16) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective preterite indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective17) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf }; const 
uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive perfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective18) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective perfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective19) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive pluperfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective20) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective pluperfect indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective21) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 
0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive future indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective22) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present progressive indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective23) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite progressive indicative TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective24) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective25) { PersianAnalyzerPtr a = 
newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective26) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective preterite subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective27) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive pluperfect subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective28) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive 
imperfective pluperfect subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective29) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active present subjunctive TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective30) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// These tests show how the combination of tokenization (breaking on zero-width /// non-joiner or space) and stopwords creates a light-stemming effect for /// nouns, removing the plural -ha. 
TEST_F(PersianAnalyzerTest, testBehaviorNouns1) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf, 0x20, 0xd9, 0x87, 0xd8, 0xa7}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } TEST_F(PersianAnalyzerTest, testBehaviorNouns2) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf, 0xe2, 0x80, 0x8c, 0xd9, 0x87, 0xd8, 0xa7}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// Test showing that non-Persian text is treated very much like SimpleAnalyzer (lowercased, etc) TEST_F(PersianAnalyzerTest, testBehaviorNonPersian) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(a, L"English test.", newCollection(L"english", L"test")); } TEST_F(PersianAnalyzerTest, testReusableTokenStream1) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesToReuse(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } TEST_F(PersianAnalyzerTest, testReusableTokenStream2) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf, 0xe2, 0x80, 0x8c, 0xd9, 0x87, 0xd8, 0xa7}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf}; checkAnalyzesToReuse(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// Test that custom stopwords work, and are 
not case-sensitive. TEST_F(PersianAnalyzerTest, testCustomStopwords) { Collection stopWords = newCollection(L"the", L"and", L"a"); PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, HashSet::newInstance(stopWords.begin(), stopWords.end())); checkAnalyzesTo(a, L"The quick brown fox.", newCollection(L"quick", L"brown", L"fox")); } PersianNormalizationFilterTest.cpp000066400000000000000000000052421456444476200350550ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/fa///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "ArabicLetterTokenizer.h" #include "PersianNormalizationFilter.h" #include "StringReader.h" using namespace Lucene; class PersianNormalizationFilterTest : public BaseTokenStreamFixture { public: virtual ~PersianNormalizationFilterTest() { } public: void check(const String& input, const String& expected) { ArabicLetterTokenizerPtr tokenStream = newLucene(newLucene(input)); PersianNormalizationFilterPtr filter = newLucene(tokenStream); checkTokenStreamContents(filter, newCollection(expected)); } }; TEST_F(PersianNormalizationFilterTest, testFarsiYeh) { const uint8_t first[] = {0xd9, 0x87, 0xd8, 0xa7, 0xdb, 0x8c}; const uint8_t second[] = {0xd9, 0x87, 0xd8, 0xa7, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(PersianNormalizationFilterTest, testYehBarree) { const uint8_t first[] = {0xd9, 0x87, 0xd8, 0xa7, 0xdb, 0x92}; const uint8_t second[] = {0xd9, 0x87, 0xd8, 0xa7, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(PersianNormalizationFilterTest, testKeheh) { const uint8_t first[] = {0xda, 0xa9, 
0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaf, 0xd9, 0x86}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaf, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(PersianNormalizationFilterTest, testHehYeh) { const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8, 0xdb, 0x80}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x87}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(PersianNormalizationFilterTest, testHehHamzaAbove) { const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x87, 0xd9, 0x94}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x87}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } TEST_F(PersianNormalizationFilterTest, testHehGoal) { const uint8_t first[] = {0xd8, 0xb2, 0xd8, 0xa7, 0xd8, 0xaf, 0xdb, 0x81}; const uint8_t second[] = {0xd8, 0xb2, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x87}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/fr/000077500000000000000000000000001456444476200262705ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/fr/ElisionTest.cpp000066400000000000000000000030231456444476200312340ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "StandardTokenizer.h" #include "StringReader.h" #include "ElisionFilter.h" #include "TermAttribute.h" using namespace Lucene; class ElisionTest : public BaseTokenStreamFixture { public: virtual ~ElisionTest() { } public: Collection addTerms(const TokenFilterPtr& filter) { Collection terms = Collection::newInstance(); TermAttributePtr termAtt = filter->getAttribute(); while (filter->incrementToken()) { terms.add(termAtt->term()); } return terms; } }; TEST_F(ElisionTest, testElision) { String test = L"Plop, juste pour voir l'embrouille avec O'brian. M'enfin."; TokenizerPtr tokenizer = newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(test)); HashSet articles = HashSet::newInstance(); articles.add(L"l"); articles.add(L"M"); TokenFilterPtr filter = newLucene(tokenizer, articles); Collection terms = addTerms(filter); EXPECT_EQ(L"embrouille", terms[4]); EXPECT_EQ(L"O'brian", terms[6]); EXPECT_EQ(L"enfin", terms[7]); } LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/fr/FrenchAnalyzerTest.cpp000066400000000000000000000070161456444476200325530ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "FrenchAnalyzer.h" using namespace Lucene; typedef BaseTokenStreamFixture FrenchAnalyzerTest; TEST_F(FrenchAnalyzerTest, testAnalyzer) { AnalyzerPtr fa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(fa, L"", Collection::newInstance()); checkAnalyzesTo(fa, L"chien chat cheval", newCollection(L"chien", L"chat", L"cheval")); checkAnalyzesTo(fa, L"chien CHAT CHEVAL", newCollection(L"chien", L"chat", L"cheval")); checkAnalyzesTo(fa, L" chien ,? + = - CHAT /: > CHEVAL", newCollection(L"chien", L"chat", L"cheval")); checkAnalyzesTo(fa, L"chien++", newCollection(L"chien")); checkAnalyzesTo(fa, L"mot \"entreguillemet\"", newCollection(L"mot", L"entreguillemet")); // let's do some French specific tests now /// I would expect this to stay one term as in French the minus sign is often used for composing words checkAnalyzesTo(fa, L"Jean-Fran\u00e7ois", newCollection(L"jean", L"fran\u00e7ois")); // stopwords checkAnalyzesTo(fa, L"le la chien les aux chat du des \u00e0 cheval", newCollection(L"chien", L"chat", L"cheval")); // some nouns and adjectives checkAnalyzesTo(fa, L"lances chismes habitable chiste \u00e9l\u00e9ments captifs", newCollection( L"lanc", L"chism", L"habit", L"chist", L"\u00e9l\u00e9ment", L"captif")); // some verbs checkAnalyzesTo(fa, L"finissions souffrirent rugissante", newCollection(L"fin", L"souffr", L"rug")); // aujourd'hui stays one term which is OK checkAnalyzesTo(fa, L"C3PO aujourd\'hui oeuf \u00ef\u00e2\u00f6\u00fb\u00e0\u00e4 anticonstitutionnellement Java++ ", newCollection(L"c3po", L"aujourd\'hui", L"oeuf", L"\u00ef\u00e2\u00f6\u00fb\u00e0\u00e4", L"anticonstitutionnel", L"jav")); // here 1940-1945 stays as one term, 1940:1945 not ? 
checkAnalyzesTo(fa, L"33Bis 1940-1945 1940:1945 (---i+++)*", newCollection(L"33bis", L"1940-1945", L"1940", L"1945", L"i")); } TEST_F(FrenchAnalyzerTest, testReusableTokenStream) { AnalyzerPtr fa = newLucene(LuceneVersion::LUCENE_CURRENT); // stopwords checkAnalyzesToReuse(fa, L"le la chien les aux chat du des \u00e0 cheval", newCollection(L"chien", L"chat", L"cheval")); // some nouns and adjectives checkAnalyzesToReuse(fa, L"lances chismes habitable chiste \u00e9l\u00e9ments captifs", newCollection(L"lanc", L"chism", L"habit", L"chist", L"\u00e9l\u00e9ment", L"captif")); } /// Test that changes to the exclusion table are applied immediately when using reusable token streams. TEST_F(FrenchAnalyzerTest, testExclusionTableReuse) { FrenchAnalyzerPtr fa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesToReuse(fa, L"habitable", newCollection(L"habit")); HashSet exclusions = HashSet::newInstance(); exclusions.add(L"habitable"); fa->setStemExclusionTable(exclusions); checkAnalyzesToReuse(fa, L"habitable", newCollection(L"habitable")); } LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/nl/000077500000000000000000000000001456444476200262725ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/nl/DutchStemmerTest.cpp000066400000000000000000000120161456444476200322420ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "DutchAnalyzer.h" #include "WhitespaceTokenizer.h" using namespace Lucene; class DutchStemmerTest : public BaseTokenStreamFixture { public: virtual ~DutchStemmerTest() { } public: void check(const String& input, const String& expected) { checkOneTerm(newLucene(LuceneVersion::LUCENE_CURRENT), input, expected); } void checkReuse(const AnalyzerPtr& a, const String& input, const String& expected) { checkOneTermReuse(a, input, expected); } }; /// Test the Dutch Stem Filter, which only modifies the term text. /// The code states that it uses the snowball algorithm, but tests reveal some differences. TEST_F(DutchStemmerTest, testWithSnowballExamples) { check(L"lichaamsziek", L"lichaamsziek"); check(L"lichamelijk", L"licham"); check(L"lichamelijke", L"licham"); check(L"lichamelijkheden", L"licham"); check(L"lichamen", L"licham"); check(L"lichere", L"licher"); check(L"licht", L"licht"); check(L"lichtbeeld", L"lichtbeeld"); check(L"lichtbruin", L"lichtbruin"); check(L"lichtdoorlatende", L"lichtdoorlat"); check(L"lichte", L"licht"); check(L"lichten", L"licht"); check(L"lichtende", L"lichtend"); check(L"lichtenvoorde", L"lichtenvoord"); check(L"lichter", L"lichter"); check(L"lichtere", L"lichter"); check(L"lichters", L"lichter"); check(L"lichtgevoeligheid", L"lichtgevoel"); check(L"lichtgewicht", L"lichtgewicht"); check(L"lichtgrijs", L"lichtgrijs"); check(L"lichthoeveelheid", L"lichthoevel"); check(L"lichtintensiteit", L"lichtintensiteit"); check(L"lichtje", L"lichtj"); check(L"lichtjes", L"lichtjes"); check(L"lichtkranten", L"lichtkrant"); check(L"lichtkring", L"lichtkring"); check(L"lichtkringen", L"lichtkring"); check(L"lichtregelsystemen", L"lichtregelsystem"); check(L"lichtste", L"lichtst"); check(L"lichtstromende", L"lichtstrom"); check(L"lichtte", L"licht"); check(L"lichtten", L"licht"); check(L"lichttoetreding", 
L"lichttoetred"); check(L"lichtverontreinigde", L"lichtverontreinigd"); check(L"lichtzinnige", L"lichtzinn"); check(L"lid", L"lid"); check(L"lidia", L"lidia"); check(L"lidmaatschap", L"lidmaatschap"); check(L"lidstaten", L"lidstat"); check(L"lidvereniging", L"lidveren"); check(L"opgingen", L"opging"); check(L"opglanzing", L"opglanz"); check(L"opglanzingen", L"opglanz"); check(L"opglimlachten", L"opglimlacht"); check(L"opglimpen", L"opglimp"); check(L"opglimpende", L"opglimp"); check(L"opglimping", L"opglimp"); check(L"opglimpingen", L"opglimp"); check(L"opgraven", L"opgrav"); check(L"opgrijnzen", L"opgrijnz"); check(L"opgrijzende", L"opgrijz"); check(L"opgroeien", L"opgroei"); check(L"opgroeiende", L"opgroei"); check(L"opgroeiplaats", L"opgroeiplat"); check(L"ophaal", L"ophal"); check(L"ophaaldienst", L"ophaaldienst"); check(L"ophaalkosten", L"ophaalkost"); check(L"ophaalsystemen", L"ophaalsystem"); check(L"ophaalt", L"ophaalt"); check(L"ophaaltruck", L"ophaaltruck"); check(L"ophalen", L"ophal"); check(L"ophalend", L"ophal"); check(L"ophalers", L"ophaler"); check(L"ophef", L"ophef"); check(L"opheffen", L"ophef"); // versus snowball 'opheff' check(L"opheffende", L"ophef"); // versus snowball 'opheff' check(L"opheffing", L"ophef"); // versus snowball 'opheff' check(L"opheldering", L"ophelder"); check(L"ophemelde", L"ophemeld"); check(L"ophemelen", L"ophemel"); check(L"opheusden", L"opheusd"); check(L"ophief", L"ophief"); check(L"ophield", L"ophield"); check(L"ophieven", L"ophiev"); check(L"ophoepelt", L"ophoepelt"); check(L"ophoog", L"ophog"); check(L"ophoogzand", L"ophoogzand"); check(L"ophopen", L"ophop"); check(L"ophoping", L"ophop"); check(L"ophouden", L"ophoud"); } TEST_F(DutchStemmerTest, testReusableTokenStream) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"lichaamsziek", L"lichaamsziek"); checkReuse(a, L"lichamelijk", L"licham"); checkReuse(a, L"lichamelijke", L"licham"); checkReuse(a, L"lichamelijkheden", L"licham"); } /// Test 
that changes to the exclusion table are applied immediately when using reusable token streams. TEST_F(DutchStemmerTest, testExclusionTableReuse) { DutchAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"lichamelijk", L"licham"); HashSet exclusions = HashSet::newInstance(); exclusions.add(L"lichamelijk"); a->setStemExclusionTable(exclusions); checkReuse(a, L"lichamelijk", L"lichamelijk"); } LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/reverse/000077500000000000000000000000001456444476200273345ustar00rootroot00000000000000ReverseStringFilterTest.cpp000066400000000000000000000043671456444476200346030ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/reverse///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "ReverseStringFilter.h" #include "WhitespaceTokenizer.h" #include "StringReader.h" #include "TermAttribute.h" using namespace Lucene; typedef BaseTokenStreamFixture ReverseStringFilterTest; TEST_F(ReverseStringFilterTest, testFilter) { TokenStreamPtr stream = newLucene(newLucene(L"Do have a nice day")); // 1-4 length string ReverseStringFilterPtr filter = newLucene(stream); TermAttributePtr text = filter->getAttribute(); EXPECT_TRUE(filter->incrementToken()); EXPECT_EQ(L"oD", text->term()); EXPECT_TRUE(filter->incrementToken()); EXPECT_EQ(L"evah", text->term()); EXPECT_TRUE(filter->incrementToken()); EXPECT_EQ(L"a", text->term()); EXPECT_TRUE(filter->incrementToken()); EXPECT_EQ(L"ecin", text->term()); EXPECT_TRUE(filter->incrementToken()); EXPECT_EQ(L"yad", text->term()); EXPECT_TRUE(!filter->incrementToken()); } TEST_F(ReverseStringFilterTest, testFilterWithMark) { TokenStreamPtr stream = newLucene(newLucene(L"Do have a nice day")); // 1-4 length string ReverseStringFilterPtr filter = newLucene(stream, (wchar_t)0x0001); TermAttributePtr text = filter->getAttribute(); EXPECT_TRUE(filter->incrementToken()); EXPECT_EQ(String(1, (wchar_t)0x0001) + L"oD", text->term()); EXPECT_TRUE(filter->incrementToken()); EXPECT_EQ(String(1, (wchar_t)0x0001) + L"evah", text->term()); EXPECT_TRUE(filter->incrementToken()); EXPECT_EQ(String(1, (wchar_t)0x0001) + L"a", text->term()); EXPECT_TRUE(filter->incrementToken()); EXPECT_EQ(String(1, (wchar_t)0x0001) + L"ecin", text->term()); EXPECT_TRUE(filter->incrementToken()); EXPECT_EQ(String(1, (wchar_t)0x0001) + L"yad", text->term()); EXPECT_TRUE(!filter->incrementToken()); } 
LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/ru/000077500000000000000000000000001456444476200263075ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/ru/RussianAnalyzerTest.cpp000066400000000000000000000127721456444476200330160ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "TestUtils.h" #include "BaseTokenStreamFixture.h" #include "RussianAnalyzer.h" #include "RussianLetterTokenizer.h" #include "InputStreamReader.h" #include "FileReader.h" #include "TermAttribute.h" #include "StringReader.h" #include "FileUtils.h" using namespace Lucene; typedef BaseTokenStreamFixture RussianAnalyzerTest; TEST_F(RussianAnalyzerTest, testUnicode) { RussianAnalyzerPtr ra = newLucene(LuceneVersion::LUCENE_CURRENT); String testFile(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"russian"), L"testUTF8.txt")); InputStreamReaderPtr inWords = newLucene(newLucene(testFile)); String sampleFile(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"russian"), L"resUTF8.htm")); InputStreamReaderPtr sampleUnicode = newLucene(newLucene(sampleFile)); TokenStreamPtr in = ra->tokenStream(L"all", inWords); RussianLetterTokenizerPtr sample = newLucene(sampleUnicode); TermAttributePtr text = in->getAttribute(); TermAttributePtr sampleText = sample->getAttribute(); while (true) { if (!in->incrementToken()) { break; } sample->incrementToken(); EXPECT_EQ(text->term(), sampleText->term()); } inWords->close(); sampleUnicode->close(); } TEST_F(RussianAnalyzerTest, testDigitsInRussianCharset) { ReaderPtr reader = newLucene(L"text 1000"); RussianAnalyzerPtr ra = 
newLucene(LuceneVersion::LUCENE_CURRENT); TokenStreamPtr stream = ra->tokenStream(L"", reader); TermAttributePtr termText = stream->getAttribute(); EXPECT_TRUE(stream->incrementToken()); EXPECT_EQ(L"text", termText->term()); EXPECT_TRUE(stream->incrementToken()); EXPECT_EQ(L"1000", termText->term()); EXPECT_TRUE(!stream->incrementToken()); } TEST_F(RussianAnalyzerTest, testReusableTokenStream1) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xd0, 0x92, 0xd0, 0xbc, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xb5, 0x20, 0xd1, 0x81, 0x20, 0xd1, 0x82, 0xd0, 0xb5, 0xd0, 0xbc, 0x20, 0xd0, 0xbe, 0x20, 0xd1, 0x81, 0xd0, 0xb8, 0xd0, 0xbb, 0xd0, 0xb5, 0x20, 0xd1, 0x8d, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xba, 0xd1, 0x82, 0xd1, 0x80, 0xd0, 0xbe, 0xd0, 0xbc, 0xd0, 0xb0, 0xd0, 0xb3, 0xd0, 0xbd, 0xd0, 0xb8, 0xd1, 0x82, 0xd0, 0xbd, 0xd0, 0xbe, 0xd0, 0xb9, 0x20, 0xd1, 0x8d, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x80, 0xd0, 0xb3, 0xd0, 0xb8, 0xd0, 0xb8, 0x20, 0xd0, 0xb8, 0xd0, 0xbc, 0xd0, 0xb5, 0xd0, 0xbb, 0xd0, 0xb8, 0x20, 0xd0, 0xbf, 0xd1, 0x80, 0xd0, 0xb5, 0xd0, 0xb4, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xb2, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xbd, 0xd0, 0xb8, 0xd0, 0xb5, 0x20, 0xd0, 0xb5, 0xd1, 0x89, 0xd0, 0xb5 }; const uint8_t token1[] = {0xd0, 0xb2, 0xd0, 0xbc, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82}; const uint8_t token2[] = {0xd1, 0x81, 0xd0, 0xb8, 0xd0, 0xbb}; const uint8_t token3[] = {0xd1, 0x8d, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xba, 0xd1, 0x82, 0xd1, 0x80, 0xd0, 0xbe, 0xd0, 0xbc, 0xd0, 0xb0, 0xd0, 0xb3, 0xd0, 0xbd, 0xd0, 0xb8, 0xd1, 0x82, 0xd0, 0xbd }; const uint8_t token4[] = {0xd1, 0x8d, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x80, 0xd0, 0xb3}; const uint8_t token5[] = {0xd0, 0xb8, 0xd0, 0xbc, 0xd0, 0xb5, 0xd0, 0xbb}; const uint8_t token6[] = {0xd0, 0xbf, 0xd1, 0x80, 0xd0, 0xb5, 0xd0, 0xb4, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xb2, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xbd }; checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( 
UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3), UTF8_TO_STRING(token4), UTF8_TO_STRING(token5), UTF8_TO_STRING(token6) )); } TEST_F(RussianAnalyzerTest, testReusableTokenStream2) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = { 0xd0, 0x9d, 0xd0, 0xbe, 0x20, 0xd0, 0xb7, 0xd0, 0xbd, 0xd0, 0xb0, 0xd0, 0xbd, 0xd0, 0xb8, 0xd0, 0xb5, 0x20, 0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xbe, 0x20, 0xd1, 0x85, 0xd1, 0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd0, 0xb8, 0xd0, 0xbb, 0xd0, 0xbe, 0xd1, 0x81, 0xd1, 0x8c, 0x20, 0xd0, 0xb2, 0x20, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xb9, 0xd0, 0xbd, 0xd0, 0xb5 }; const uint8_t token1[] = {0xd0, 0xb7, 0xd0, 0xbd, 0xd0, 0xb0, 0xd0, 0xbd}; const uint8_t token2[] = {0xd1, 0x85, 0xd1, 0x80, 0xd0, 0xb0, 0xd0, 0xbd}; const uint8_t token3[] = {0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xb9, 0xd0, 0xbd}; checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( UTF8_TO_STRING(token1), UTF8_TO_STRING(token2), UTF8_TO_STRING(token3) )); } LucenePlusPlus-rel_3.0.9/src/test/contrib/analyzers/common/analysis/ru/RussianStemTest.cpp000066400000000000000000000035251456444476200321350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "TestUtils.h" #include "BaseTokenStreamFixture.h" #include "RussianStemmer.h" #include "FileReader.h" #include "BufferedReader.h" #include "InputStreamReader.h" #include "FileUtils.h" using namespace Lucene; class RussianStemmerTest : public BaseTokenStreamFixture { public: RussianStemmerTest() { words = Collection::newInstance(); stems = Collection::newInstance(); String wordsFile(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"russian"), L"wordsUTF8.txt")); String stemsFile(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"russian"), L"stemsUTF8.txt")); BufferedReaderPtr inWords = newLucene(newLucene(newLucene(wordsFile))); String word; while (inWords->readLine(word)) { words.add(word); } inWords->close(); BufferedReaderPtr inStems = newLucene(newLucene(newLucene(stemsFile))); String stem; while (inStems->readLine(stem)) { stems.add(stem); } inStems->close(); } virtual ~RussianStemmerTest() { } protected: Collection words; Collection stems; }; TEST_F(RussianStemmerTest, testStem) { EXPECT_EQ(words.size(), stems.size()); for (int32_t i = 0; i < words.size(); ++i) { String realStem = RussianStemmer::stemWord(words[i]); EXPECT_EQ(stems[i], realStem); } } LucenePlusPlus-rel_3.0.9/src/test/contrib/highlighter/000077500000000000000000000000001456444476200230345ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/highlighter/HighlighterTest.cpp000066400000000000000000002604671456444476200266550ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "BaseTokenStreamFixture.h" #include "Highlighter.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "IndexReader.h" #include "IndexSearcher.h" #include "StandardAnalyzer.h" #include "Document.h" #include "NumericField.h" #include "SimpleAnalyzer.h" #include "QueryParser.h" #include "TopDocs.h" #include "QueryScorer.h" #include "TokenStream.h" #include "SimpleFragmenter.h" #include "SimpleSpanFragmenter.h" #include "SimpleHTMLFormatter.h" #include "StringReader.h" #include "TokenSources.h" #include "MultiTermQuery.h" #include "WhitespaceAnalyzer.h" #include "TokenGroup.h" #include "NumericRangeQuery.h" #include "PhraseQuery.h" #include "MultiPhraseQuery.h" #include "SpanNearQuery.h" #include "SpanNotQuery.h" #include "SpanTermQuery.h" #include "QueryTermScorer.h" #include "WeightedSpanTerm.h" #include "WeightedTerm.h" #include "BooleanQuery.h" #include "WildcardQuery.h" #include "NullFragmenter.h" #include "TermRangeFilter.h" #include "LowerCaseTokenizer.h" #include "TermAttribute.h" #include "PositionIncrementAttribute.h" #include "OffsetAttribute.h" #include "TextFragment.h" #include "SimpleHTMLEncoder.h" #include "MultiSearcher.h" #include "ScoreDoc.h" #include "Term.h" #include "FilteredQuery.h" #include "Token.h" #include "TermQuery.h" using namespace Lucene; class HighlighterTest; namespace HighlighterTestNS { class TestFormatter : public Formatter, public LuceneObject { public: TestFormatter(HighlighterTest* fixture); virtual ~TestFormatter(); LUCENE_CLASS(TestFormatter); protected: HighlighterTest* fixture; public: virtual String highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup); }; } class HighlighterTest : public BaseTokenStreamFixture { public: HighlighterTest() { numHighlights = 0; analyzer = newLucene(TEST_VERSION); texts = newCollection( L"Hello this is a piece of text that is very long and contains 
too much preamble and the meat is really here which says kennedy has been shot", L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy", L"JFK has been shot", L"John Kennedy has been shot", L"This text has a typo in referring to Keneddy", L"wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", L"y z x y z a b", L"lets is a the lets is a the lets is a the lets" ); ramDir = newLucene(); IndexWriterPtr writer = newLucene(ramDir, newLucene(TEST_VERSION), true, IndexWriter::MaxFieldLengthUNLIMITED); for (int32_t i = 0; i < texts.size(); ++i) { addDoc(writer, texts[i]); } DocumentPtr doc = newLucene(); NumericFieldPtr nfield = newLucene(NUMERIC_FIELD_NAME, Field::STORE_YES, true); nfield->setIntValue(1); doc->add(nfield); writer->addDocument(doc, analyzer); nfield = newLucene(NUMERIC_FIELD_NAME, Field::STORE_YES, true); nfield->setIntValue(3); doc = newLucene(); doc->add(nfield); writer->addDocument(doc, analyzer); nfield = newLucene(NUMERIC_FIELD_NAME, Field::STORE_YES, true); nfield->setIntValue(5); doc = newLucene(); doc->add(nfield); writer->addDocument(doc, analyzer); nfield = newLucene(NUMERIC_FIELD_NAME, Field::STORE_YES, true); nfield->setIntValue(7); doc = newLucene(); doc->add(nfield); writer->addDocument(doc, analyzer); writer->optimize(); writer->close(); reader = IndexReader::open(ramDir, true); dir = newLucene(); a = newLucene(); } virtual ~HighlighterTest() { } public: IndexReaderPtr reader; QueryPtr query; RAMDirectoryPtr ramDir; IndexSearcherPtr searcher; public: int32_t numHighlights; AnalyzerPtr analyzer; TopDocsPtr hits; Collection texts; DirectoryPtr dir; AnalyzerPtr a; static const LuceneVersion::Version TEST_VERSION; static const String FIELD_NAME; static const String NUMERIC_FIELD_NAME; public: void addDoc(const IndexWriterPtr& writer, const String& text) { DocumentPtr doc = newLucene(); FieldPtr field = 
newLucene(FIELD_NAME, text, Field::STORE_YES, Field::INDEX_ANALYZED); doc->add(field); writer->addDocument(doc); } String highlightField(const QueryPtr& query, const String& fieldName, const String& text) { TokenStreamPtr tokenStream = newLucene(TEST_VERSION)->tokenStream(fieldName, newLucene(text)); // Assuming "", "" used to highlight SimpleHTMLFormatterPtr formatter = newLucene(); QueryScorerPtr scorer = newLucene(query, fieldName, FIELD_NAME); HighlighterPtr highlighter = newLucene(formatter, scorer); highlighter->setTextFragmenter(newLucene(INT_MAX)); String rv = highlighter->getBestFragments(tokenStream, text, 1, L"(FIELD TEXT TRUNCATED)"); return rv.empty() ? text : rv; } void doSearching(const String& queryString) { QueryParserPtr parser = newLucene(TEST_VERSION, FIELD_NAME, analyzer); parser->setEnablePositionIncrements(true); parser->setMultiTermRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); query = parser->parse(queryString); doSearching(query); } void doSearching(const QueryPtr& unReWrittenQuery) { searcher = newLucene(ramDir, true); // for any multi-term queries to work (prefix, wildcard, range,fuzzy etc) you must use a rewritten query query = unReWrittenQuery->rewrite(reader); hits = searcher->search(query, FilterPtr(), 1000); } void checkExpectedHighlightCount(int32_t maxNumFragmentsRequired, int32_t expectedHighlights, Collection expected) { Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); EXPECT_EQ(numHighlights, expectedHighlights); } EXPECT_EQ(results.size(), expected.size()); 
for (int32_t i = 0; i < results.size(); ++i) { EXPECT_EQ(results[i], expected[i]); } } void makeIndex() { IndexWriterPtr writer = newLucene(dir, a, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(doc(L"t_text1", L"random words for highlighting tests del")); writer->addDocument(doc(L"t_text1", L"more random words for second field del")); writer->addDocument(doc(L"t_text1", L"random words for highlighting tests del")); writer->addDocument(doc(L"t_text1", L"more random words for second field")); writer->optimize(); writer->close(); } DocumentPtr doc(const String& f, const String& v) { DocumentPtr doc = newLucene(); doc->add(newLucene(f, v, Field::STORE_YES, Field::INDEX_ANALYZED)); return doc; } void deleteDocument() { IndexWriterPtr writer = newLucene(dir, a, false, IndexWriter::MaxFieldLengthLIMITED); writer->deleteDocuments(newLucene(L"t_text1", L"del")); writer->close(); } void searchIndex() { String q = L"t_text1:random"; QueryParserPtr parser = newLucene(TEST_VERSION, L"t_text1", a ); QueryPtr query = parser->parse(q); IndexSearcherPtr searcher = newLucene(dir, true); // This scorer can return negative idf -> null fragment HighlighterScorerPtr scorer = newLucene(query, searcher->getIndexReader(), L"t_text1"); HighlighterPtr h = newLucene(scorer); TopDocsPtr hits = searcher->search(query, FilterPtr(), 10); for (int32_t i = 0; i < hits->totalHits; ++i) { DocumentPtr doc = searcher->doc(hits->scoreDocs[i]->doc); String result = h->getBestFragment(a, L"t_text1", doc->get(L"t_text1")); EXPECT_EQ(L"more random words for second field", result); } searcher->close(); } }; const LuceneVersion::Version HighlighterTest::TEST_VERSION = LuceneVersion::LUCENE_CURRENT; const String HighlighterTest::FIELD_NAME = L"contents"; const String HighlighterTest::NUMERIC_FIELD_NAME = L"nfield"; namespace HighlighterTestNS { TestFormatter::TestFormatter(HighlighterTest* fixture) { this->fixture = fixture; } TestFormatter::~TestFormatter() { } String 
TestFormatter::highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup) { if (tokenGroup->getTotalScore() <= 0) { return originalText; } ++fixture->numHighlights; // update stats used in assertions return L"" + originalText + L""; } DECLARE_SHARED_PTR(TestHighlightRunner) class TestHighlightRunner : public LuceneObject { public: TestHighlightRunner(HighlighterTest* fixture) { this->fixture = fixture; mode = QUERY; frag = newLucene(20); } virtual ~TestHighlightRunner() { } LUCENE_CLASS(TestHighlightRunner); protected: HighlighterTest* fixture; static const int32_t QUERY; static const int32_t QUERY_TERM; public: int32_t mode; FragmenterPtr frag; public: virtual HighlighterPtr getHighlighter(const QueryPtr& query, const String& fieldName, const TokenStreamPtr& stream, const FormatterPtr& formatter) { return getHighlighter(query, fieldName, stream, formatter, true); } virtual HighlighterPtr getHighlighter(const QueryPtr& query, const String& fieldName, const TokenStreamPtr& stream, const FormatterPtr& formatter, bool expanMultiTerm) { HighlighterScorerPtr scorer; if (mode == QUERY) { scorer = newLucene(query, fieldName); if (!expanMultiTerm) { boost::dynamic_pointer_cast(scorer)->setExpandMultiTermQuery(false); } } else if (mode == QUERY_TERM) { scorer = newLucene(query); } else { boost::throw_exception(IllegalArgumentException(L"Unknown highlight mode")); } return newLucene(formatter, scorer); } virtual HighlighterPtr getHighlighter(Collection weightedTerms, const FormatterPtr& formatter) { if (mode == QUERY) { Collection weightedSpanTerms = Collection::newInstance(weightedTerms.size()); for (int32_t i = 0; i < weightedTerms.size(); ++i) { weightedSpanTerms[i] = boost::dynamic_pointer_cast(weightedTerms[i]); } return newLucene(formatter, newLucene(weightedSpanTerms)); } else if (mode == QUERY_TERM) { return newLucene(formatter, newLucene(weightedTerms)); } else { boost::throw_exception(IllegalArgumentException(L"Unknown highlight mode")); } return 
HighlighterPtr(); } virtual void doStandardHighlights(const AnalyzerPtr& analyzer, const IndexSearcherPtr& searcher, const TopDocsPtr& hits, const QueryPtr& query, const FormatterPtr& formatter, Collection expected, bool expandMT = false) { Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(HighlighterTest::FIELD_NAME); int32_t maxNumFragmentsRequired = 2; String fragmentSeparator = L"..."; HighlighterScorerPtr scorer; TokenStreamPtr tokenStream = analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); if (mode == QUERY) { scorer = newLucene(query); } else if (mode == QUERY_TERM) { scorer = newLucene(query); } HighlighterPtr highlighter = newLucene(formatter, scorer); highlighter->setTextFragmenter(frag); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator)); } EXPECT_EQ(results.size(), expected.size()); for (int32_t i = 0; i < results.size(); ++i) { EXPECT_EQ(results[i], expected[i]); } } virtual void run(Collection expected) = 0; virtual void start(Collection expected = Collection()) { run(expected); mode = QUERY_TERM; run(expected); } }; const int32_t TestHighlightRunner::QUERY = 0; const int32_t TestHighlightRunner::QUERY_TERM = 1; } TEST_F(HighlighterTest, testQueryScorerHits) { AnalyzerPtr analyzer = newLucene(); QueryParserPtr qp = newLucene(TEST_VERSION, FIELD_NAME, analyzer); query = qp->parse(L"\"very long\""); searcher = newLucene(ramDir, true); TopDocsPtr hits = searcher->search(query, 10); QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(scorer); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->scoreDocs.size(); ++i) { DocumentPtr doc = searcher->doc(hits->scoreDocs[i]->doc); String storedField = doc->get(FIELD_NAME); TokenStreamPtr stream = TokenSources::getAnyTokenStream(searcher->getIndexReader(), 
hits->scoreDocs[i]->doc, FIELD_NAME, doc, analyzer); FragmenterPtr fragmenter = newLucene(scorer); highlighter->setTextFragmenter(fragmenter); results.add(highlighter->getBestFragment(stream, storedField)); } EXPECT_EQ(results.size(), 2); EXPECT_EQ(results[0], L"Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot"); EXPECT_EQ(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very"); } TEST_F(HighlighterTest, testHighlightingWithDefaultField) { String s1 = L"I call our world Flatland, not because we call it so,"; QueryParserPtr parser = newLucene(TEST_VERSION, FIELD_NAME, newLucene(TEST_VERSION)); // Verify that a query against the default field results in text being highlighted regardless of the field name. QueryPtr q = parser->parse(L"\"world Flatland\"~3"); String expected = L"I call our world Flatland, not because we call it so,"; String observed = highlightField(q, L"SOME_FIELD_NAME", s1); EXPECT_EQ(expected, observed); // Verify that a query against a named field does not result in any ighlighting when the query field name differs // from the name of the field being highlighted, which in this example happens to be the default field name. 
q = parser->parse(L"text:\"world Flatland\"~3"); expected = s1; observed = highlightField(q, FIELD_NAME, s1); EXPECT_EQ(s1, highlightField(q, FIELD_NAME, s1)); } TEST_F(HighlighterTest, testSimpleSpanHighlighter) { doSearching(L"Kennedy"); int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(scorer); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } EXPECT_EQ(results.size(), 3); EXPECT_EQ(results[0], L"John Kennedy has been shot"); EXPECT_EQ(results[1], L"This piece of text refers to Kennedy... to Kennedy"); EXPECT_EQ(results[2], L" kennedy has been shot"); } TEST_F(HighlighterTest, testRepeatingTermsInMultBooleans) { String content = L"x y z a b c d e f g b c g"; String ph1 = L"\"a b c d\""; String ph2 = L"\"b c g\""; String f1 = L"f1"; String f2 = L"f2"; String f1c = f1 + L":"; String f2c = f2 + L":"; String q = L"(" + f1c + ph1 + L" OR " + f2c + ph1 + L") AND (" + f1c + ph2 + L" OR " + f2c + ph2 + L")"; AnalyzerPtr analyzer = newLucene(); QueryParserPtr qp = newLucene(TEST_VERSION, f1, analyzer); QueryPtr query = qp->parse(q); QueryScorerPtr scorer = newLucene(query, f1); scorer->setExpandMultiTermQuery(false); HighlighterPtr h = newLucene(newLucene(this), scorer); h->getBestFragment(analyzer, f1, content); EXPECT_EQ(numHighlights, 7); } TEST_F(HighlighterTest, testSimpleQueryScorerPhraseHighlighting) { doSearching(L"\"very long and contains\""); int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); Collection results = Collection::newInstance(); for 
(int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } EXPECT_EQ(results.size(), 1); EXPECT_EQ(results[0], L"Hello this is a piece of text that is very long and contains too much preamble"); EXPECT_EQ(numHighlights, 3); numHighlights = 0; doSearching(L"\"This piece of text refers to Kennedy\""); maxNumFragmentsRequired = 2; scorer = newLucene(query, FIELD_NAME); highlighter = newLucene(newLucene(this), scorer); results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } EXPECT_EQ(results.size(), 1); EXPECT_EQ(results[0], L"This piece of text refers to Kennedy at the beginning then has a longer piece"); EXPECT_EQ(numHighlights, 4); numHighlights = 0; doSearching(L"\"lets is a the lets is a the lets is a the lets\""); maxNumFragmentsRequired = 2; scorer = newLucene(query, FIELD_NAME); highlighter = newLucene(newLucene(this), scorer); results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } EXPECT_EQ(results.size(), 1); EXPECT_EQ(results[0], L"lets is a the lets is a the lets is a the lets"); EXPECT_EQ(numHighlights, 4); } TEST_F(HighlighterTest, 
testSpanRegexQuery) { // todo } TEST_F(HighlighterTest, testRegexQuery) { // todo } TEST_F(HighlighterTest, testNumericRangeQuery) { // doesn't currently highlight, but make sure it doesn't cause exception either query = NumericRangeQuery::newIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true); searcher = newLucene(ramDir, true); hits = searcher->search(query, 100); int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(NUMERIC_FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } EXPECT_EQ(results.size(), 2); EXPECT_EQ(results[0], L""); EXPECT_EQ(results[1], L""); EXPECT_EQ(numHighlights, 0); } TEST_F(HighlighterTest, testSimpleQueryScorerPhraseHighlighting2) { doSearching(L"\"text piece long\"~5"); int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); highlighter->setTextFragmenter(newLucene(40)); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } EXPECT_EQ(results.size(), 2); EXPECT_EQ(results[0], L"Hello this is a piece of text that is very long and contains too much preamble"); EXPECT_EQ(results[1], L" at the beginning then has a longer piece of text that is very long in the middle"); EXPECT_EQ(numHighlights, 6); } TEST_F(HighlighterTest, 
testSimpleQueryScorerPhraseHighlighting3) { doSearching(L"\"x y z\""); int32_t maxNumFragmentsRequired = 2; Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); EXPECT_EQ(numHighlights, 3); } EXPECT_EQ(results.size(), 1); EXPECT_EQ(results[0], L"y z x y z a b"); } TEST_F(HighlighterTest, testSimpleSpanFragmenter) { doSearching(L"\"piece of text that is very long\""); int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(scorer, 5)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } EXPECT_EQ(results.size(), 2); EXPECT_EQ(results[0], L" this is a piece of text"); EXPECT_EQ(results[1], L" piece of text that is very long"); doSearching(L"\"been shot\""); maxNumFragmentsRequired = 2; scorer = newLucene(query, FIELD_NAME); highlighter = newLucene(newLucene(this), scorer); results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(scorer, 20)); 
results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } EXPECT_EQ(numHighlights, 14); EXPECT_EQ(results.size(), 3); EXPECT_EQ(results[0], L"JFK has been shot"); EXPECT_EQ(results[1], L"John Kennedy has been shot"); EXPECT_EQ(results[2], L" kennedy has been shot"); } /// position sensitive query added after position insensitive query TEST_F(HighlighterTest, testPosTermStdTerm) { doSearching(L"y \"x y z\""); int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); EXPECT_EQ(numHighlights, 4); } EXPECT_EQ(results.size(), 1); EXPECT_EQ(results[0], L"y z x y z a b"); } TEST_F(HighlighterTest, testQueryScorerMultiPhraseQueryHighlighting) { MultiPhraseQueryPtr mpq = newLucene(); mpq->add(newCollection(newLucene(FIELD_NAME, L"wordx"), newLucene(FIELD_NAME, L"wordb"))); mpq->add(newLucene(FIELD_NAME, L"wordy")); doSearching(mpq); int32_t maxNumFragmentsRequired = 2; Collection expected = newCollection(L"wordx wordy wordz wordx wordy wordx worda wordb wordy wordc"); checkExpectedHighlightCount(maxNumFragmentsRequired, 6, expected); } TEST_F(HighlighterTest, testQueryScorerMultiPhraseQueryHighlightingWithGap) { MultiPhraseQueryPtr mpq = newLucene(); // The toString of MultiPhraseQuery doesn't work so well with these out-of-order additions, but the Query itself seems to match accurately. 
mpq->add(newCollection(newLucene(FIELD_NAME, L"wordz")), 2); mpq->add(newCollection(newLucene(FIELD_NAME, L"wordx")), 0); doSearching(mpq); int32_t maxNumFragmentsRequired = 1; int32_t expectedHighlights = 2; Collection expected = newCollection(L"wordx wordy wordz wordx wordy wordx"); checkExpectedHighlightCount(maxNumFragmentsRequired, expectedHighlights, expected); } namespace TestNearSpanSimpleQuery { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { mode = QUERY; doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); } }; } TEST_F(HighlighterTest, testNearSpanSimpleQuery) { doSearching(newLucene(newCollection( newLucene(newLucene(FIELD_NAME, L"beginning")), newLucene(newLucene(FIELD_NAME, L"kennedy"))), 3, false)); HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); Collection expected = newCollection(L" refers to Kennedy at the beginning"); helper->run(expected); EXPECT_EQ(numHighlights, 2); } TEST_F(HighlighterTest, testSimpleQueryTermScorerHighlighter) { doSearching(L"Kennedy"); HighlighterPtr highlighter = newLucene(newLucene(query)); highlighter->setTextFragmenter(newLucene(40)); int32_t maxNumFragmentsRequired = 2; Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } EXPECT_EQ(results.size(), 3); EXPECT_EQ(results[0], L"John Kennedy has been shot"); EXPECT_EQ(results[1], L"This piece of text refers to Kennedy... 
to Kennedy"); EXPECT_EQ(results[2], L" kennedy has been shot"); } namespace TestSpanHighlighting { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { mode = QUERY; doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); } }; } TEST_F(HighlighterTest, testSpanHighlighting) { QueryPtr query1 = newLucene(newCollection( newLucene(newLucene(FIELD_NAME, L"wordx")), newLucene(newLucene(FIELD_NAME, L"wordy"))), 1, false); QueryPtr query2 = newLucene(newCollection( newLucene(newLucene(FIELD_NAME, L"wordy")), newLucene(newLucene(FIELD_NAME, L"wordc"))), 1, false); BooleanQueryPtr bquery = newLucene(); bquery->add(query1, BooleanClause::SHOULD); bquery->add(query2, BooleanClause::SHOULD); doSearching(bquery); HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); Collection expected = newCollection(L"wordx wordy wordz wordx wordy wordx"); helper->run(expected); EXPECT_EQ(numHighlights, 7); } namespace TestNotSpanSimpleQuery { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { mode = QUERY; doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); } }; } TEST_F(HighlighterTest, testNotSpanSimpleQuery) { doSearching(newLucene(newLucene(newCollection( newLucene(newLucene(FIELD_NAME, L"shot")), newLucene(newLucene(FIELD_NAME, L"kennedy"))), 3, false), newLucene(newLucene(FIELD_NAME, L"john")))); HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); Collection expected = newCollection( L"John 
Kennedy has been shot", L" kennedy has been shot" ); helper->run(expected); EXPECT_EQ(numHighlights, 4); } namespace TestGetBestFragmentsSimpleQuery { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"Kennedy"); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); EXPECT_EQ(fixture->numHighlights, 4); } }; } TEST_F(HighlighterTest, testGetBestFragmentsSimpleQuery) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start( newCollection( L"John Kennedy has been shot", L" refers to Kennedy... to Kennedy", L" kennedy has been shot" ) ); } namespace TestGetFuzzyFragments { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"Kinnedy~"); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected, true); EXPECT_EQ(fixture->numHighlights, 5); } }; } TEST_F(HighlighterTest, testGetFuzzyFragments) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start( newCollection( L"John Kennedy has been shot", L" refers to Kennedy... 
to Kennedy", L" kennedy has been shot", L" to Keneddy" ) ); } namespace TestGetWildCardFragments { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"K?nnedy"); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); EXPECT_EQ(fixture->numHighlights, 4); } }; } TEST_F(HighlighterTest, testGetWildCardFragments) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start( newCollection( L"John Kennedy has been shot", L" refers to Kennedy... to Kennedy", L" kennedy has been shot" ) ); } namespace TestGetMidWildCardFragments { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"K*dy"); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); EXPECT_EQ(fixture->numHighlights, 5); } }; } TEST_F(HighlighterTest, testGetMidWildCardFragments) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start( newCollection( L" to Keneddy", L"John Kennedy has been shot", L" refers to Kennedy... 
to Kennedy", L" kennedy has been shot" ) ); } namespace TestGetRangeFragments { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; String queryString = HighlighterTest::FIELD_NAME + L":[kannedy TO kznnedy]"; // Need to explicitly set the QueryParser property to use TermRangeQuery rather than RangeFilters QueryParserPtr parser = newLucene(HighlighterTest::TEST_VERSION, HighlighterTest::FIELD_NAME, fixture->analyzer); parser->setMultiTermRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); fixture->query = parser->parse(queryString); fixture->doSearching(fixture->query); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); EXPECT_EQ(fixture->numHighlights, 5); } }; } TEST_F(HighlighterTest, testGetRangeFragments) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start( newCollection( L" to Keneddy", L"John Kennedy has been shot", L" refers to Kennedy... 
to Kennedy", L" kennedy has been shot" ) ); } TEST_F(HighlighterTest, testConstantScoreMultiTermQuery) { numHighlights = 0; query = newLucene(newLucene(FIELD_NAME, L"ken*")); boost::dynamic_pointer_cast(query)->setRewriteMethod(MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE()); searcher = newLucene(ramDir, true); // can't rewrite ConstantScore if you want to highlight it - it rewrites to ConstantScoreQuery which cannot be highlighted // query = unReWrittenQuery.rewrite(reader); hits = searcher->search(query, FilterPtr(), 1000); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); int32_t maxNumFragmentsRequired = 2; String fragmentSeparator = L"..."; TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); highlighter->setTextFragmenter(newLucene(20)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator)); } EXPECT_EQ(numHighlights, 5); EXPECT_EQ(results.size(), 4); EXPECT_EQ(results[0], L" kennedy has been shot"); EXPECT_EQ(results[1], L" refers to Kennedy... 
to Kennedy"); EXPECT_EQ(results[2], L"John Kennedy has been shot"); EXPECT_EQ(results[3], L" to Keneddy"); // try null field hits = searcher->search(query, FilterPtr(), 1000); numHighlights = 0; results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); int32_t maxNumFragmentsRequired = 2; String fragmentSeparator = L"..."; TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, L""); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); highlighter->setTextFragmenter(newLucene(20)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator)); } EXPECT_EQ(numHighlights, 5); EXPECT_EQ(results.size(), 4); EXPECT_EQ(results[0], L" kennedy has been shot"); EXPECT_EQ(results[1], L" refers to Kennedy... to Kennedy"); EXPECT_EQ(results[2], L"John Kennedy has been shot"); EXPECT_EQ(results[3], L" to Keneddy"); // try default field hits = searcher->search(query, FilterPtr(), 1000); numHighlights = 0; results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); int32_t maxNumFragmentsRequired = 2; String fragmentSeparator = L"..."; TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, L"random_field", FIELD_NAME); HighlighterPtr highlighter = newLucene(newLucene(this), scorer); highlighter->setTextFragmenter(newLucene(20)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator)); } EXPECT_EQ(numHighlights, 5); EXPECT_EQ(results.size(), 4); EXPECT_EQ(results[0], L" kennedy has been shot"); EXPECT_EQ(results[1], L" refers to Kennedy... 
to Kennedy"); EXPECT_EQ(results[2], L"John Kennedy has been shot"); EXPECT_EQ(results[3], L" to Keneddy"); } namespace TestGetBestFragmentsPhrase { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"\"John Kennedy\""); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); // Currently highlights "John" and "Kennedy" separately EXPECT_EQ(fixture->numHighlights, 2); } }; } TEST_F(HighlighterTest, testGetBestFragmentsPhrase) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(newCollection(L"John Kennedy has been shot")); } namespace TestGetBestFragmentsQueryScorer { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; Collection clauses = newCollection( newLucene(newLucene(L"contents", L"john")), newLucene(newLucene(L"contents", L"kennedy")) ); SpanNearQueryPtr snq = newLucene(clauses, 1, true); fixture->doSearching(snq); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); // Currently highlights "John" and "Kennedy" separately EXPECT_EQ(fixture->numHighlights, 2); } }; } TEST_F(HighlighterTest, testGetBestFragmentsQueryScorer) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(newCollection(L"John Kennedy has been shot")); } namespace TestOffByOne { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: 
HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { TermQueryPtr query = newLucene(newLucene(L"data", L"help")); HighlighterPtr hg = newLucene(newLucene(), newLucene(query)); hg->setTextFragmenter(newLucene()); String match = hg->getBestFragment(fixture->analyzer, L"data", L"help me [54-65]"); EXPECT_EQ(L"help me [54-65]", match); } }; } TEST_F(HighlighterTest, testOffByOne) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestGetBestFragmentsFilteredQuery { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; TermRangeFilterPtr rf = newLucene(L"contents", L"john", L"john", true, true); Collection clauses = newCollection( newLucene(newLucene(L"contents", L"john")), newLucene(newLucene(L"contents", L"kennedy")) ); SpanNearQueryPtr snq = newLucene(clauses, 1, true); FilteredQueryPtr fq = newLucene(snq, rf); fixture->doSearching(fq); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); // Currently highlights "John" and "Kennedy" separately EXPECT_EQ(fixture->numHighlights, 2); } }; } TEST_F(HighlighterTest, testGetBestFragmentsFilteredQuery) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(newCollection(L"John Kennedy has been shot")); } namespace TestGetBestFragmentsFilteredPhraseQuery { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void 
run(Collection expected) { fixture->numHighlights = 0; TermRangeFilterPtr rf = newLucene(L"contents", L"john", L"john", true, true); PhraseQueryPtr pq = newLucene(); pq->add(newLucene(L"contents", L"john")); pq->add(newLucene(L"contents", L"kennedy")); FilteredQueryPtr fq = newLucene(pq, rf); fixture->doSearching(fq); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); // Currently highlights "John" and "Kennedy" separately EXPECT_EQ(fixture->numHighlights, 2); } }; } TEST_F(HighlighterTest, testGetBestFragmentsFilteredPhraseQuery) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(newCollection(L"John Kennedy has been shot")); } namespace TestGetBestFragmentsMultiTerm { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"John Kenn*"); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); EXPECT_EQ(fixture->numHighlights, 5); } }; } TEST_F(HighlighterTest, testGetBestFragmentsMultiTerm) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start( newCollection( L"John Kennedy has been shot", L" refers to Kennedy... 
to Kennedy", L" kennedy has been shot" ) ); } namespace TestGetBestFragmentsWithOr { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"JFK OR Kennedy"); doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); EXPECT_EQ(fixture->numHighlights, 5); } }; } TEST_F(HighlighterTest, testGetBestFragmentsWithOr) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start( newCollection( L"JFK has been shot", L"John Kennedy has been shot", L" refers to Kennedy... to Kennedy", L" kennedy has been shot" ) ); } namespace TestGetBestSingleFragment { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->doSearching(L"Kennedy"); fixture->numHighlights = 0; Collection results = Collection::newInstance(); for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTest::FIELD_NAME); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture)); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragment(tokenStream, text)); } EXPECT_EQ(fixture->numHighlights, 4); EXPECT_EQ(results.size(), 3); EXPECT_EQ(results[0], L"John Kennedy has been shot"); EXPECT_EQ(results[1], L"This piece of text refers to Kennedy"); 
EXPECT_EQ(results[2], L" kennedy has been shot"); fixture->numHighlights = 0; results = Collection::newInstance(); for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTest::FIELD_NAME); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture)); results.add(highlighter->getBestFragment(fixture->analyzer, HighlighterTest::FIELD_NAME, text)); } EXPECT_EQ(fixture->numHighlights, 4); EXPECT_EQ(results.size(), 3); EXPECT_EQ(results[0], L"John Kennedy has been shot"); EXPECT_EQ(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very"); EXPECT_EQ(results[2], L" is really here which says kennedy has been shot"); fixture->numHighlights = 0; results = Collection::newInstance(); for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTest::FIELD_NAME); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture)); highlighter->setTextFragmenter(newLucene(40)); Collection result = highlighter->getBestFragments(fixture->analyzer, HighlighterTest::FIELD_NAME, text, 10); results.addAll(result.begin(), result.end()); } EXPECT_EQ(fixture->numHighlights, 4); EXPECT_EQ(results.size(), 3); EXPECT_EQ(results[0], L"John Kennedy has been shot"); EXPECT_EQ(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy"); EXPECT_EQ(results[2], L"Hello this is a piece of text that is very long and contains too 
much preamble and the meat is really here which says kennedy has been shot"); } }; } TEST_F(HighlighterTest, testGetBestSingleFragment) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestGetBestSingleFragmentWithWeights { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { Collection wTerms = Collection::newInstance(2); wTerms[0] = newLucene(10.0, L"hello"); Collection positionSpans = newCollection(newLucene(0, 0)); boost::dynamic_pointer_cast(wTerms[0])->addPositionSpans(positionSpans); wTerms[1] = newLucene(1.0, L"kennedy"); positionSpans = newCollection(newLucene(14, 14)); boost::dynamic_pointer_cast(wTerms[1])->addPositionSpans(positionSpans); HighlighterPtr highlighter = getHighlighter(wTerms, newLucene(fixture)); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(fixture->texts[0])); highlighter->setTextFragmenter(newLucene(2)); String result = highlighter->getBestFragment(tokenStream, fixture->texts[0]); boost::trim(result); EXPECT_EQ(L"Hello", result); wTerms[1]->setWeight(50.0); tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(fixture->texts[0])); highlighter = getHighlighter(wTerms, newLucene(fixture)); highlighter->setTextFragmenter(newLucene(2)); result = highlighter->getBestFragment(tokenStream, fixture->texts[0]); boost::trim(result); EXPECT_EQ(L"kennedy", result); } }; } TEST_F(HighlighterTest, testGetBestSingleFragmentWithWeights) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestOverlapAnalyzer { class SynonymTokenizer : public TokenStream { public: SynonymTokenizer(const TokenStreamPtr& realStream, MapStringString synonyms) { this->realStream = 
realStream; this->synonyms = synonyms; this->synonymToken = 0; this->realTermAtt = realStream->addAttribute(); this->realPosIncrAtt = realStream->addAttribute(); this->realOffsetAtt = realStream->addAttribute(); this->termAtt = addAttribute(); this->posIncrAtt = addAttribute(); this->offsetAtt = addAttribute(); } virtual ~SynonymTokenizer() { } protected: TokenStreamPtr realStream; TokenPtr currentRealToken; TokenPtr cRealToken; MapStringString synonyms; Collection synonymTokens; int32_t synonymToken; TermAttributePtr realTermAtt; PositionIncrementAttributePtr realPosIncrAtt; OffsetAttributePtr realOffsetAtt; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken() { if (!currentRealToken) { bool next = realStream->incrementToken(); if (!next) { return false; } clearAttributes(); termAtt->setTermBuffer(realTermAtt->term()); offsetAtt->setOffset(realOffsetAtt->startOffset(), realOffsetAtt->endOffset()); posIncrAtt->setPositionIncrement(realPosIncrAtt->getPositionIncrement()); if (!synonyms.contains(realTermAtt->term())) { return true; } String expansions = synonyms.get(realTermAtt->term()); synonymTokens = StringUtils::split(expansions, L","); synonymToken = 0; if (!synonymTokens.empty()) { currentRealToken = newLucene(realOffsetAtt->startOffset(), realOffsetAtt->endOffset()); currentRealToken->setTermBuffer(realTermAtt->term()); } return true; } else { String tok = synonymTokens[synonymToken++]; clearAttributes(); termAtt->setTermBuffer(tok); offsetAtt->setOffset(currentRealToken->startOffset(), currentRealToken->endOffset()); posIncrAtt->setPositionIncrement(0); if (synonymToken == synonymTokens.size()) { currentRealToken.reset(); synonymTokens.reset(); synonymToken = 0; } return true; } } }; class SynonymAnalyzer : public Analyzer { public: SynonymAnalyzer(MapStringString synonyms) { this->synonyms = synonyms; } virtual ~SynonymAnalyzer() { } protected: MapStringString synonyms; 
public: virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader) { LowerCaseTokenizerPtr stream = newLucene(reader); stream->addAttribute(); stream->addAttribute(); stream->addAttribute(); return newLucene(stream, synonyms); } }; class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { MapStringString synonyms = MapStringString::newInstance(); synonyms.put(L"football", L"soccer,footie"); AnalyzerPtr analyzer = newLucene(synonyms); String srchkey = L"football"; String s = L"football-soccer in the euro 2004 footie competition"; QueryParserPtr parser = newLucene(HighlighterTest::TEST_VERSION, L"bookid", analyzer); QueryPtr query = parser->parse(srchkey); TokenStreamPtr tokenStream = analyzer->tokenStream(L"", newLucene(s)); HighlighterPtr highlighter = getHighlighter(query, L"", tokenStream, newLucene(fixture)); // Get 3 best fragments and separate with a "..." 
tokenStream = analyzer->tokenStream(L"", newLucene(s)); String result = highlighter->getBestFragments(tokenStream, s, 3, L"..."); String expectedResult = L"football-soccer in the euro 2004 footie competition"; EXPECT_EQ(expectedResult, result); } }; } /// tests a "complex" analyzer that produces multiple overlapping tokens TEST_F(HighlighterTest, testOverlapAnalyzer) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestGetSimpleHighlight { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"Kennedy"); Collection results = Collection::newInstance(); for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTest::FIELD_NAME); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture)); results.add(highlighter->getBestFragment(tokenStream, text)); } EXPECT_EQ(fixture->numHighlights, 4); EXPECT_EQ(results.size(), 3); EXPECT_EQ(results[0], L"John Kennedy has been shot"); EXPECT_EQ(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very"); EXPECT_EQ(results[2], L" is really here which says kennedy has been shot"); } }; } TEST_F(HighlighterTest, testGetSimpleHighlight) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestGetTextFragments { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : 
HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->doSearching(L"Kennedy"); for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTest::FIELD_NAME); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture)); highlighter->setTextFragmenter(newLucene(20)); Collection stringResults = highlighter->getBestFragments(tokenStream, text, 10); tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); Collection fragmentResults = highlighter->getBestTextFragments(tokenStream, text, true, 10); EXPECT_EQ(fragmentResults.size(), stringResults.size()); for (int32_t j = 0; j < stringResults.size(); ++j) { EXPECT_EQ(fragmentResults[j]->toString(), stringResults[j]); } } } }; } TEST_F(HighlighterTest, testGetTextFragments) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestMaxSizeHighlight { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; fixture->doSearching(L"meat"); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(fixture->texts[0])); HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture)); highlighter->setMaxDocCharsToAnalyze(30); highlighter->getBestFragment(tokenStream, fixture->texts[0]); EXPECT_EQ(fixture->numHighlights, 0); } }; } TEST_F(HighlighterTest, testMaxSizeHighlight) { 
HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestMaxSizeHighlightTruncates { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { String goodWord = L"goodtoken"; HashSet stopWords = HashSet::newInstance(); stopWords.add(L"stoppedtoken"); TermQueryPtr query = newLucene(newLucene(L"data", goodWord)); StringStream buffer; buffer << goodWord; for (int32_t i = 0; i < 10000; ++i) { // only one stopword buffer << L" " << *stopWords.begin(); } SimpleHTMLFormatterPtr fm = newLucene(); HighlighterPtr hg = getHighlighter(query, L"data", newLucene(HighlighterTest::TEST_VERSION, stopWords)->tokenStream(L"data", newLucene(buffer.str())), fm); hg->setTextFragmenter(newLucene()); hg->setMaxDocCharsToAnalyze(100); String match = hg->getBestFragment(newLucene(HighlighterTest::TEST_VERSION, stopWords), L"data", buffer.str()); EXPECT_TRUE((int32_t)match.length() < hg->getMaxDocCharsToAnalyze()); // add another tokenized word to the overall length - but set way beyond the length of text under consideration // (after a large slug of stop words + whitespace) buffer << L" " << goodWord; match = hg->getBestFragment(newLucene(HighlighterTest::TEST_VERSION, stopWords), L"data", buffer.str()); EXPECT_TRUE((int32_t)match.length() < hg->getMaxDocCharsToAnalyze()); } }; } TEST_F(HighlighterTest, testMaxSizeHighlightTruncates) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestMaxSizeEndHighlight { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { HashSet 
stopWords = HashSet::newInstance(); stopWords.add(L"in"); stopWords.add(L"it"); TermQueryPtr query = newLucene(newLucene(L"text", L"searchterm")); String text = L"this is a text with searchterm in it"; SimpleHTMLFormatterPtr fm = newLucene(); HighlighterPtr hg = getHighlighter(query, L"text", newLucene(HighlighterTest::TEST_VERSION, stopWords)->tokenStream(L"text", newLucene(text)), fm); hg->setTextFragmenter(newLucene()); hg->setMaxDocCharsToAnalyze(36); String match = hg->getBestFragment(newLucene(HighlighterTest::TEST_VERSION, stopWords), L"text", text); EXPECT_TRUE(boost::ends_with(match, L"in it")); } }; } TEST_F(HighlighterTest, testMaxSizeEndHighlight) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestUnRewrittenQuery { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->numHighlights = 0; // test to show how rewritten query can still be used fixture->searcher = newLucene(fixture->ramDir, true); AnalyzerPtr analyzer = newLucene(HighlighterTest::TEST_VERSION); QueryParserPtr parser = newLucene(HighlighterTest::TEST_VERSION, HighlighterTest::FIELD_NAME, analyzer); QueryPtr query = parser->parse(L"JF? 
or Kenned*"); TopDocsPtr hits = fixture->searcher->search(query, FilterPtr(), 1000); int32_t maxNumFragmentsRequired = 3; for (int32_t i = 0; i < hits->totalHits; ++i) { String text = fixture->searcher->doc(hits->scoreDocs[i]->doc)->get(HighlighterTest::FIELD_NAME); TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); HighlighterPtr highlighter = getHighlighter(query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture), false); highlighter->setTextFragmenter(newLucene(40)); highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"..."); } // We expect to have zero highlights if the query is multi-terms and is not rewritten EXPECT_EQ(fixture->numHighlights, 0); } }; } TEST_F(HighlighterTest, testUnRewrittenQuery) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestNoFragments { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { fixture->doSearching(L"AnInvalidQueryWhichShouldYieldNoResults"); for (int32_t i = 0; i < fixture->texts.size(); ++i) { String text = fixture->texts[i]; TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture)); String result = highlighter->getBestFragment(tokenStream, text); EXPECT_TRUE(result.empty()); } } }; } TEST_F(HighlighterTest, testNoFragments) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestEncoding { class NullScorer : public HighlighterScorer, public LuceneObject { public: virtual ~NullScorer() { } public: virtual void startFragment(const TextFragmentPtr& 
newFragment) { } virtual double getTokenScore() { return 0.0; } virtual double getFragmentScore() { return 1.0; } virtual TokenStreamPtr init(const TokenStreamPtr& tokenStream) { return TokenStreamPtr(); } }; } /// Demonstrates creation of an XHTML compliant doc using new encoding facilities. TEST_F(HighlighterTest, testEncoding) { String rawDocContent = L"\"Smith & sons' prices < 3 and >4\" claims article"; // run the highlighter on the raw content (scorer does not score any tokens for // highlighting but scores a single fragment for selection HighlighterPtr highlighter = newLucene(newLucene(this), newLucene(), newLucene()); highlighter->setTextFragmenter(newLucene(2000)); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(rawDocContent)); String encodedSnippet = highlighter->getBestFragments(tokenStream, rawDocContent, 1, L""); EXPECT_EQ(encodedSnippet, L""Smith & sons' prices < 3 and >4" claims article"); } TEST_F(HighlighterTest, testMultiSearcher) { // setup index 1 RAMDirectoryPtr ramDir1 = newLucene(); IndexWriterPtr writer1 = newLucene(ramDir1, newLucene(TEST_VERSION), true, IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr d = newLucene(); FieldPtr f = newLucene(FIELD_NAME, L"multiOne", Field::STORE_YES, Field::INDEX_ANALYZED); d->add(f); writer1->addDocument(d); writer1->optimize(); writer1->close(); IndexReaderPtr reader1 = IndexReader::open(ramDir1, true); // setup index 2 RAMDirectoryPtr ramDir2 = newLucene(); IndexWriterPtr writer2 = newLucene(ramDir2, newLucene(TEST_VERSION), true, IndexWriter::MaxFieldLengthUNLIMITED); d = newLucene(); f = newLucene(FIELD_NAME, L"multiTwo", Field::STORE_YES, Field::INDEX_ANALYZED); d->add(f); writer2->addDocument(d); writer2->optimize(); writer2->close(); IndexReaderPtr reader2 = IndexReader::open(ramDir2, true); Collection searchers = newCollection( newLucene(ramDir1, true), newLucene(ramDir2, true) ); MultiSearcherPtr multiSearcher = newLucene(searchers); QueryParserPtr parser = 
newLucene(TEST_VERSION, FIELD_NAME, newLucene(TEST_VERSION)); parser->setMultiTermRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); query = parser->parse(L"multi*"); // at this point the multisearcher calls combine(query[]) hits = multiSearcher->search(query, FilterPtr(), 1000); Collection expandedQueries = newCollection( query->rewrite(reader1), query->rewrite(reader2) ); query = query->combine(expandedQueries); // create an instance of the highlighter with the tags used to surround highlighted text HighlighterPtr highlighter = newLucene(newLucene(this), newLucene(query)); Collection results = Collection::newInstance(); for (int32_t i = 0; i < hits->totalHits; ++i) { String text = multiSearcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); String highlightedText = highlighter->getBestFragment(tokenStream, text); results.add(highlightedText); } EXPECT_EQ(results.size(), 2); EXPECT_EQ(results[0], L"multiOne"); EXPECT_EQ(results[1], L"multiTwo"); EXPECT_EQ(numHighlights, 2); } namespace TestFieldSpecificHighlighting { class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { String docMainText = L"fred is one of the people"; QueryParserPtr parser = newLucene(HighlighterTest::TEST_VERSION, HighlighterTest::FIELD_NAME, fixture->analyzer); QueryPtr query = parser->parse(L"fred category:people"); // highlighting respects fieldnames used in query HighlighterScorerPtr fieldSpecificScorer; if (mode == QUERY) { fieldSpecificScorer = newLucene(query, HighlighterTest::FIELD_NAME); } else if (mode == QUERY_TERM) { fieldSpecificScorer = newLucene(query, L"contents"); } HighlighterPtr fieldSpecificHighlighter = newLucene(newLucene(), fieldSpecificScorer); 
fieldSpecificHighlighter->setTextFragmenter(newLucene()); String result = fieldSpecificHighlighter->getBestFragment(fixture->analyzer, HighlighterTest::FIELD_NAME, docMainText); EXPECT_EQ(result, L"fred is one of the people"); // highlighting does not respect fieldnames used in query HighlighterScorerPtr fieldInSpecificScorer; if (mode == QUERY) { fieldInSpecificScorer = newLucene(query, L""); } else if (mode == QUERY_TERM) { fieldInSpecificScorer = newLucene(query); } HighlighterPtr fieldInSpecificHighlighter = newLucene(newLucene(), fieldInSpecificScorer); fieldInSpecificHighlighter->setTextFragmenter(newLucene()); result = fieldInSpecificHighlighter->getBestFragment(fixture->analyzer, HighlighterTest::FIELD_NAME, docMainText); EXPECT_EQ(result, L"fred is one of the people"); fixture->reader->close(); } }; } TEST_F(HighlighterTest, testFieldSpecificHighlighting) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } namespace TestOverlapAnalyzer2 { class TS2 : public TokenStream { public: TS2() { termAtt = addAttribute(); posIncrAtt = addAttribute(); offsetAtt = addAttribute(); lst = Collection::newInstance(); TokenPtr t = createToken(L"hi", 0, 2); t->setPositionIncrement(1); lst.add(t); t = createToken(L"hispeed", 0, 8); t->setPositionIncrement(1); lst.add(t); t = createToken(L"speed", 3, 8); t->setPositionIncrement(0); lst.add(t); t = createToken(L"10", 8, 10); t->setPositionIncrement(1); lst.add(t); t = createToken(L"foo", 11, 14); t->setPositionIncrement(1); lst.add(t); tokenPos = 0; } virtual ~TS2() { } protected: Collection lst; int32_t tokenPos; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken() { if (tokenPos < (int32_t)lst.size()) { TokenPtr token = lst[tokenPos++]; clearAttributes(); termAtt->setTermBuffer(token->term()); posIncrAtt->setPositionIncrement(token->getPositionIncrement()); offsetAtt->setOffset(token->startOffset(), 
token->endOffset()); return true; } return false; } protected: TokenPtr createToken(const String& term, int32_t start, int32_t offset) { TokenPtr token = newLucene(start, offset); token->setTermBuffer(term); return token; } }; /// same token-stream as above, but the bigger token comes first this time class TS2a : public TokenStream { public: TS2a() { termAtt = addAttribute(); posIncrAtt = addAttribute(); offsetAtt = addAttribute(); lst = Collection::newInstance(); TokenPtr t = createToken(L"hispeed", 0, 8); t->setPositionIncrement(1); lst.add(t); t = createToken(L"hi", 0, 2); t->setPositionIncrement(0); lst.add(t); t = createToken(L"speed", 3, 8); t->setPositionIncrement(1); lst.add(t); t = createToken(L"10", 8, 10); t->setPositionIncrement(1); lst.add(t); t = createToken(L"foo", 11, 14); t->setPositionIncrement(1); lst.add(t); tokenPos = 0; } virtual ~TS2a() { } protected: Collection lst; int32_t tokenPos; TermAttributePtr termAtt; PositionIncrementAttributePtr posIncrAtt; OffsetAttributePtr offsetAtt; public: virtual bool incrementToken() { if (tokenPos < (int32_t)lst.size()) { TokenPtr token = lst[tokenPos++]; clearAttributes(); termAtt->setTermBuffer(token->term()); posIncrAtt->setPositionIncrement(token->getPositionIncrement()); offsetAtt->setOffset(token->startOffset(), token->endOffset()); return true; } return false; } protected: TokenPtr createToken(const String& term, int32_t start, int32_t offset) { TokenPtr token = newLucene(start, offset); token->setTermBuffer(term); return token; } }; class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { public: HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { } virtual ~HelperHighlightRunner() { } public: virtual void run(Collection expected) { String s = L"Hi-Speed10 foo"; QueryPtr query; HighlighterPtr highlighter; String result; query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"foo"); highlighter = 
getHighlighter(query, L"text", getTS2(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); EXPECT_EQ(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"10"); highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); EXPECT_EQ(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"hi"); highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); EXPECT_EQ(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"speed"); highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); EXPECT_EQ(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"hispeed"); highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); EXPECT_EQ(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"hi speed"); highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); EXPECT_EQ(L"Hi-Speed10 foo", result); // same tests, just put the bigger overlapping token first query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"foo"); highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); EXPECT_EQ(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"10"); highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); result = 
highlighter->getBestFragments(getTS2a(), s, 3, L"..."); EXPECT_EQ(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"hi"); highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); EXPECT_EQ(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"speed"); highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); EXPECT_EQ(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"hispeed"); highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); EXPECT_EQ(L"Hi-Speed10 foo", result); query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"hi speed"); highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); EXPECT_EQ(L"Hi-Speed10 foo", result); } TokenStreamPtr getTS2() { return newLucene(); } TokenStreamPtr getTS2a() { return newLucene(); } }; } TEST_F(HighlighterTest, testOverlapAnalyzer2) { HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } TEST_F(HighlighterTest, testWeightedTermsWithDeletes) { makeIndex(); deleteDocument(); searchIndex(); } LucenePlusPlus-rel_3.0.9/src/test/contrib/memory/000077500000000000000000000000001456444476200220465ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/memory/MemoryIndexTest.cpp000066400000000000000000000171151456444476200256570ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include "TestUtils.h" #include "BaseTokenStreamFixture.h" #include "BufferedReader.h" #include "FileReader.h" #include "StopAnalyzer.h" #include "SimpleAnalyzer.h" #include "StandardAnalyzer.h" #include "Document.h" #include "Field.h" #include "MemoryIndex.h" #include "IndexSearcher.h" #include "TermDocs.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "QueryParser.h" #include "TopDocs.h" #include "Random.h" #include "FileUtils.h" using namespace Lucene; /// Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour, /// returning the same results for queries on some randomish indexes. class MemoryIndexTest : public BaseTokenStreamFixture { public: MemoryIndexTest() { fileDir = FileUtils::joinPath(getTestDir(), L"memory"); queries = HashSet::newInstance(); HashSet test1 = readQueries(L"testqueries.txt"); queries.addAll(test1.begin(), test1.end()); HashSet test2 = readQueries(L"testqueries2.txt"); queries.addAll(test2.begin(), test2.end()); random = newLucene(123); buffer = CharArray::newInstance(20); /// Some terms to be indexed, in addition to random words. /// These terms are commonly used in the queries. 
TEST_TERMS = Collection::newInstance(); TEST_TERMS.add(L"term"); TEST_TERMS.add(L"tErm"); TEST_TERMS.add(L"TERM"); TEST_TERMS.add(L"telm"); TEST_TERMS.add(L"stop"); TEST_TERMS.add(L"drop"); TEST_TERMS.add(L"roll"); TEST_TERMS.add(L"phrase"); TEST_TERMS.add(L"a"); TEST_TERMS.add(L"c"); TEST_TERMS.add(L"bar"); TEST_TERMS.add(L"blar"); TEST_TERMS.add(L"gack"); TEST_TERMS.add(L"weltbank"); TEST_TERMS.add(L"worlbank"); TEST_TERMS.add(L"hello"); TEST_TERMS.add(L"on"); TEST_TERMS.add(L"the"); TEST_TERMS.add(L"apache"); TEST_TERMS.add(L"Apache"); TEST_TERMS.add(L"copyright"); TEST_TERMS.add(L"Copyright"); } virtual ~MemoryIndexTest() { } protected: String fileDir; HashSet queries; RandomPtr random; CharArray buffer; static const int32_t ITERATIONS; Collection TEST_TERMS; public: /// read a set of queries from a resource file HashSet readQueries(const String& resource) { HashSet queries = HashSet::newInstance(); BufferedReaderPtr reader = newLucene(newLucene(FileUtils::joinPath(fileDir, resource))); String line; while (reader->readLine(line)) { boost::trim(line); if (!line.empty() && !boost::starts_with(line, L"#") && !boost::starts_with(line, L"//")) { queries.add(line); } } reader->close(); return queries; } /// Build a randomish document for both RAMDirectory and MemoryIndex, and run all the queries against it. 
void checkAgainstRAMDirectory() { StringStream fooField; StringStream termField; // add up to 250 terms to field "foo" int32_t fieldCount = random->nextInt(250) + 1; for (int32_t i = 0; i < fieldCount; ++i) { fooField << L" " << randomTerm(); } // add up to 250 terms to field "foo" int32_t termCount = random->nextInt(250) + 1; for (int32_t i = 0; i < termCount; ++i) { termField << L" " << randomTerm(); } RAMDirectoryPtr ramdir = newLucene(); AnalyzerPtr analyzer = randomAnalyzer(); IndexWriterPtr writer = newLucene(ramdir, analyzer, IndexWriter::MaxFieldLengthUNLIMITED); DocumentPtr doc = newLucene(); FieldPtr field1 = newLucene(L"foo", fooField.str(), Field::STORE_NO, Field::INDEX_ANALYZED); FieldPtr field2 = newLucene(L"term", termField.str(), Field::STORE_NO, Field::INDEX_ANALYZED); doc->add(field1); doc->add(field2); writer->addDocument(doc); writer->close(); MemoryIndexPtr memory = newLucene(); memory->addField(L"foo", fooField.str(), analyzer); memory->addField(L"term", termField.str(), analyzer); checkAllQueries(memory, ramdir, analyzer); } void checkAllQueries(const MemoryIndexPtr& memory, const RAMDirectoryPtr& ramdir, const AnalyzerPtr& analyzer) { IndexSearcherPtr ram = newLucene(ramdir); IndexSearcherPtr mem = memory->createSearcher(); QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"foo", analyzer); for (HashSet::iterator query = queries.begin(); query != queries.end(); ++query) { TopDocsPtr ramDocs = ram->search(qp->parse(*query), 1); TopDocsPtr memDocs = mem->search(qp->parse(*query), 1); EXPECT_EQ(ramDocs->totalHits, memDocs->totalHits); } } AnalyzerPtr randomAnalyzer() { switch (random->nextInt(3)) { case 0: return newLucene(); case 1: return newLucene(LuceneVersion::LUCENE_CURRENT); default: return newLucene(LuceneVersion::LUCENE_CURRENT); } } /// half of the time, returns a random term from TEST_TERMS. /// the other half of the time, returns a random unicode string. 
String randomTerm() { if (random->nextInt() % 2 == 1) { // return a random TEST_TERM return TEST_TERMS[random->nextInt(TEST_TERMS.size())]; } else { // return a random unicode term return randomString(); } } /// Return a random unicode term, like StressIndexingTest. String randomString() { int32_t end = random->nextInt(20); if (buffer.size() < 1 + end) { buffer.resize((int32_t)((double)(1 + end) * 1.25)); } for (int32_t i = 0; i < end; ++i) { int32_t t = random->nextInt(5); if (t == 0 && i < end - 1) { #ifdef LPP_UNICODE_CHAR_SIZE_2 // Make a surrogate pair // High surrogate buffer[i++] = (wchar_t)nextInt(0xd800, 0xdc00); // Low surrogate buffer[i] = (wchar_t)nextInt(0xdc00, 0xe000); #else buffer[i] = (wchar_t)nextInt(0xdc00, 0xe000); #endif } else if (t <= 1) { buffer[i] = (wchar_t)nextInt(0x01, 0x80); } else if (t == 2) { buffer[i] = (wchar_t)nextInt(0x80, 0x800); } else if (t == 3) { buffer[i] = (wchar_t)nextInt(0x800, 0xd800); } else if (t == 4) { buffer[i] = (wchar_t)nextInt(0xe000, 0xfff0); } } return String(buffer.get(), end); } /// start is inclusive and end is exclusive int32_t nextInt(int32_t start, int32_t end) { return start + random->nextInt(end - start); } }; const int32_t MemoryIndexTest::ITERATIONS = 100; /// runs random tests, up to ITERATIONS times. TEST_F(MemoryIndexTest, testRandomQueries) { for (int32_t i = 0; i < ITERATIONS; ++i) { checkAgainstRAMDirectory(); } } LucenePlusPlus-rel_3.0.9/src/test/contrib/snowball/000077500000000000000000000000001456444476200223575ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/contrib/snowball/SnowballTest.cpp000066400000000000000000000025541456444476200255120ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "BaseTokenStreamFixture.h" #include "SnowballAnalyzer.h" #include "StopAnalyzer.h" using namespace Lucene; typedef BaseTokenStreamFixture SnowballTest; TEST_F(SnowballTest, testEnglish) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, L"english"); checkAnalyzesTo(a, L"he abhorred accents", newCollection(L"he", L"abhor", L"accent")); } TEST_F(SnowballTest, testStopwords) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, L"english", StopAnalyzer::ENGLISH_STOP_WORDS_SET()); checkAnalyzesTo(a, L"the quick brown fox jumped", newCollection(L"quick", L"brown", L"fox", L"jump")); } TEST_F(SnowballTest, testReusableTokenStream) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, L"english"); checkAnalyzesToReuse(a, L"he abhorred accents", newCollection(L"he", L"abhor", L"accent")); checkAnalyzesToReuse(a, L"she abhorred him", newCollection(L"she", L"abhor", L"him")); } LucenePlusPlus-rel_3.0.9/src/test/document/000077500000000000000000000000001456444476200207145ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/document/BinaryDocumentTest.cpp000066400000000000000000000110441456444476200252030ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Field.h" #include "Document.h" #include "MockRAMDirectory.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "IndexReader.h" #include "CompressionTools.h" using namespace Lucene; typedef LuceneTestFixture BinaryDocumentTest; static String binaryValStored = L"this text will be stored as a byte array in the index"; static String binaryValCompressed = L"this text will be also stored and compressed as a byte array in the index"; TEST_F(BinaryDocumentTest, testBinaryFieldInIndex) { ByteArray binaryStored = ByteArray::newInstance(binaryValStored.length() * sizeof(wchar_t)); std::wcsncpy((wchar_t*)binaryStored.get(), binaryValStored.c_str(), binaryValStored.length()); FieldablePtr binaryFldStored = newLucene(L"binaryStored", binaryStored, Field::STORE_YES); FieldablePtr stringFldStored = newLucene(L"stringStored", binaryValStored, Field::STORE_YES, Field::INDEX_NO, Field::TERM_VECTOR_NO); // binary fields with store off are not allowed try { newLucene(L"fail", binaryStored, Field::STORE_NO); } catch (IllegalArgumentException& e) { EXPECT_TRUE(check_exception(LuceneException::IllegalArgument)(e)); } DocumentPtr doc = newLucene(); doc->add(binaryFldStored); doc->add(stringFldStored); // test for field count EXPECT_EQ(2, doc->getFields().size()); // add the doc to a ram index MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(doc); writer->close(); // open a reader and fetch the document IndexReaderPtr reader = IndexReader::open(dir, false); DocumentPtr docFromReader = reader->document(0); EXPECT_TRUE(docFromReader); // fetch the binary stored field and compare it's content with the original one ByteArray storedTest = docFromReader->getBinaryValue(L"binaryStored"); String 
binaryFldStoredTest((wchar_t*)storedTest.get(), storedTest.size() / sizeof(wchar_t)); EXPECT_EQ(binaryFldStoredTest, binaryValStored); // fetch the string field and compare it's content with the original one String stringFldStoredTest = docFromReader->get(L"stringStored"); EXPECT_EQ(stringFldStoredTest, binaryValStored); // delete the document from index reader->deleteDocument(0); EXPECT_EQ(0, reader->numDocs()); reader->close(); dir->close(); } TEST_F(BinaryDocumentTest, testCompressionTools) { ByteArray binaryCompressed = ByteArray::newInstance(binaryValCompressed.length() * sizeof(wchar_t)); std::wcsncpy((wchar_t*)binaryCompressed.get(), binaryValCompressed.c_str(), binaryValCompressed.length()); FieldablePtr binaryFldCompressed = newLucene(L"binaryCompressed", CompressionTools::compress(binaryCompressed), Field::STORE_YES); FieldablePtr stringFldCompressed = newLucene(L"stringCompressed", CompressionTools::compressString(binaryValCompressed), Field::STORE_YES); DocumentPtr doc = newLucene(); doc->add(binaryFldCompressed); doc->add(stringFldCompressed); // add the doc to a ram index MockRAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(doc); writer->close(); // open a reader and fetch the document IndexReaderPtr reader = IndexReader::open(dir, false); DocumentPtr docFromReader = reader->document(0); EXPECT_TRUE(docFromReader); // fetch the binary compressed field and compare it's content with the original one ByteArray compressTest = CompressionTools::decompress(docFromReader->getBinaryValue(L"binaryCompressed")); String binaryFldCompressedTest((wchar_t*)compressTest.get(), compressTest.size() / sizeof(wchar_t)); EXPECT_EQ(binaryFldCompressedTest, binaryValCompressed); EXPECT_EQ(CompressionTools::decompressString(docFromReader->getBinaryValue(L"stringCompressed")), binaryValCompressed); reader->close(); dir->close(); } 
LucenePlusPlus-rel_3.0.9/src/test/document/DateFieldTest.cpp000066400000000000000000000021051456444476200240770ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "DateField.h" using namespace Lucene; typedef LuceneTestFixture DateFieldTest; TEST_F(DateFieldTest, testMinDate) { EXPECT_EQ(DateField::MIN_DATE_STRING(), L"000000000"); } TEST_F(DateFieldTest, testMaxDate) { EXPECT_EQ(DateField::MAX_DATE_STRING(), L"zzzzzzzzz"); } TEST_F(DateFieldTest, testDateToString) { EXPECT_EQ(DateField::dateToString(boost::posix_time::ptime(boost::gregorian::date(2010, boost::gregorian::Jan, 14))), L"0g4erxmo0"); } TEST_F(DateFieldTest, testTimeToString) { EXPECT_EQ(DateField::timeToString(1263427200000LL), L"0g4erxmo0"); } TEST_F(DateFieldTest, testStringToTime) { EXPECT_EQ(DateField::stringToTime(L"0g4erxmo0"), 1263427200000LL); } LucenePlusPlus-rel_3.0.9/src/test/document/DateToolsTest.cpp000066400000000000000000000227051456444476200241640ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include #include #include "LuceneTestFixture.h" #include "DateTools.h" using namespace Lucene; using namespace boost::posix_time; using namespace boost::gregorian; typedef LuceneTestFixture DateToolsTest; TEST_F(DateToolsTest, testDateToString) { EXPECT_EQ(DateTools::dateToString(ptime(date(2010, Jan, 14)), DateTools::RESOLUTION_YEAR), L"2010"); EXPECT_EQ(DateTools::dateToString(ptime(date(2010, Jan, 14)), DateTools::RESOLUTION_MONTH), L"201001"); EXPECT_EQ(DateTools::dateToString(ptime(date(2010, Jan, 14)), DateTools::RESOLUTION_DAY), L"20100114"); EXPECT_EQ(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5)), DateTools::RESOLUTION_HOUR), L"2010011403"); EXPECT_EQ(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5)), DateTools::RESOLUTION_MINUTE), L"201001140341"); EXPECT_EQ(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5)), DateTools::RESOLUTION_SECOND), L"20100114034105"); EXPECT_EQ(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MILLISECOND), L"20100114034105123"); } TEST_F(DateToolsTest, testTimeToString) { EXPECT_EQ(DateTools::timeToString(1263427200000LL, DateTools::RESOLUTION_YEAR), L"2010"); EXPECT_EQ(DateTools::timeToString(1263427200000LL, DateTools::RESOLUTION_MONTH), L"201001"); EXPECT_EQ(DateTools::timeToString(1263427200000LL, DateTools::RESOLUTION_DAY), L"20100114"); EXPECT_EQ(DateTools::timeToString(1263440465000LL, DateTools::RESOLUTION_HOUR), L"2010011403"); EXPECT_EQ(DateTools::timeToString(1263440465000LL, DateTools::RESOLUTION_MINUTE), L"201001140341"); EXPECT_EQ(DateTools::timeToString(1263440465000LL, DateTools::RESOLUTION_SECOND), L"20100114034105"); EXPECT_EQ(DateTools::timeToString(1263440465123LL, DateTools::RESOLUTION_MILLISECOND), 
L"20100114034105123"); } TEST_F(DateToolsTest, testStringToTime) { EXPECT_EQ(DateTools::stringToTime(L"2010"), 1262304000000LL); EXPECT_EQ(DateTools::stringToTime(L"201001"), 1262304000000LL); EXPECT_EQ(DateTools::stringToTime(L"20100114"), 1263427200000LL); EXPECT_EQ(DateTools::stringToTime(L"2010011403"), 1263438000000LL); EXPECT_EQ(DateTools::stringToTime(L"201001140341"), 1263440460000LL); EXPECT_EQ(DateTools::stringToTime(L"20100114034105"), 1263440465000LL); EXPECT_EQ(DateTools::stringToTime(L"20100114034105123"), 1263440465123LL); } TEST_F(DateToolsTest, testDateRound) { EXPECT_EQ(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_YEAR), ptime(date(2010, Jan, 1))); EXPECT_EQ(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MONTH), ptime(date(2010, Feb, 1))); EXPECT_EQ(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_DAY), ptime(date(2010, Feb, 16))); EXPECT_EQ(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_HOUR), ptime(date(2010, Feb, 16), hours(3))); EXPECT_EQ(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MINUTE), ptime(date(2010, Feb, 16), hours(3) + minutes(41))); EXPECT_EQ(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_SECOND), ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5))); EXPECT_EQ(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MILLISECOND), ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123))); } TEST_F(DateToolsTest, testParseDateGB) { 
DateTools::setDateOrder(DateTools::DATEORDER_DMY); EXPECT_EQ(DateTools::parseDate(L"01122005"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"011205"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"01/12/2005"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"01/12/05"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"1/12/2005"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"1/12/05"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"1/1/05"), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"1/Jan/05"), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"01/Jan/05"), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"01/Jan/2005"), ptime(date(2005, 01, 01))); } TEST_F(DateToolsTest, testParseDateUS) { DateTools::setDateOrder(DateTools::DATEORDER_MDY); EXPECT_EQ(DateTools::parseDate(L"12012005"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"120105"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"12/01/2005"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"12/01/05"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"12/1/2005"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"12/1/05"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"1/1/05"), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"Jan/1/05"), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"Jan/01/05"), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"Jan/01/2005"), ptime(date(2005, 01, 01))); } TEST_F(DateToolsTest, testParseDateLocale) { bool hasThisLocale = false; try { std::locale("en_GB.UTF-8"); hasThisLocale = true; } catch (...) 
{ } if (hasThisLocale) { DateTools::setDateOrder(DateTools::DATEORDER_LOCALE); EXPECT_EQ(DateTools::parseDate(L"01122005", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"011205", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"01/12/2005", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"01/12/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"1/12/2005", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"1/12/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"1/1/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"1/Jan/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"01/Jan/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"01/Jan/2005", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); } try { std::locale("en_US.UTF-8"); hasThisLocale = true; } catch (...) 
{ hasThisLocale = false; } if (hasThisLocale) { DateTools::setDateOrder(DateTools::DATEORDER_LOCALE); EXPECT_EQ(DateTools::parseDate(L"12012005", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"120105", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"12/01/2005", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"12/01/05", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"12/1/2005", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"12/1/05", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"1/1/05", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"Jan/1/05", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"Jan/01/05", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"Jan/01/2005", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); } } TEST_F(DateToolsTest, testParseDateSeparator) { DateTools::setDateOrder(DateTools::DATEORDER_DMY); EXPECT_EQ(DateTools::parseDate(L"01122005"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"011205"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"01-12-2005"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"01 12 05"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"1.12.2005"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"1.12.05"), ptime(date(2005, 12, 01))); EXPECT_EQ(DateTools::parseDate(L"1 1 05"), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"1 Jan 05"), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"01-Jan-05"), ptime(date(2005, 01, 01))); EXPECT_EQ(DateTools::parseDate(L"01,Jan,2005"), ptime(date(2005, 01, 01))); } 
LucenePlusPlus-rel_3.0.9/src/test/document/DocumentTest.cpp000066400000000000000000000237321456444476200240450ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "Document.h" #include "Field.h" #include "RAMDirectory.h" #include "IndexWriter.h" #include "StandardAnalyzer.h" #include "IndexSearcher.h" #include "TermQuery.h" #include "Term.h" #include "ScoreDoc.h" #include "TopDocs.h" using namespace Lucene; typedef LuceneTestFixture DocumentTest; static String binaryVal = L"this text will be stored as a byte array in the index"; static String binaryVal2 = L"this text will be also stored as a byte array in the index"; static DocumentPtr makeDocumentWithFields() { DocumentPtr doc = newLucene(); doc->add(newLucene(L"keyword", L"test1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"keyword", L"test2", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"text", L"test1", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"text", L"test2", Field::STORE_YES, Field::INDEX_ANALYZED)); doc->add(newLucene(L"unindexed", L"test1", Field::STORE_YES, Field::INDEX_NO)); doc->add(newLucene(L"unindexed", L"test2", Field::STORE_YES, Field::INDEX_NO)); doc->add(newLucene(L"unstored", L"test1", Field::STORE_NO, Field::INDEX_ANALYZED)); doc->add(newLucene(L"unstored", L"test2", Field::STORE_NO, Field::INDEX_ANALYZED)); return doc; } static void checkDocument(const DocumentPtr& doc, bool fromIndex) { Collection keywordFieldValues = doc->getValues(L"keyword"); Collection textFieldValues = doc->getValues(L"text"); Collection unindexedFieldValues = 
doc->getValues(L"unindexed"); Collection unstoredFieldValues = doc->getValues(L"unstored"); EXPECT_EQ(keywordFieldValues.size(), 2); EXPECT_EQ(textFieldValues.size(), 2); EXPECT_EQ(unindexedFieldValues.size(), 2); // this test cannot work for documents retrieved from the index since unstored fields will obviously not be returned if (!fromIndex) { EXPECT_EQ(unstoredFieldValues.size(), 2); } EXPECT_EQ(keywordFieldValues[0], L"test1"); EXPECT_EQ(keywordFieldValues[1], L"test2"); EXPECT_EQ(textFieldValues[0], L"test1"); EXPECT_EQ(textFieldValues[1], L"test2"); EXPECT_EQ(unindexedFieldValues[0], L"test1"); EXPECT_EQ(unindexedFieldValues[1], L"test2"); // this test cannot work for documents retrieved from the index since unstored fields will obviously not be returned if (!fromIndex) { EXPECT_EQ(unstoredFieldValues[0], L"test1"); EXPECT_EQ(unstoredFieldValues[1], L"test2"); } } TEST_F(DocumentTest, testBinaryField) { DocumentPtr doc = newLucene(); FieldablePtr stringFld = newLucene(L"string", binaryVal, Field::STORE_YES, Field::INDEX_NO); ByteArray binaryBytes1 = ByteArray::newInstance(binaryVal.length() * sizeof(wchar_t)); std::wcsncpy((wchar_t*)binaryBytes1.get(), binaryVal.c_str(), binaryVal.length()); FieldablePtr binaryFld = newLucene(L"binary", binaryBytes1, Field::STORE_YES); ByteArray binaryBytes2 = ByteArray::newInstance(binaryVal2.length() * sizeof(wchar_t)); std::wcsncpy((wchar_t*)binaryBytes2.get(), binaryVal2.c_str(), binaryVal2.length()); FieldablePtr binaryFld2 = newLucene(L"binary", binaryBytes2, Field::STORE_YES); doc->add(stringFld); doc->add(binaryFld); EXPECT_EQ(2, doc->getFields().size()); EXPECT_TRUE(binaryFld->isBinary()); EXPECT_TRUE(binaryFld->isStored()); EXPECT_TRUE(!binaryFld->isIndexed()); EXPECT_TRUE(!binaryFld->isTokenized()); ByteArray bytesTest = doc->getBinaryValue(L"binary"); String binaryTest((wchar_t*)bytesTest.get(), bytesTest.size() / sizeof(wchar_t)); EXPECT_EQ(binaryTest, binaryVal); String stringTest = doc->get(L"string"); 
EXPECT_EQ(binaryTest, stringTest); doc->add(binaryFld2); EXPECT_EQ(3, doc->getFields().size()); Collection binaryTests = doc->getBinaryValues(L"binary"); EXPECT_EQ(2, binaryTests.size()); bytesTest = binaryTests[0]; binaryTest = String((wchar_t*)bytesTest.get(), bytesTest.size() / sizeof(wchar_t)); ByteArray bytesTest2 = binaryTests[1]; String binaryTest2((wchar_t*)bytesTest2.get(), bytesTest2.size() / sizeof(wchar_t)); EXPECT_NE(binaryTest, binaryTest2); EXPECT_EQ(binaryTest, binaryVal); EXPECT_EQ(binaryTest2, binaryVal2); doc->removeField(L"string"); EXPECT_EQ(2, doc->getFields().size()); doc->removeFields(L"binary"); EXPECT_EQ(0, doc->getFields().size()); } /// Tests {@link Document#removeField(String)} method for a brand new Document that has not been indexed yet. TEST_F(DocumentTest, testRemoveForNewDocument) { DocumentPtr doc = makeDocumentWithFields(); EXPECT_EQ(8, doc->getFields().size()); doc->removeFields(L"keyword"); EXPECT_EQ(6, doc->getFields().size()); doc->removeFields(L"doesnotexists"); // removing non-existing fields is silently ignored doc->removeFields(L"keyword"); // removing a field more than once EXPECT_EQ(6, doc->getFields().size()); doc->removeField(L"text"); EXPECT_EQ(5, doc->getFields().size()); doc->removeField(L"text"); EXPECT_EQ(4, doc->getFields().size()); doc->removeField(L"text"); EXPECT_EQ(4, doc->getFields().size()); doc->removeField(L"doesnotexists"); // removing non-existing fields is silently ignored EXPECT_EQ(4, doc->getFields().size()); doc->removeFields(L"unindexed"); EXPECT_EQ(2, doc->getFields().size()); doc->removeFields(L"unstored"); EXPECT_EQ(0, doc->getFields().size()); doc->removeFields(L"doesnotexists"); // removing non-existing fields is silently ignored EXPECT_EQ(0, doc->getFields().size()); } TEST_F(DocumentTest, testConstructorExceptions) { newLucene(L"name", L"value", Field::STORE_YES, Field::INDEX_NO); // ok newLucene(L"name", L"value", Field::STORE_NO, Field::INDEX_NOT_ANALYZED); // ok try { newLucene(L"name", 
L"value", Field::STORE_NO, Field::INDEX_NO); } catch (IllegalArgumentException& e) { EXPECT_TRUE(check_exception(LuceneException::IllegalArgument)(e)); } newLucene(L"name", L"value", Field::STORE_YES, Field::INDEX_NO, Field::TERM_VECTOR_NO); // ok try { newLucene(L"name", L"value", Field::STORE_YES, Field::INDEX_NO, Field::TERM_VECTOR_YES); } catch (IllegalArgumentException& e) { EXPECT_TRUE(check_exception(LuceneException::IllegalArgument)(e)); } } /// Tests {@link Document#getValues(String)} method for a brand new Document that has not been indexed yet. TEST_F(DocumentTest, testGetValuesForNewDocument) { checkDocument(makeDocumentWithFields(), false); } /// Tests {@link Document#getValues(String)} method for a Document retrieved from an index. TEST_F(DocumentTest, testGetValuesForIndexedDocument) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(makeDocumentWithFields()); writer->close(); SearcherPtr searcher = newLucene(dir, true); // search for something that does exists QueryPtr query = newLucene(newLucene(L"keyword", L"test1")); // ensure that queries return expected results without DateFilter first Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; EXPECT_EQ(1, hits.size()); checkDocument(searcher->doc(hits[0]->doc), true); searcher->close(); } TEST_F(DocumentTest, testFieldSetValue) { FieldPtr field = newLucene(L"id", L"id1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED); DocumentPtr doc = newLucene(); doc->add(field); doc->add(newLucene(L"keyword", L"test", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(doc); field->setValue(L"id2"); writer->addDocument(doc); field->setValue(L"id3"); writer->addDocument(doc); writer->close(); 
SearcherPtr searcher = newLucene(dir, true); QueryPtr query = newLucene(newLucene(L"keyword", L"test")); // ensure that queries return expected results without DateFilter first Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; EXPECT_EQ(3, hits.size()); int32_t result = 0; for (int32_t i = 0; i < 3; ++i) { DocumentPtr doc2 = searcher->doc(hits[i]->doc); FieldPtr f = doc2->getField(L"id"); if (f->stringValue() == L"id1") { result |= 1; } else if (f->stringValue() == L"id2") { result |= 2; } else if (f->stringValue() == L"id3") { result |= 4; } else { FAIL() << "unexpected id field"; } } searcher->close(); dir->close(); EXPECT_EQ(7, result); } TEST_F(DocumentTest, testFieldSetValueChangeBinary) { FieldPtr field1 = newLucene(L"field1", ByteArray::newInstance(0), Field::STORE_YES); FieldPtr field2 = newLucene(L"field2", L"", Field::STORE_YES, Field::INDEX_ANALYZED); try { field1->setValue(L"abc"); } catch (IllegalArgumentException& e) { EXPECT_TRUE(check_exception(LuceneException::IllegalArgument)(e)); } try { field2->setValue(ByteArray::newInstance(0)); } catch (IllegalArgumentException& e) { EXPECT_TRUE(check_exception(LuceneException::IllegalArgument)(e)); } } LucenePlusPlus-rel_3.0.9/src/test/document/NumberToolsTest.cpp000066400000000000000000000046041456444476200245350ustar00rootroot00000000000000///////////////////////////////////////////////////////////////////////////// // Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "TestInc.h" #include "LuceneTestFixture.h" #include "NumberTools.h" using namespace Lucene; typedef LuceneTestFixture NumberToolsTest; TEST_F(NumberToolsTest, testMinValue) { EXPECT_EQ(NumberTools::MIN_STRING_VALUE(), L"-0000000000000"); } TEST_F(NumberToolsTest, testMaxValue) { EXPECT_EQ(NumberTools::MAX_STRING_VALUE(), L"01y2p0ij32e8e7"); } TEST_F(NumberToolsTest, testValueSize) { EXPECT_EQ(NumberTools::STR_SIZE(), 14); } TEST_F(NumberToolsTest, testLongToString) { EXPECT_EQ(NumberTools::longToString(LLONG_MIN), L"-0000000000000"); EXPECT_EQ(NumberTools::longToString(LLONG_MAX), L"01y2p0ij32e8e7"); EXPECT_EQ(NumberTools::longToString(1LL), L"00000000000001"); EXPECT_EQ(NumberTools::longToString(999LL), L"000000000000rr"); EXPECT_EQ(NumberTools::longToString(34234LL), L"00000000000qey"); EXPECT_EQ(NumberTools::longToString(4345325254LL), L"00000001zv3efa"); EXPECT_EQ(NumberTools::longToString(986778657657575LL), L"00009ps7uuwdlz"); EXPECT_EQ(NumberTools::longToString(23232143543434234LL), L"0006cr3vell8my"); } TEST_F(NumberToolsTest, testStringToLong) { EXPECT_EQ(NumberTools::stringToLong(L"-0000000000000"), LLONG_MIN); EXPECT_EQ(NumberTools::stringToLong(L"01y2p0ij32e8e7"), LLONG_MAX); EXPECT_EQ(NumberTools::stringToLong(L"00000000000001"), 1LL); EXPECT_EQ(NumberTools::stringToLong(L"000000000000rr"), 999LL); EXPECT_EQ(NumberTools::stringToLong(L"00000000000qey"), 34234LL); EXPECT_EQ(NumberTools::stringToLong(L"00000001zv3efa"), 4345325254LL); EXPECT_EQ(NumberTools::stringToLong(L"00009ps7uuwdlz"), 986778657657575LL); EXPECT_EQ(NumberTools::stringToLong(L"0006cr3vell8my"), 23232143543434234LL); try { NumberTools::stringToLong(L"32132"); } catch (LuceneException& e) { EXPECT_TRUE(check_exception(LuceneException::NumberFormat)(e)); // wrong length } try { NumberTools::stringToLong(L"9006cr3vell8my"); } catch (LuceneException& e) { 
EXPECT_TRUE(check_exception(LuceneException::NumberFormat)(e)); // wrong prefix } } LucenePlusPlus-rel_3.0.9/src/test/gtest/000077500000000000000000000000001456444476200202245ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/gtest/.clang-format000066400000000000000000000001641456444476200226000ustar00rootroot00000000000000# Run manually to reformat a file: # clang-format -i --style=file Language: Cpp BasedOnStyle: Google LucenePlusPlus-rel_3.0.9/src/test/gtest/.gitignore000066400000000000000000000030311456444476200222110ustar00rootroot00000000000000# Ignore CI build directory build/ xcuserdata cmake-build-debug/ .idea/ bazel-bin bazel-genfiles bazel-googletest bazel-out bazel-testlogs # python *.pyc # Visual Studio files .vs *.sdf *.opensdf *.VC.opendb *.suo *.user _ReSharper.Caches/ Win32-Debug/ Win32-Release/ x64-Debug/ x64-Release/ # Ignore autoconf / automake files Makefile.in aclocal.m4 configure build-aux/ autom4te.cache/ googletest/m4/libtool.m4 googletest/m4/ltoptions.m4 googletest/m4/ltsugar.m4 googletest/m4/ltversion.m4 googletest/m4/lt~obsolete.m4 googlemock/m4 # Ignore generated directories. googlemock/fused-src/ googletest/fused-src/ # macOS files .DS_Store googletest/.DS_Store googletest/xcode/.DS_Store # Ignore cmake generated directories and files. 
CMakeFiles CTestTestfile.cmake Makefile cmake_install.cmake googlemock/CMakeFiles googlemock/CTestTestfile.cmake googlemock/Makefile googlemock/cmake_install.cmake googlemock/gtest /bin /googlemock/gmock.dir /googlemock/gmock_main.dir /googlemock/RUN_TESTS.vcxproj.filters /googlemock/RUN_TESTS.vcxproj /googlemock/INSTALL.vcxproj.filters /googlemock/INSTALL.vcxproj /googlemock/gmock_main.vcxproj.filters /googlemock/gmock_main.vcxproj /googlemock/gmock.vcxproj.filters /googlemock/gmock.vcxproj /googlemock/gmock.sln /googlemock/ALL_BUILD.vcxproj.filters /googlemock/ALL_BUILD.vcxproj /lib /Win32 /ZERO_CHECK.vcxproj.filters /ZERO_CHECK.vcxproj /RUN_TESTS.vcxproj.filters /RUN_TESTS.vcxproj /INSTALL.vcxproj.filters /INSTALL.vcxproj /googletest-distribution.sln /CMakeCache.txt /ALL_BUILD.vcxproj.filters /ALL_BUILD.vcxproj LucenePlusPlus-rel_3.0.9/src/test/gtest/.travis.yml000066400000000000000000000046061456444476200223430ustar00rootroot00000000000000# Build matrix / environment variable are explained on: # https://docs.travis-ci.com/user/customizing-the-build/ # This file can be validated on: # http://lint.travis-ci.org/ language: cpp # Define the matrix explicitly, manually expanding the combinations of (os, compiler, env). # It is more tedious, but grants us far more flexibility. 
matrix: include: - os: linux before_install: chmod -R +x ./ci/*platformio.sh install: ./ci/install-platformio.sh script: ./ci/build-platformio.sh - os: linux dist: xenial compiler: gcc install: ./ci/install-linux.sh && ./ci/log-config.sh script: ./ci/build-linux-bazel.sh - os: linux dist: xenial compiler: clang install: ./ci/install-linux.sh && ./ci/log-config.sh script: ./ci/build-linux-bazel.sh - os: linux compiler: gcc env: BUILD_TYPE=Debug VERBOSE=1 CXX_FLAGS=-std=c++11 - os: linux compiler: clang env: BUILD_TYPE=Release VERBOSE=1 CXX_FLAGS=-std=c++11 -Wgnu-zero-variadic-macro-arguments - os: linux compiler: clang env: BUILD_TYPE=Release VERBOSE=1 CXX_FLAGS=-std=c++11 NO_EXCEPTION=ON NO_RTTI=ON COMPILER_IS_GNUCXX=ON - os: osx compiler: gcc env: BUILD_TYPE=Release VERBOSE=1 CXX_FLAGS=-std=c++11 HOMEBREW_LOGS=~/homebrew-logs HOMEBREW_TEMP=~/homebrew-temp - os: osx compiler: clang env: BUILD_TYPE=Release VERBOSE=1 CXX_FLAGS=-std=c++11 HOMEBREW_LOGS=~/homebrew-logs HOMEBREW_TEMP=~/homebrew-temp # These are the install and build (script) phases for the most common entries in the matrix. They could be included # in each entry in the matrix, but that is just repetitive. install: - ./ci/install-${TRAVIS_OS_NAME}.sh - . ./ci/env-${TRAVIS_OS_NAME}.sh - ./ci/log-config.sh script: ./ci/travis.sh # This section installs the necessary dependencies. 
addons: apt: # List of whitelisted in travis packages for ubuntu-precise can be found here: # https://github.com/travis-ci/apt-package-whitelist/blob/master/ubuntu-precise # List of whitelisted in travis apt-sources: # https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json sources: - ubuntu-toolchain-r-test - llvm-toolchain-precise-3.9 packages: - g++-4.9 - clang-3.9 update: true homebrew: packages: - ccache - gcc@4.9 - llvm@4 update: true notifications: email: false LucenePlusPlus-rel_3.0.9/src/test/gtest/BUILD.bazel000066400000000000000000000126241456444476200221070ustar00rootroot00000000000000# Copyright 2017 Google Inc. # All Rights Reserved. # # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following disclaimer # in the documentation and/or other materials provided with the # distribution. # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Bazel Build for Google C++ Testing Framework(Google Test) load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") package(default_visibility = ["//visibility:public"]) licenses(["notice"]) config_setting( name = "windows", constraint_values = ["@bazel_tools//platforms:windows"], ) config_setting( name = "has_absl", values = {"define": "absl=1"}, ) # Library that defines the FRIEND_TEST macro. cc_library( name = "gtest_prod", hdrs = ["googletest/include/gtest/gtest_prod.h"], includes = ["googletest/include"], ) # Google Test including Google Mock cc_library( name = "gtest", srcs = glob( include = [ "googletest/src/*.cc", "googletest/src/*.h", "googletest/include/gtest/**/*.h", "googlemock/src/*.cc", "googlemock/include/gmock/**/*.h", ], exclude = [ "googletest/src/gtest-all.cc", "googletest/src/gtest_main.cc", "googlemock/src/gmock-all.cc", "googlemock/src/gmock_main.cc", ], ), hdrs = glob([ "googletest/include/gtest/*.h", "googlemock/include/gmock/*.h", ]), copts = select({ ":windows": [], "//conditions:default": ["-pthread"], }), defines = select({ ":has_absl": ["GTEST_HAS_ABSL=1"], "//conditions:default": [], }), features = select({ ":windows": ["windows_export_all_symbols"], "//conditions:default": [], }), includes = [ "googlemock", "googlemock/include", "googletest", "googletest/include", ], linkopts = select({ ":windows": [], "//conditions:default": ["-pthread"], }), deps = select({ ":has_absl": [ 
"@com_google_absl//absl/debugging:failure_signal_handler", "@com_google_absl//absl/debugging:stacktrace", "@com_google_absl//absl/debugging:symbolize", "@com_google_absl//absl/strings", "@com_google_absl//absl/types:optional", "@com_google_absl//absl/types:variant", ], "//conditions:default": [], }), ) cc_library( name = "gtest_main", srcs = ["googlemock/src/gmock_main.cc"], features = select({ ":windows": ["windows_export_all_symbols"], "//conditions:default": [], }), deps = [":gtest"], ) # The following rules build samples of how to use gTest. cc_library( name = "gtest_sample_lib", srcs = [ "googletest/samples/sample1.cc", "googletest/samples/sample2.cc", "googletest/samples/sample4.cc", ], hdrs = [ "googletest/samples/prime_tables.h", "googletest/samples/sample1.h", "googletest/samples/sample2.h", "googletest/samples/sample3-inl.h", "googletest/samples/sample4.h", ], features = select({ ":windows": ["windows_export_all_symbols"], "//conditions:default": [], }), ) cc_test( name = "gtest_samples", size = "small", # All Samples except: # sample9 (main) # sample10 (main and takes a command line option and needs to be separate) srcs = [ "googletest/samples/sample1_unittest.cc", "googletest/samples/sample2_unittest.cc", "googletest/samples/sample3_unittest.cc", "googletest/samples/sample4_unittest.cc", "googletest/samples/sample5_unittest.cc", "googletest/samples/sample6_unittest.cc", "googletest/samples/sample7_unittest.cc", "googletest/samples/sample8_unittest.cc", ], linkstatic = 0, deps = [ "gtest_sample_lib", ":gtest_main", ], ) cc_test( name = "sample9_unittest", size = "small", srcs = ["googletest/samples/sample9_unittest.cc"], deps = [":gtest"], ) cc_test( name = "sample10_unittest", size = "small", srcs = ["googletest/samples/sample10_unittest.cc"], deps = [":gtest"], ) LucenePlusPlus-rel_3.0.9/src/test/gtest/CMakeLists.txt000066400000000000000000000015671456444476200227750ustar00rootroot00000000000000# Note: CMake support is community-based. 
The maintainers do not use CMake # internally. cmake_minimum_required(VERSION 2.8.8) if (POLICY CMP0048) cmake_policy(SET CMP0048 NEW) endif (POLICY CMP0048) project(googletest-distribution) set(GOOGLETEST_VERSION 1.10.0) if (CMAKE_VERSION VERSION_LESS "3.1") add_definitions(-std=c++11) else() set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) if(NOT CYGWIN) set(CMAKE_CXX_EXTENSIONS OFF) endif() endif() enable_testing() include(CMakeDependentOption) include(GNUInstallDirs) #Note that googlemock target already builds googletest option(BUILD_GMOCK "Builds the googlemock subproject" ON) option(INSTALL_GTEST "Enable installation of googletest. (Projects embedding googletest may want to turn this OFF.)" ON) if(BUILD_GMOCK) add_subdirectory( googlemock ) else() add_subdirectory( googletest ) endif() LucenePlusPlus-rel_3.0.9/src/test/gtest/CONTRIBUTING.md000066400000000000000000000140331456444476200224560ustar00rootroot00000000000000# How to become a contributor and submit your own code ## Contributor License Agreements We'd love to accept your patches! Before we can take them, we have to jump a couple of legal hurdles. Please fill out either the individual or corporate Contributor License Agreement (CLA). * If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](https://developers.google.com/open-source/cla/individual). * If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](https://developers.google.com/open-source/cla/corporate). Follow either of the two links above to access the appropriate CLA and instructions for how to sign and return it. Once we receive it, we'll be able to accept your pull requests. ## Are you a Googler? If you are a Googler, please make an attempt to submit an internal change rather than a GitHub Pull Request. 
If you are not able to submit an internal change a PR is acceptable as an alternative. ## Contributing A Patch 1. Submit an issue describing your proposed change to the [issue tracker](https://github.com/google/googletest). 2. Please don't mix more than one logical change per submittal, because it makes the history hard to follow. If you want to make a change that doesn't have a corresponding issue in the issue tracker, please create one. 3. Also, coordinate with team members that are listed on the issue in question. This ensures that work isn't being duplicated and communicating your plan early also generally leads to better patches. 4. If your proposed change is accepted, and you haven't already done so, sign a Contributor License Agreement (see details above). 5. Fork the desired repo, develop and test your code changes. 6. Ensure that your code adheres to the existing style in the sample to which you are contributing. 7. Ensure that your code has an appropriate set of unit tests which all pass. 8. Submit a pull request. ## The Google Test and Google Mock Communities The Google Test community exists primarily through the [discussion group](http://groups.google.com/group/googletestframework) and the GitHub repository. Likewise, the Google Mock community exists primarily through their own [discussion group](http://groups.google.com/group/googlemock). You are definitely encouraged to contribute to the discussion and you can also help us to keep the effectiveness of the group high by following and promoting the guidelines listed here. ### Please Be Friendly Showing courtesy and respect to others is a vital part of the Google culture, and we strongly encourage everyone participating in Google Test development to join us in accepting nothing less. 
Of course, being courteous is not the same as failing to constructively disagree with each other, but it does mean that we should be respectful of each other when enumerating the 42 technical reasons that a particular proposal may not be the best choice. There's never a reason to be antagonistic or dismissive toward anyone who is sincerely trying to contribute to a discussion. Sure, C++ testing is serious business and all that, but it's also a lot of fun. Let's keep it that way. Let's strive to be one of the friendliest communities in all of open source. As always, discuss Google Test in the official GoogleTest discussion group. You don't have to actually submit code in order to sign up. Your participation itself is a valuable contribution. ## Style To keep the source consistent, readable, diffable and easy to merge, we use a fairly rigid coding style, as defined by the [google-styleguide](https://github.com/google/styleguide) project. All patches will be expected to conform to the style outlined [here](https://google.github.io/styleguide/cppguide.html). Use [.clang-format](https://github.com/google/googletest/blob/master/.clang-format) to check your formatting ## Requirements for Contributors If you plan to contribute a patch, you need to build Google Test, Google Mock, and their own tests from a git checkout, which has further requirements: * [Python](https://www.python.org/) v2.3 or newer (for running some of the tests and re-generating certain source files from templates) * [CMake](https://cmake.org/) v2.6.4 or newer ## Developing Google Test and Google Mock This section discusses how to make your own changes to the Google Test project. ### Testing Google Test and Google Mock Themselves To make sure your changes work as intended and don't break existing functionality, you'll want to compile and run Google Test and GoogleMock's own tests. 
For that you can use CMake: mkdir mybuild cd mybuild cmake -Dgtest_build_tests=ON -Dgmock_build_tests=ON ${GTEST_REPO_DIR} To choose between building only Google Test or Google Mock, you may modify your cmake command to be one of each cmake -Dgtest_build_tests=ON ${GTEST_DIR} # sets up Google Test tests cmake -Dgmock_build_tests=ON ${GMOCK_DIR} # sets up Google Mock tests Make sure you have Python installed, as some of Google Test's tests are written in Python. If the cmake command complains about not being able to find Python (`Could NOT find PythonInterp (missing: PYTHON_EXECUTABLE)`), try telling it explicitly where your Python executable can be found: cmake -DPYTHON_EXECUTABLE=path/to/python ... Next, you can build Google Test and / or Google Mock and all desired tests. On \*nix, this is usually done by make To run the tests, do make test All tests should pass. ### Regenerating Source Files Some of Google Test's source files are generated from templates (not in the C++ sense) using a script. For example, the file include/gtest/internal/gtest-type-util.h.pump is used to generate gtest-type-util.h in the same directory. You don't need to worry about regenerating the source files unless you need to modify them. You would then modify the corresponding `.pump` files and run the '[pump.py](googletest/scripts/pump.py)' generator script. See the [Pump Manual](googletest/docs/pump_manual.md). LucenePlusPlus-rel_3.0.9/src/test/gtest/LICENSE000066400000000000000000000027031456444476200212330ustar00rootroot00000000000000Copyright 2008, Google Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. LucenePlusPlus-rel_3.0.9/src/test/gtest/README.md000066400000000000000000000111211456444476200214770ustar00rootroot00000000000000# Google Test #### OSS Builds Status: [![Build Status](https://api.travis-ci.org/google/googletest.svg?branch=master)](https://travis-ci.org/google/googletest) [![Build status](https://ci.appveyor.com/api/projects/status/4o38plt0xbo1ubc8/branch/master?svg=true)](https://ci.appveyor.com/project/GoogleTestAppVeyor/googletest/branch/master) ### Future Plans #### 1.8.x Release: [the 1.8.x](https://github.com/google/googletest/releases/tag/release-1.8.1) is the last release that works with pre-C++11 compilers. The 1.8.x will not accept any requests for any new features and any bugfix requests will only be accepted if proven "critical" #### Post 1.8.x: On-going work to improve/cleanup/pay technical debt. 
When this work is completed there will be a 1.9.x tagged release #### Post 1.9.x Post 1.9.x googletest will follow [Abseil Live at Head philosophy](https://abseil.io/about/philosophy) ## Welcome to **Google Test**, Google's C++ test framework! This repository is a merger of the formerly separate GoogleTest and GoogleMock projects. These were so closely related that it makes sense to maintain and release them together. Please subscribe to the mailing list at googletestframework@googlegroups.com for questions, discussions, and development. ### Getting started: The information for **Google Test** is available in the [Google Test Primer](googletest/docs/primer.md) documentation. **Google Mock** is an extension to Google Test for writing and using C++ mock classes. See the separate [Google Mock documentation](googlemock/README.md). More detailed documentation for googletest is in its interior [googletest/README.md](googletest/README.md) file. ## Features * An [xUnit](https://en.wikipedia.org/wiki/XUnit) test framework. * Test discovery. * A rich set of assertions. * User-defined assertions. * Death tests. * Fatal and non-fatal failures. * Value-parameterized tests. * Type-parameterized tests. * Various options for running the tests. * XML test report generation. ## Platforms Google test has been used on a variety of platforms: * Linux * Mac OS X * Windows * Cygwin * MinGW * Windows Mobile * Symbian * PlatformIO ## Who Is Using Google Test? In addition to many internal projects at Google, Google Test is also used by the following notable projects: * The [Chromium projects](http://www.chromium.org/) (behind the Chrome browser and Chrome OS). * The [LLVM](http://llvm.org/) compiler. * [Protocol Buffers](https://github.com/google/protobuf), Google's data interchange format. * The [OpenCV](http://opencv.org/) computer vision library. * [tiny-dnn](https://github.com/tiny-dnn/tiny-dnn): header only, dependency-free deep learning framework in C++11. 
## Related Open Source Projects [GTest Runner](https://github.com/nholthaus/gtest-runner) is a Qt5 based automated test-runner and Graphical User Interface with powerful features for Windows and Linux platforms. [Google Test UI](https://github.com/ospector/gtest-gbar) is test runner that runs your test binary, allows you to track its progress via a progress bar, and displays a list of test failures. Clicking on one shows failure text. Google Test UI is written in C#. [GTest TAP Listener](https://github.com/kinow/gtest-tap-listener) is an event listener for Google Test that implements the [TAP protocol](https://en.wikipedia.org/wiki/Test_Anything_Protocol) for test result output. If your test runner understands TAP, you may find it useful. [gtest-parallel](https://github.com/google/gtest-parallel) is a test runner that runs tests from your binary in parallel to provide significant speed-up. [GoogleTest Adapter](https://marketplace.visualstudio.com/items?itemName=DavidSchuldenfrei.gtest-adapter) is a VS Code extension allowing to view Google Tests in a tree view, and run/debug your tests. ## Requirements Google Test is designed to have fairly minimal requirements to build and use with your projects, but there are some. If you notice any problems on your platform, please notify [googletestframework@googlegroups.com](https://groups.google.com/forum/#!forum/googletestframework). Patches for fixing them are welcome! ### Build Requirements These are the base requirements to build and use Google Test from a source package: * [Bazel](https://bazel.build/) or [CMake](https://cmake.org/). NOTE: Bazel is the build system that googletest is using internally and tests against. CMake is community-supported. * a C++11-standard-compliant compiler ## Contributing change Please read the [`CONTRIBUTING.md`](CONTRIBUTING.md) for details on how to contribute to this project. Happy testing! 
LucenePlusPlus-rel_3.0.9/src/test/gtest/WORKSPACE000066400000000000000000000011301456444476200215000ustar00rootroot00000000000000workspace(name = "com_google_googletest") load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") # Abseil http_archive( name = "com_google_absl", urls = ["https://github.com/abseil/abseil-cpp/archive/master.zip"], strip_prefix = "abseil-cpp-master", ) http_archive( name = "rules_cc", strip_prefix = "rules_cc-master", urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"], ) http_archive( name = "rules_python", strip_prefix = "rules_python-master", urls = ["https://github.com/bazelbuild/rules_python/archive/master.zip"], ) LucenePlusPlus-rel_3.0.9/src/test/gtest/appveyor.yml000066400000000000000000000117451456444476200226240ustar00rootroot00000000000000version: '{build}' os: Visual Studio 2015 environment: matrix: - compiler: msvc-15-seh generator: "Visual Studio 15 2017" build_system: cmake APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 - compiler: msvc-15-seh generator: "Visual Studio 15 2017 Win64" build_system: cmake APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 enabled_on_pr: yes - compiler: msvc-15-seh build_system: bazel APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 enabled_on_pr: yes - compiler: msvc-14-seh build_system: cmake generator: "Visual Studio 14 2015" enabled_on_pr: yes - compiler: msvc-14-seh build_system: cmake generator: "Visual Studio 14 2015 Win64" - compiler: gcc-6.3.0-posix build_system: cmake generator: "MinGW Makefiles" cxx_path: 'C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin' enabled_on_pr: yes configuration: - Debug build: verbosity: minimal install: - ps: | Write-Output "Compiler: $env:compiler" Write-Output "Generator: $env:generator" Write-Output "Env:Configuation: $env:configuration" Write-Output "Env: $env" if (-not (Test-Path env:APPVEYOR_PULL_REQUEST_NUMBER)) { Write-Output "This is *NOT* a pull request build" } else { Write-Output "This is a pull request 
build" if (-not (Test-Path env:enabled_on_pr) -or $env:enabled_on_pr -ne "yes") { Write-Output "PR builds are *NOT* explicitly enabled" } } # install Bazel if ($env:build_system -eq "bazel") { appveyor DownloadFile https://github.com/bazelbuild/bazel/releases/download/0.28.1/bazel-0.28.1-windows-x86_64.exe -FileName bazel.exe } if ($env:build_system -eq "cmake") { # git bash conflicts with MinGW makefiles if ($env:generator -eq "MinGW Makefiles") { $env:path = $env:path.replace("C:\Program Files\Git\usr\bin;", "") if ($env:cxx_path -ne "") { $env:path += ";$env:cxx_path" } } } before_build: - ps: | $env:root=$env:APPVEYOR_BUILD_FOLDER Write-Output "env:root: $env:root" build_script: - ps: | # Only enable some builds for pull requests, the AppVeyor queue is too long. if ((Test-Path env:APPVEYOR_PULL_REQUEST_NUMBER) -And (-not (Test-Path env:enabled_on_pr) -or $env:enabled_on_pr -ne "yes")) { return } else { # special case - build with Bazel if ($env:build_system -eq "bazel") { & $env:root\bazel.exe build -c opt //:gtest_samples if ($LastExitCode -eq 0) { # bazel writes to StdErr and PowerShell interprets it as an error $host.SetShouldExit(0) } else { # a real error throw "Exec: $ErrorMessage" } return } } # by default build with CMake md _build -Force | Out-Null cd _build $conf = if ($env:generator -eq "MinGW Makefiles") {"-DCMAKE_BUILD_TYPE=$env:configuration"} else {"-DCMAKE_CONFIGURATION_TYPES=Debug;Release"} # Disable test for MinGW (gtest tests fail, gmock tests can not build) $gtest_build_tests = if ($env:generator -eq "MinGW Makefiles") {"-Dgtest_build_tests=OFF"} else {"-Dgtest_build_tests=ON"} $gmock_build_tests = if ($env:generator -eq "MinGW Makefiles") {"-Dgmock_build_tests=OFF"} else {"-Dgmock_build_tests=ON"} & cmake -G "$env:generator" $conf -Dgtest_build_samples=ON $gtest_build_tests $gmock_build_tests .. 
if ($LastExitCode -ne 0) { throw "Exec: $ErrorMessage" } $cmake_parallel = if ($env:generator -eq "MinGW Makefiles") {"-j2"} else {"/m"} & cmake --build . --config $env:configuration -- $cmake_parallel if ($LastExitCode -ne 0) { throw "Exec: $ErrorMessage" } skip_commits: files: - '**/*.md' test_script: - ps: | # Only enable some builds for pull requests, the AppVeyor queue is too long. if ((Test-Path env:APPVEYOR_PULL_REQUEST_NUMBER) -And (-not (Test-Path env:enabled_on_pr) -or $env:enabled_on_pr -ne "yes")) { return } if ($env:build_system -eq "bazel") { # special case - testing with Bazel & $env:root\bazel.exe test //:gtest_samples if ($LastExitCode -eq 0) { # bazel writes to StdErr and PowerShell interprets it as an error $host.SetShouldExit(0) } else { # a real error throw "Exec: $ErrorMessage" } } if ($env:build_system -eq "cmake") { # built with CMake - test with CTest if ($env:generator -eq "MinGW Makefiles") { return # No test available for MinGW } & ctest -C $env:configuration --timeout 600 --output-on-failure if ($LastExitCode -ne 0) { throw "Exec: $ErrorMessage" } } artifacts: - path: '_build/CMakeFiles/*.log' name: logs - path: '_build/Testing/**/*.xml' name: test_results - path: 'bazel-testlogs/**/test.log' name: test_logs - path: 'bazel-testlogs/**/test.xml' name: test_results LucenePlusPlus-rel_3.0.9/src/test/gtest/ci/000077500000000000000000000000001456444476200206175ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/gtest/ci/build-linux-bazel.sh000077500000000000000000000032301456444476200245030ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright 2017 Google Inc. # All Rights Reserved. # # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following disclaimer # in the documentation and/or other materials provided with the # distribution. # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. set -e bazel version bazel build --curses=no //...:all bazel test --curses=no //...:all bazel test --curses=no //...:all --define absl=1 LucenePlusPlus-rel_3.0.9/src/test/gtest/ci/build-platformio.sh000066400000000000000000000000471456444476200244250ustar00rootroot00000000000000# run PlatformIO builds platformio run LucenePlusPlus-rel_3.0.9/src/test/gtest/ci/env-linux.sh000077500000000000000000000036021456444476200231040ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright 2017 Google Inc. # All Rights Reserved. # # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following disclaimer # in the documentation and/or other materials provided with the # distribution. # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # This file should be sourced, and not executed as a standalone script. # # TODO() - we can check if this is being sourced using $BASH_VERSION and $BASH_SOURCE[0] != ${0}. if [ "${TRAVIS_OS_NAME}" = "linux" ]; then if [ "$CXX" = "g++" ]; then export CXX="g++-4.9" CC="gcc-4.9"; fi if [ "$CXX" = "clang++" ]; then export CXX="clang++-3.9" CC="clang-3.9"; fi fi LucenePlusPlus-rel_3.0.9/src/test/gtest/ci/env-osx.sh000077500000000000000000000041161456444476200225570ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright 2017 Google Inc. # All Rights Reserved. 
# # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following disclaimer # in the documentation and/or other materials provided with the # distribution. # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # This file should be sourced, and not executed as a standalone script. # # TODO() - we can check if this is being sourced using $BASH_VERSION and $BASH_SOURCE[0] != ${0}. # if [ "${TRAVIS_OS_NAME}" = "osx" ]; then if [ "$CXX" = "clang++" ]; then # $PATH needs to be adjusted because the llvm tap doesn't install the # package to /usr/local/bin, etc, like the gcc tap does. 
# See: https://github.com/Homebrew/legacy-homebrew/issues/29733 clang_version=3.9 export PATH="/usr/local/opt/llvm@${clang_version}/bin:$PATH"; fi fi LucenePlusPlus-rel_3.0.9/src/test/gtest/ci/get-nprocessors.sh000077500000000000000000000042541456444476200243200ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright 2017 Google Inc. # All Rights Reserved. # # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following disclaimer # in the documentation and/or other materials provided with the # distribution. # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # This file is typically sourced by another script. 
# if possible, ask for the precise number of processors, # otherwise take 2 processors as reasonable default; see # https://docs.travis-ci.com/user/speeding-up-the-build/#Makefile-optimization if [ -x /usr/bin/getconf ]; then NPROCESSORS=$(/usr/bin/getconf _NPROCESSORS_ONLN) else NPROCESSORS=2 fi # as of 2017-09-04 Travis CI reports 32 processors, but GCC build # crashes if parallelized too much (maybe memory consumption problem), # so limit to 4 processors for the time being. if [ $NPROCESSORS -gt 4 ] ; then echo "$0:Note: Limiting processors to use by make from $NPROCESSORS to 4." NPROCESSORS=4 fi LucenePlusPlus-rel_3.0.9/src/test/gtest/ci/install-linux.sh000077500000000000000000000042651456444476200237700ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright 2017 Google Inc. # All Rights Reserved. # # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following disclaimer # in the documentation and/or other materials provided with the # distribution. # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. set -eu if [ "${TRAVIS_OS_NAME}" != linux ]; then echo "Not a Linux build; skipping installation" exit 0 fi if [ "${TRAVIS_SUDO}" = "true" ]; then echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | \ sudo tee /etc/apt/sources.list.d/bazel.list curl https://bazel.build/bazel-release.pub.gpg | sudo apt-key add - sudo apt-get update && sudo apt-get install -y bazel gcc-4.9 g++-4.9 clang-3.9 elif [ "${CXX}" = "clang++" ]; then # Use ccache, assuming $HOME/bin is in the path, which is true in the Travis build environment. ln -sf /usr/bin/ccache $HOME/bin/${CXX}; ln -sf /usr/bin/ccache $HOME/bin/${CC}; fi LucenePlusPlus-rel_3.0.9/src/test/gtest/ci/install-osx.sh000077500000000000000000000032541456444476200234370ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright 2017 Google Inc. # All Rights Reserved. # # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following disclaimer # in the documentation and/or other materials provided with the # distribution. # * Neither the name of Google Inc. 
nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. set -eu if [ "${TRAVIS_OS_NAME}" != "osx" ]; then echo "Not a macOS build; skipping installation" exit 0 fi brew update brew install ccache gcc@4.9 LucenePlusPlus-rel_3.0.9/src/test/gtest/ci/install-platformio.sh000066400000000000000000000001331456444476200247700ustar00rootroot00000000000000# install PlatformIO sudo pip install -U platformio # update PlatformIO platformio update LucenePlusPlus-rel_3.0.9/src/test/gtest/ci/log-config.sh000077500000000000000000000040551456444476200232060ustar00rootroot00000000000000#!/usr/bin/env bash # Copyright 2017 Google Inc. # All Rights Reserved. # # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
# * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following disclaimer # in the documentation and/or other materials provided with the # distribution. # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. set -e # ccache on OS X needs installation first # reset ccache statistics ccache --zero-stats echo PATH=${PATH} echo "Compiler configuration:" echo CXX=${CXX} echo CC=${CC} echo CXXFLAGS=${CXXFLAGS} echo "C++ compiler version:" ${CXX} --version || echo "${CXX} does not seem to support the --version flag" ${CXX} -v || echo "${CXX} does not seem to support the -v flag" echo "C compiler version:" ${CC} --version || echo "${CXX} does not seem to support the --version flag" ${CC} -v || echo "${CXX} does not seem to support the -v flag" LucenePlusPlus-rel_3.0.9/src/test/gtest/ci/travis.sh000077500000000000000000000024361456444476200224730ustar00rootroot00000000000000#!/usr/bin/env sh set -evx . 
ci/get-nprocessors.sh # if possible, ask for the precise number of processors, # otherwise take 2 processors as reasonable default; see # https://docs.travis-ci.com/user/speeding-up-the-build/#Makefile-optimization if [ -x /usr/bin/getconf ]; then NPROCESSORS=$(/usr/bin/getconf _NPROCESSORS_ONLN) else NPROCESSORS=2 fi # as of 2017-09-04 Travis CI reports 32 processors, but GCC build # crashes if parallelized too much (maybe memory consumption problem), # so limit to 4 processors for the time being. if [ $NPROCESSORS -gt 4 ] ; then echo "$0:Note: Limiting processors to use by make from $NPROCESSORS to 4." NPROCESSORS=4 fi # Tell make to use the processors. No preceding '-' required. MAKEFLAGS="j${NPROCESSORS}" export MAKEFLAGS env | sort # Set default values to OFF for these variables if not specified. : "${NO_EXCEPTION:=OFF}" : "${NO_RTTI:=OFF}" : "${COMPILER_IS_GNUCXX:=OFF}" mkdir build || true cd build cmake -Dgtest_build_samples=ON \ -Dgtest_build_tests=ON \ -Dgmock_build_tests=ON \ -Dcxx_no_exception=$NO_EXCEPTION \ -Dcxx_no_rtti=$NO_RTTI \ -DCMAKE_COMPILER_IS_GNUCXX=$COMPILER_IS_GNUCXX \ -DCMAKE_CXX_FLAGS=$CXX_FLAGS \ -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ .. make CTEST_OUTPUT_ON_FAILURE=1 make test LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/000077500000000000000000000000001456444476200223525ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/CMakeLists.txt000066400000000000000000000207331456444476200251170ustar00rootroot00000000000000######################################################################## # Note: CMake support is community-based. The maintainers do not use CMake # internally. # # CMake build script for Google Mock. # # To run the tests for Google Mock itself on Linux, use 'make test' or # ctest. You can select which tests to run using 'ctest -R regex'. # For more options, run 'ctest --help'. option(gmock_build_tests "Build all of Google Mock's own tests." OFF) # A directory to find Google Test sources. 
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/gtest/CMakeLists.txt") set(gtest_dir gtest) else() set(gtest_dir ../googletest) endif() # Defines pre_project_set_up_hermetic_build() and set_up_hermetic_build(). include("${gtest_dir}/cmake/hermetic_build.cmake" OPTIONAL) if (COMMAND pre_project_set_up_hermetic_build) # Google Test also calls hermetic setup functions from add_subdirectory, # although its changes will not affect things at the current scope. pre_project_set_up_hermetic_build() endif() ######################################################################## # # Project-wide settings # Name of the project. # # CMake files in this project can refer to the root source directory # as ${gmock_SOURCE_DIR} and to the root binary directory as # ${gmock_BINARY_DIR}. # Language "C" is required for find_package(Threads). if (CMAKE_VERSION VERSION_LESS 3.0) project(gmock CXX C) else() cmake_policy(SET CMP0048 NEW) project(gmock VERSION ${GOOGLETEST_VERSION} LANGUAGES CXX C) endif() cmake_minimum_required(VERSION 2.6.4) if (COMMAND set_up_hermetic_build) set_up_hermetic_build() endif() # Instructs CMake to process Google Test's CMakeLists.txt and add its # targets to the current scope. We are placing Google Test's binary # directory in a subdirectory of our own as VC compilation may break # if they are the same (the default). add_subdirectory("${gtest_dir}" "${gmock_BINARY_DIR}/${gtest_dir}") # These commands only run if this is the main project if(CMAKE_PROJECT_NAME STREQUAL "gmock" OR CMAKE_PROJECT_NAME STREQUAL "googletest-distribution") # BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to # make it prominent in the GUI. option(BUILD_SHARED_LIBS "Build shared libraries (DLLs)." OFF) else() mark_as_advanced(gmock_build_tests) endif() # Although Google Test's CMakeLists.txt calls this function, the # changes there don't affect the current scope. Therefore we have to # call it again here. 
config_compiler_and_linker() # from ${gtest_dir}/cmake/internal_utils.cmake # Adds Google Mock's and Google Test's header directories to the search path. set(gmock_build_include_dirs "${gmock_SOURCE_DIR}/include" "${gmock_SOURCE_DIR}" "${gtest_SOURCE_DIR}/include" # This directory is needed to build directly from Google Test sources. "${gtest_SOURCE_DIR}") include_directories(${gmock_build_include_dirs}) ######################################################################## # # Defines the gmock & gmock_main libraries. User tests should link # with one of them. # Google Mock libraries. We build them using more strict warnings than what # are used for other targets, to ensure that Google Mock can be compiled by # a user aggressive about warnings. if (MSVC) cxx_library(gmock "${cxx_strict}" "${gtest_dir}/src/gtest-all.cc" src/gmock-all.cc) cxx_library(gmock_main "${cxx_strict}" "${gtest_dir}/src/gtest-all.cc" src/gmock-all.cc src/gmock_main.cc) else() cxx_library(gmock "${cxx_strict}" src/gmock-all.cc) target_link_libraries(gmock PUBLIC gtest) cxx_library(gmock_main "${cxx_strict}" src/gmock_main.cc) target_link_libraries(gmock_main PUBLIC gmock) endif() # If the CMake version supports it, attach header directory information # to the targets for when we are part of a parent build (ie being pulled # in via add_subdirectory() rather than being a standalone build). if (DEFINED CMAKE_VERSION AND NOT "${CMAKE_VERSION}" VERSION_LESS "2.8.11") target_include_directories(gmock SYSTEM INTERFACE "$" "$/${CMAKE_INSTALL_INCLUDEDIR}>") target_include_directories(gmock_main SYSTEM INTERFACE "$" "$/${CMAKE_INSTALL_INCLUDEDIR}>") endif() ######################################################################## # # Install rules install_project(gmock gmock_main) ######################################################################## # # Google Mock's own tests. # # You can skip this section if you aren't interested in testing # Google Mock itself. 
# # The tests are not built by default. To build them, set the # gmock_build_tests option to ON. You can do it by running ccmake # or specifying the -Dgmock_build_tests=ON flag when running cmake. if (gmock_build_tests) # This must be set in the root directory for the tests to be run by # 'make test' or ctest. enable_testing() if (WIN32) file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/$/RunTest.ps1" CONTENT "$project_bin = \"${CMAKE_BINARY_DIR}/bin/$\" $env:Path = \"$project_bin;$env:Path\" & $args") elseif (MINGW OR CYGWIN) file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/RunTest.ps1" CONTENT "$project_bin = (cygpath --windows ${CMAKE_BINARY_DIR}/bin) $env:Path = \"$project_bin;$env:Path\" & $args") endif() if (MINGW OR CYGWIN) if (CMAKE_VERSION VERSION_LESS "2.8.12") add_compile_options("-Wa,-mbig-obj") else() add_definitions("-Wa,-mbig-obj") endif() endif() ############################################################ # C++ tests built with standard compiler flags. cxx_test(gmock-actions_test gmock_main) cxx_test(gmock-cardinalities_test gmock_main) cxx_test(gmock_ex_test gmock_main) cxx_test(gmock-function-mocker_test gmock_main) cxx_test(gmock-generated-actions_test gmock_main) cxx_test(gmock-generated-function-mockers_test gmock_main) cxx_test(gmock-generated-matchers_test gmock_main) cxx_test(gmock-internal-utils_test gmock_main) cxx_test(gmock-matchers_test gmock_main) cxx_test(gmock-more-actions_test gmock_main) cxx_test(gmock-nice-strict_test gmock_main) cxx_test(gmock-port_test gmock_main) cxx_test(gmock-spec-builders_test gmock_main) cxx_test(gmock_link_test gmock_main test/gmock_link2_test.cc) cxx_test(gmock_test gmock_main) if (DEFINED GTEST_HAS_PTHREAD) cxx_test(gmock_stress_test gmock) endif() # gmock_all_test is commented to save time building and running tests. # Uncomment if necessary. # cxx_test(gmock_all_test gmock_main) ############################################################ # C++ tests built with non-standard compiler flags. 
if (MSVC) cxx_library(gmock_main_no_exception "${cxx_no_exception}" "${gtest_dir}/src/gtest-all.cc" src/gmock-all.cc src/gmock_main.cc) cxx_library(gmock_main_no_rtti "${cxx_no_rtti}" "${gtest_dir}/src/gtest-all.cc" src/gmock-all.cc src/gmock_main.cc) else() cxx_library(gmock_main_no_exception "${cxx_no_exception}" src/gmock_main.cc) target_link_libraries(gmock_main_no_exception PUBLIC gmock) cxx_library(gmock_main_no_rtti "${cxx_no_rtti}" src/gmock_main.cc) target_link_libraries(gmock_main_no_rtti PUBLIC gmock) endif() cxx_test_with_flags(gmock-more-actions_no_exception_test "${cxx_no_exception}" gmock_main_no_exception test/gmock-more-actions_test.cc) cxx_test_with_flags(gmock_no_rtti_test "${cxx_no_rtti}" gmock_main_no_rtti test/gmock-spec-builders_test.cc) cxx_shared_library(shared_gmock_main "${cxx_default}" "${gtest_dir}/src/gtest-all.cc" src/gmock-all.cc src/gmock_main.cc) # Tests that a binary can be built with Google Mock as a shared library. On # some system configurations, it may not possible to run the binary without # knowing more details about the system configurations. We do not try to run # this binary. To get a more robust shared library coverage, configure with # -DBUILD_SHARED_LIBS=ON. cxx_executable_with_flags(shared_gmock_test_ "${cxx_default}" shared_gmock_main test/gmock-spec-builders_test.cc) set_target_properties(shared_gmock_test_ PROPERTIES COMPILE_DEFINITIONS "GTEST_LINKED_AS_SHARED_LIBRARY=1") ############################################################ # Python tests. cxx_executable(gmock_leak_test_ test gmock_main) py_test(gmock_leak_test) cxx_executable(gmock_output_test_ test gmock) py_test(gmock_output_test) endif() LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/CONTRIBUTORS000066400000000000000000000025311456444476200242330ustar00rootroot00000000000000# This file contains a list of people who've made non-trivial # contribution to the Google C++ Mocking Framework project. 
People # who commit code to the project are encouraged to add their names # here. Please keep the list sorted by first names. Benoit Sigoure Bogdan Piloca Chandler Carruth Dave MacLachlan David Anderson Dean Sturtevant Gene Volovich Hal Burch Jeffrey Yasskin Jim Keller Joe Walnes Jon Wray Keir Mierle Keith Ray Kostya Serebryany Lev Makhlis Manuel Klimek Mario Tanev Mark Paskin Markus Heule Matthew Simmons Mike Bland Neal Norwitz Nermin Ozkiranartli Owen Carlsen Paneendra Ba Paul Menage Piotr Kaminski Russ Rufer Sverre Sundsdal Takeshi Yoshino Vadim Berman Vlad Losev Wolfgang Klier Zhanyong Wan LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/LICENSE000066400000000000000000000027031456444476200233610ustar00rootroot00000000000000Copyright 2008, Google Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/README.md000066400000000000000000000030671456444476200236370ustar00rootroot00000000000000# Googletest Mocking (gMock) Framework ### Overview Google's framework for writing and using C++ mock classes. It can help you derive better designs of your system and write better tests. It is inspired by: * [jMock](http://www.jmock.org/), * [EasyMock](http://www.easymock.org/), and * [Hamcrest](http://code.google.com/p/hamcrest/), and designed with C++'s specifics in mind. gMock: - provides a declarative syntax for defining mocks, - can define partial (hybrid) mocks, which are a cross of real and mock objects, - handles functions of arbitrary types and overloaded functions, - comes with a rich set of matchers for validating function arguments, - uses an intuitive syntax for controlling the behavior of a mock, - does automatic verification of expectations (no record-and-replay needed), - allows arbitrary (partial) ordering constraints on function calls to be expressed, - lets a user extend it by defining new matchers and actions. - does not use exceptions, and - is easy to learn and use. 
Details and examples can be found here: * [gMock for Dummies](docs/for_dummies.md) * [Legacy gMock FAQ](docs/gmock_faq.md) * [gMock Cookbook](docs/cook_book.md) * [gMock Cheat Sheet](docs/cheat_sheet.md) Please note that code under scripts/generator/ is from the [cppclean project](http://code.google.com/p/cppclean/) and under the Apache License, which is different from Google Mock's license. Google Mock is a part of [Google Test C++ testing framework](http://github.com/google/googletest/) and a subject to the same requirements. LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/cmake/000077500000000000000000000000001456444476200234325ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/cmake/gmock.pc.in000066400000000000000000000006041456444476200254630ustar00rootroot00000000000000prefix=${pcfiledir}/../.. libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ Name: gmock Description: GoogleMock (without main() function) Version: @PROJECT_VERSION@ URL: https://github.com/google/googletest Requires: gtest Libs: -L${libdir} -lgmock @CMAKE_THREAD_LIBS_INIT@ Cflags: -I${includedir} @GTEST_HAS_PTHREAD_MACRO@ @CMAKE_THREAD_LIBS_INIT@ LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/cmake/gmock_main.pc.in000066400000000000000000000006131456444476200264670ustar00rootroot00000000000000prefix=${pcfiledir}/../.. 
libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ Name: gmock_main Description: GoogleMock (with main() function) Version: @PROJECT_VERSION@ URL: https://github.com/google/googletest Requires: gmock Libs: -L${libdir} -lgmock_main @CMAKE_THREAD_LIBS_INIT@ Cflags: -I${includedir} @GTEST_HAS_PTHREAD_MACRO@ @CMAKE_THREAD_LIBS_INIT@ LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/docs/000077500000000000000000000000001456444476200233025ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/docs/cheat_sheet.md000066400000000000000000001071451456444476200261100ustar00rootroot00000000000000## gMock Cheat Sheet ### Defining a Mock Class #### Mocking a Normal Class {#MockClass} Given ```cpp class Foo { ... virtual ~Foo(); virtual int GetSize() const = 0; virtual string Describe(const char* name) = 0; virtual string Describe(int type) = 0; virtual bool Process(Bar elem, int count) = 0; }; ``` (note that `~Foo()` **must** be virtual) we can define its mock as ```cpp #include "gmock/gmock.h" class MockFoo : public Foo { ... MOCK_METHOD(int, GetSize, (), (const, override)); MOCK_METHOD(string, Describe, (const char* name), (override)); MOCK_METHOD(string, Describe, (int type), (override)); MOCK_METHOD(bool, Process, (Bar elem, int count), (override)); }; ``` To create a "nice" mock, which ignores all uninteresting calls, a "naggy" mock, which warns on all uninteresting calls, or a "strict" mock, which treats them as failures: ```cpp using ::testing::NiceMock; using ::testing::NaggyMock; using ::testing::StrictMock; NiceMock nice_foo; // The type is a subclass of MockFoo. NaggyMock naggy_foo; // The type is a subclass of MockFoo. StrictMock strict_foo; // The type is a subclass of MockFoo. ``` **Note:** A mock object is currently naggy by default. We may make it nice by default in the future. #### Mocking a Class Template {#MockTemplate} Class templates can be mocked just like any class. 
To mock ```cpp template class StackInterface { ... virtual ~StackInterface(); virtual int GetSize() const = 0; virtual void Push(const Elem& x) = 0; }; ``` (note that all member functions that are mocked, including `~StackInterface()` **must** be virtual). ```cpp template class MockStack : public StackInterface { ... MOCK_METHOD(int, GetSize, (), (const, override)); MOCK_METHOD(void, Push, (const Elem& x), (override)); }; ``` #### Specifying Calling Conventions for Mock Functions If your mock function doesn't use the default calling convention, you can specify it by adding `Calltype(convention)` to `MOCK_METHOD`'s 4th parameter. For example, ```cpp MOCK_METHOD(bool, Foo, (int n), (Calltype(STDMETHODCALLTYPE))); MOCK_METHOD(int, Bar, (double x, double y), (const, Calltype(STDMETHODCALLTYPE))); ``` where `STDMETHODCALLTYPE` is defined by `` on Windows. ### Using Mocks in Tests {#UsingMocks} The typical work flow is: 1. Import the gMock names you need to use. All gMock symbols are in the `testing` namespace unless they are macros or otherwise noted. 2. Create the mock objects. 3. Optionally, set the default actions of the mock objects. 4. Set your expectations on the mock objects (How will they be called? What will they do?). 5. Exercise code that uses the mock objects; if necessary, check the result using googletest assertions. 6. When a mock object is destructed, gMock automatically verifies that all expectations on it have been satisfied. Here's an example: ```cpp using ::testing::Return; // #1 TEST(BarTest, DoesThis) { MockFoo foo; // #2 ON_CALL(foo, GetSize()) // #3 .WillByDefault(Return(1)); // ... other default actions ... EXPECT_CALL(foo, Describe(5)) // #4 .Times(3) .WillRepeatedly(Return("Category 5")); // ... other expectations ... EXPECT_EQ("good", MyProductionFunction(&foo)); // #5 } // #6 ``` ### Setting Default Actions {#OnCall} gMock has a **built-in default action** for any function that returns `void`, `bool`, a numeric value, or a pointer. 
In C++11, it will additionally returns the default-constructed value, if one exists for the given type. To customize the default action for functions with return type *`T`*: ```cpp using ::testing::DefaultValue; // Sets the default value to be returned. T must be CopyConstructible. DefaultValue::Set(value); // Sets a factory. Will be invoked on demand. T must be MoveConstructible. // T MakeT(); DefaultValue::SetFactory(&MakeT); // ... use the mocks ... // Resets the default value. DefaultValue::Clear(); ``` Example usage: ```cpp // Sets the default action for return type std::unique_ptr to // creating a new Buzz every time. DefaultValue>::SetFactory( [] { return MakeUnique(AccessLevel::kInternal); }); // When this fires, the default action of MakeBuzz() will run, which // will return a new Buzz object. EXPECT_CALL(mock_buzzer_, MakeBuzz("hello")).Times(AnyNumber()); auto buzz1 = mock_buzzer_.MakeBuzz("hello"); auto buzz2 = mock_buzzer_.MakeBuzz("hello"); EXPECT_NE(nullptr, buzz1); EXPECT_NE(nullptr, buzz2); EXPECT_NE(buzz1, buzz2); // Resets the default action for return type std::unique_ptr, // to avoid interfere with other tests. DefaultValue>::Clear(); ``` To customize the default action for a particular method of a specific mock object, use `ON_CALL()`. `ON_CALL()` has a similar syntax to `EXPECT_CALL()`, but it is used for setting default behaviors (when you do not require that the mock method is called). See [here](cook_book.md#UseOnCall) for a more detailed discussion. ```cpp ON_CALL(mock-object, method(matchers)) .With(multi-argument-matcher) ? .WillByDefault(action); ``` ### Setting Expectations {#ExpectCall} `EXPECT_CALL()` sets **expectations** on a mock method (How will it be called? What will it do?): ```cpp EXPECT_CALL(mock-object, method (matchers)?) .With(multi-argument-matcher) ? .Times(cardinality) ? .InSequence(sequences) * .After(expectations) * .WillOnce(action) * .WillRepeatedly(action) ? .RetiresOnSaturation(); ? 
``` For each item above, `?` means it can be used at most once, while `*` means it can be used any number of times. In order to pass, `EXPECT_CALL` must be used before the calls are actually made. The `(matchers)` is a comma-separated list of matchers that correspond to each of the arguments of `method`, and sets the expectation only for calls of `method` that matches all of the matchers. If `(matchers)` is omitted, the expectation is the same as if the matchers were set to anything matchers (for example, `(_, _, _, _)` for a four-arg method). If `Times()` is omitted, the cardinality is assumed to be: * `Times(1)` when there is neither `WillOnce()` nor `WillRepeatedly()`; * `Times(n)` when there are `n` `WillOnce()`s but no `WillRepeatedly()`, where `n` >= 1; or * `Times(AtLeast(n))` when there are `n` `WillOnce()`s and a `WillRepeatedly()`, where `n` >= 0. A method with no `EXPECT_CALL()` is free to be invoked *any number of times*, and the default action will be taken each time. ### Matchers {#MatcherList} A **matcher** matches a *single* argument. You can use it inside `ON_CALL()` or `EXPECT_CALL()`, or use it to validate a value directly using two macros: | Macro | Description | | :----------------------------------- | :------------------------------------ | | `EXPECT_THAT(actual_value, matcher)` | Asserts that `actual_value` matches `matcher`. | | `ASSERT_THAT(actual_value, matcher)` | The same as `EXPECT_THAT(actual_value, matcher)`, except that it generates a **fatal** failure. | Built-in matchers (where `argument` is the function argument, e.g. `actual_value` in the example above, or when used in the context of `EXPECT_CALL(mock_object, method(matchers))`, the arguments of `method`) are divided into several categories: #### Wildcard Matcher | Description :-------------------------- | :----------------------------------------------- `_` | `argument` can be any value of the correct type. `A()` or `An()` | `argument` can be any value of type `type`. 
#### Generic Comparison | Matcher | Description | | :--------------------- | :-------------------------------------------------- | | `Eq(value)` or `value` | `argument == value` | | `Ge(value)` | `argument >= value` | | `Gt(value)` | `argument > value` | | `Le(value)` | `argument <= value` | | `Lt(value)` | `argument < value` | | `Ne(value)` | `argument != value` | | `IsFalse()` | `argument` evaluates to `false` in a Boolean context. | | `IsTrue()` | `argument` evaluates to `true` in a Boolean context. | | `IsNull()` | `argument` is a `NULL` pointer (raw or smart). | | `NotNull()` | `argument` is a non-null pointer (raw or smart). | | `Optional(m)` | `argument` is `optional<>` that contains a value matching `m`. | | `VariantWith(m)` | `argument` is `variant<>` that holds the alternative of type T with a value matching `m`. | | `Ref(variable)` | `argument` is a reference to `variable`. | | `TypedEq(value)` | `argument` has type `type` and is equal to `value`. You may need to use this instead of `Eq(value)` when the mock function is overloaded. | Except `Ref()`, these matchers make a *copy* of `value` in case it's modified or destructed later. If the compiler complains that `value` doesn't have a public copy constructor, try wrap it in `ByRef()`, e.g. `Eq(ByRef(non_copyable_value))`. If you do that, make sure `non_copyable_value` is not changed afterwards, or the meaning of your matcher will be changed. #### Floating-Point Matchers {#FpMatchers} | Matcher | Description | | :------------------------------- | :--------------------------------- | | `DoubleEq(a_double)` | `argument` is a `double` value approximately equal to `a_double`, treating two NaNs as unequal. | | `FloatEq(a_float)` | `argument` is a `float` value approximately equal to `a_float`, treating two NaNs as unequal. | | `NanSensitiveDoubleEq(a_double)` | `argument` is a `double` value approximately equal to `a_double`, treating two NaNs as equal. 
| | `NanSensitiveFloatEq(a_float)` | `argument` is a `float` value approximately equal to `a_float`, treating two NaNs as equal. | The above matchers use ULP-based comparison (the same as used in googletest). They automatically pick a reasonable error bound based on the absolute value of the expected value. `DoubleEq()` and `FloatEq()` conform to the IEEE standard, which requires comparing two NaNs for equality to return false. The `NanSensitive*` version instead treats two NaNs as equal, which is often what a user wants. | Matcher | Description | | :------------------------------------------------ | :----------------------- | | `DoubleNear(a_double, max_abs_error)` | `argument` is a `double` value close to `a_double` (absolute error <= `max_abs_error`), treating two NaNs as unequal. | | `FloatNear(a_float, max_abs_error)` | `argument` is a `float` value close to `a_float` (absolute error <= `max_abs_error`), treating two NaNs as unequal. | | `NanSensitiveDoubleNear(a_double, max_abs_error)` | `argument` is a `double` value close to `a_double` (absolute error <= `max_abs_error`), treating two NaNs as equal. | | `NanSensitiveFloatNear(a_float, max_abs_error)` | `argument` is a `float` value close to `a_float` (absolute error <= `max_abs_error`), treating two NaNs as equal. | #### String Matchers The `argument` can be either a C string or a C++ string object: | Matcher | Description | | :---------------------- | :------------------------------------------------- | | `ContainsRegex(string)` | `argument` matches the given regular expression. | | `EndsWith(suffix)` | `argument` ends with string `suffix`. | | `HasSubstr(string)` | `argument` contains `string` as a sub-string. | | `MatchesRegex(string)` | `argument` matches the given regular expression with the match starting at the first character and ending at the last character. | | `StartsWith(prefix)` | `argument` starts with string `prefix`. | | `StrCaseEq(string)` | `argument` is equal to `string`, ignoring case. 
| | `StrCaseNe(string)` | `argument` is not equal to `string`, ignoring case. | | `StrEq(string)` | `argument` is equal to `string`. | | `StrNe(string)` | `argument` is not equal to `string`. | `ContainsRegex()` and `MatchesRegex()` take ownership of the `RE` object. They use the regular expression syntax defined [here](../../googletest/docs/advanced.md#regular-expression-syntax). `StrCaseEq()`, `StrCaseNe()`, `StrEq()`, and `StrNe()` work for wide strings as well. #### Container Matchers Most STL-style containers support `==`, so you can use `Eq(expected_container)` or simply `expected_container` to match a container exactly. If you want to write the elements in-line, match them more flexibly, or get more informative messages, you can use: | Matcher | Description | | :---------------------------------------- | :------------------------------- | | `BeginEndDistanceIs(m)` | `argument` is a container whose `begin()` and `end()` iterators are separated by a number of increments matching `m`. E.g. `BeginEndDistanceIs(2)` or `BeginEndDistanceIs(Lt(2))`. For containers that define a `size()` method, `SizeIs(m)` may be more efficient. | | `ContainerEq(container)` | The same as `Eq(container)` except that the failure message also includes which elements are in one container but not the other. | | `Contains(e)` | `argument` contains an element that matches `e`, which can be either a value or a matcher. | | `Each(e)` | `argument` is a container where *every* element matches `e`, which can be either a value or a matcher. | | `ElementsAre(e0, e1, ..., en)` | `argument` has `n + 1` elements, where the *i*-th element matches `ei`, which can be a value or a matcher. 
| | `ElementsAreArray({e0, e1, ..., en})`, `ElementsAreArray(a_container)`, `ElementsAreArray(begin, end)`, `ElementsAreArray(array)`, or `ElementsAreArray(array, count)` | The same as `ElementsAre()` except that the expected element values/matchers come from an initializer list, STL-style container, iterator range, or C-style array. | | `IsEmpty()` | `argument` is an empty container (`container.empty()`). | | `IsSubsetOf({e0, e1, ..., en})`, `IsSubsetOf(a_container)`, `IsSubsetOf(begin, end)`, `IsSubsetOf(array)`, or `IsSubsetOf(array, count)` | `argument` matches `UnorderedElementsAre(x0, x1, ..., xk)` for some subset `{x0, x1, ..., xk}` of the expected matchers. | | `IsSupersetOf({e0, e1, ..., en})`, `IsSupersetOf(a_container)`, `IsSupersetOf(begin, end)`, `IsSupersetOf(array)`, or `IsSupersetOf(array, count)` | Some subset of `argument` matches `UnorderedElementsAre(`expected matchers`)`. | | `Pointwise(m, container)`, `Pointwise(m, {e0, e1, ..., en})` | `argument` contains the same number of elements as in `container`, and for all i, (the i-th element in `argument`, the i-th element in `container`) match `m`, which is a matcher on 2-tuples. E.g. `Pointwise(Le(), upper_bounds)` verifies that each element in `argument` doesn't exceed the corresponding element in `upper_bounds`. See more detail below. | | `SizeIs(m)` | `argument` is a container whose size matches `m`. E.g. `SizeIs(2)` or `SizeIs(Lt(2))`. | | `UnorderedElementsAre(e0, e1, ..., en)` | `argument` has `n + 1` elements, and under *some* permutation of the elements, each element matches an `ei` (for a different `i`), which can be a value or a matcher. 
| | `UnorderedElementsAreArray({e0, e1, ..., en})`, `UnorderedElementsAreArray(a_container)`, `UnorderedElementsAreArray(begin, end)`, `UnorderedElementsAreArray(array)`, or `UnorderedElementsAreArray(array, count)` | The same as `UnorderedElementsAre()` except that the expected element values/matchers come from an initializer list, STL-style container, iterator range, or C-style array. | | `UnorderedPointwise(m, container)`, `UnorderedPointwise(m, {e0, e1, ..., en})` | Like `Pointwise(m, container)`, but ignores the order of elements. | | `WhenSorted(m)` | When `argument` is sorted using the `<` operator, it matches container matcher `m`. E.g. `WhenSorted(ElementsAre(1, 2, 3))` verifies that `argument` contains elements 1, 2, and 3, ignoring order. | | `WhenSortedBy(comparator, m)` | The same as `WhenSorted(m)`, except that the given comparator instead of `<` is used to sort `argument`. E.g. `WhenSortedBy(std::greater(), ElementsAre(3, 2, 1))`. | **Notes:** * These matchers can also match: 1. a native array passed by reference (e.g. in `Foo(const int (&a)[5])`), and 2. an array passed as a pointer and a count (e.g. in `Bar(const T* buffer, int len)` -- see [Multi-argument Matchers](#MultiArgMatchers)). * The array being matched may be multi-dimensional (i.e. its elements can be arrays). * `m` in `Pointwise(m, ...)` should be a matcher for `::std::tuple` where `T` and `U` are the element type of the actual container and the expected container, respectively. For example, to compare two `Foo` containers where `Foo` doesn't support `operator==`, one might write: ```cpp using ::std::get; MATCHER(FooEq, "") { return std::get<0>(arg).Equals(std::get<1>(arg)); } ... 
EXPECT_THAT(actual_foos, Pointwise(FooEq(), expected_foos)); ``` #### Member Matchers | Matcher | Description | | :------------------------------ | :----------------------------------------- | | `Field(&class::field, m)` | `argument.field` (or `argument->field` when `argument` is a plain pointer) matches matcher `m`, where `argument` is an object of type _class_. | | `Key(e)` | `argument.first` matches `e`, which can be either a value or a matcher. E.g. `Contains(Key(Le(5)))` can verify that a `map` contains a key `<= 5`. | | `Pair(m1, m2)` | `argument` is an `std::pair` whose `first` field matches `m1` and `second` field matches `m2`. | | `Property(&class::property, m)` | `argument.property()` (or `argument->property()` when `argument` is a plain pointer) matches matcher `m`, where `argument` is an object of type _class_. | #### Matching the Result of a Function, Functor, or Callback | Matcher | Description | | :--------------- | :------------------------------------------------ | | `ResultOf(f, m)` | `f(argument)` matches matcher `m`, where `f` is a function or functor. | #### Pointer Matchers | Matcher | Description | | :------------------------ | :---------------------------------------------- | | `Pointee(m)` | `argument` (either a smart pointer or a raw pointer) points to a value that matches matcher `m`. | | `WhenDynamicCastTo(m)` | when `argument` is passed through `dynamic_cast()`, it matches matcher `m`. | #### Multi-argument Matchers {#MultiArgMatchers} Technically, all matchers match a *single* value. A "multi-argument" matcher is just one that matches a *tuple*. 
The following matchers can be used to match a tuple `(x, y)`: Matcher | Description :------ | :---------- `Eq()` | `x == y` `Ge()` | `x >= y` `Gt()` | `x > y` `Le()` | `x <= y` `Lt()` | `x < y` `Ne()` | `x != y` You can use the following selectors to pick a subset of the arguments (or reorder them) to participate in the matching: | Matcher | Description | | :------------------------- | :---------------------------------------------- | | `AllArgs(m)` | Equivalent to `m`. Useful as syntactic sugar in `.With(AllArgs(m))`. | | `Args(m)` | The tuple of the `k` selected (using 0-based indices) arguments matches `m`, e.g. `Args<1, 2>(Eq())`. | #### Composite Matchers You can make a matcher from one or more other matchers: | Matcher | Description | | :------------------------------- | :-------------------------------------- | | `AllOf(m1, m2, ..., mn)` | `argument` matches all of the matchers `m1` to `mn`. | | `AllOfArray({m0, m1, ..., mn})`, `AllOfArray(a_container)`, `AllOfArray(begin, end)`, `AllOfArray(array)`, or `AllOfArray(array, count)` | The same as `AllOf()` except that the matchers come from an initializer list, STL-style container, iterator range, or C-style array. | | `AnyOf(m1, m2, ..., mn)` | `argument` matches at least one of the matchers `m1` to `mn`. | | `AnyOfArray({m0, m1, ..., mn})`, `AnyOfArray(a_container)`, `AnyOfArray(begin, end)`, `AnyOfArray(array)`, or `AnyOfArray(array, count)` | The same as `AnyOf()` except that the matchers come from an initializer list, STL-style container, iterator range, or C-style array. | | `Not(m)` | `argument` doesn't match matcher `m`. | #### Adapters for Matchers | Matcher | Description | | :---------------------- | :------------------------------------ | | `MatcherCast(m)` | casts matcher `m` to type `Matcher`. | | `SafeMatcherCast(m)` | [safely casts](cook_book.md#casting-matchers) matcher `m` to type `Matcher`. 
| | `Truly(predicate)` | `predicate(argument)` returns something considered by C++ to be true, where `predicate` is a function or functor. | `AddressSatisfies(callback)` and `Truly(callback)` take ownership of `callback`, which must be a permanent callback. #### Using Matchers as Predicates {#MatchersAsPredicatesCheat} | Matcher | Description | | :---------------------------- | :------------------------------------------ | | `Matches(m)(value)` | evaluates to `true` if `value` matches `m`. You can use `Matches(m)` alone as a unary functor. | | `ExplainMatchResult(m, value, result_listener)` | evaluates to `true` if `value` matches `m`, explaining the result to `result_listener`. | | `Value(value, m)` | evaluates to `true` if `value` matches `m`. | #### Defining Matchers | Matcher | Description | | :----------------------------------- | :------------------------------------ | | `MATCHER(IsEven, "") { return (arg % 2) == 0; }` | Defines a matcher `IsEven()` to match an even number. | | `MATCHER_P(IsDivisibleBy, n, "") { *result_listener << "where the remainder is " << (arg % n); return (arg % n) == 0; }` | Defines a macher `IsDivisibleBy(n)` to match a number divisible by `n`. | | `MATCHER_P2(IsBetween, a, b, std::string(negation ? "isn't" : "is") + " between " + PrintToString(a) + " and " + PrintToString(b)) { return a <= arg && arg <= b; }` | Defines a matcher `IsBetween(a, b)` to match a value in the range [`a`, `b`]. | **Notes:** 1. The `MATCHER*` macros cannot be used inside a function or class. 2. The matcher body must be *purely functional* (i.e. it cannot have any side effect, and the result must not depend on anything other than the value being matched and the matcher parameters). 3. You can use `PrintToString(x)` to convert a value `x` of any type to a string. ### Actions {#ActionList} **Actions** specify what a mock function should do when invoked. 
#### Returning a Value | | | | :-------------------------- | :-------------------------------------------- | | `Return()` | Return from a `void` mock function. | | `Return(value)` | Return `value`. If the type of `value` is different to the mock function's return type, `value` is converted to the latter type at the time the expectation is set, not when the action is executed. | | `ReturnArg()` | Return the `N`-th (0-based) argument. | | `ReturnNew(a1, ..., ak)` | Return `new T(a1, ..., ak)`; a different object is created each time. | | `ReturnNull()` | Return a null pointer. | | `ReturnPointee(ptr)` | Return the value pointed to by `ptr`. | | `ReturnRef(variable)` | Return a reference to `variable`. | | `ReturnRefOfCopy(value)` | Return a reference to a copy of `value`; the copy lives as long as the action. | #### Side Effects | | | | :--------------------------------- | :-------------------------------------- | | `Assign(&variable, value)` | Assign `value` to variable. | | `DeleteArg()` | Delete the `N`-th (0-based) argument, which must be a pointer. | | `SaveArg(pointer)` | Save the `N`-th (0-based) argument to `*pointer`. | | `SaveArgPointee(pointer)` | Save the value pointed to by the `N`-th (0-based) argument to `*pointer`. | | `SetArgReferee(value)` | Assign value to the variable referenced by the `N`-th (0-based) argument. | | `SetArgPointee(value)` | Assign `value` to the variable pointed by the `N`-th (0-based) argument. | | `SetArgumentPointee(value)` | Same as `SetArgPointee(value)`. Deprecated. Will be removed in v1.7.0. | | `SetArrayArgument(first, last)` | Copies the elements in source range [`first`, `last`) to the array pointed to by the `N`-th (0-based) argument, which can be either a pointer or an iterator. The action does not take ownership of the elements in the source range. | | `SetErrnoAndReturn(error, value)` | Set `errno` to `error` and return `value`. | | `Throw(exception)` | Throws the given exception, which can be any copyable value. 
Available since v1.1.0. | #### Using a Function, Functor, or Lambda as an Action In the following, by "callable" we mean a free function, `std::function`, functor, or lambda. | | | | :---------------------------------- | :------------------------------------- | | `f` | Invoke f with the arguments passed to the mock function, where f is a callable. | | `Invoke(f)` | Invoke `f` with the arguments passed to the mock function, where `f` can be a global/static function or a functor. | | `Invoke(object_pointer, &class::method)` | Invoke the method on the object with the arguments passed to the mock function. | | `InvokeWithoutArgs(f)` | Invoke `f`, which can be a global/static function or a functor. `f` must take no arguments. | | `InvokeWithoutArgs(object_pointer, &class::method)` | Invoke the method on the object, which takes no arguments. | | `InvokeArgument(arg1, arg2, ..., argk)` | Invoke the mock function's `N`-th (0-based) argument, which must be a function or a functor, with the `k` arguments. | The return value of the invoked function is used as the return value of the action. When defining a callable to be used with `Invoke*()`, you can declare any unused parameters as `Unused`: ```cpp using ::testing::Invoke; double Distance(Unused, double x, double y) { return sqrt(x*x + y*y); } ... EXPECT_CALL(mock, Foo("Hi", _, _)).WillOnce(Invoke(Distance)); ``` `Invoke(callback)` and `InvokeWithoutArgs(callback)` take ownership of `callback`, which must be permanent. The type of `callback` must be a base callback type instead of a derived one, e.g. ```cpp BlockingClosure* done = new BlockingClosure; ... Invoke(done) ...; // This won't compile! Closure* done2 = new BlockingClosure; ... Invoke(done2) ...; // This works. ``` In `InvokeArgument(...)`, if an argument needs to be passed by reference, wrap it inside `ByRef()`. For example, ```cpp using ::testing::ByRef; using ::testing::InvokeArgument; ... 
InvokeArgument<2>(5, string("Hi"), ByRef(foo)) ``` calls the mock function's #2 argument, passing to it `5` and `string("Hi")` by value, and `foo` by reference. #### Default Action | Matcher | Description | | :------------ | :----------------------------------------------------- | | `DoDefault()` | Do the default action (specified by `ON_CALL()` or the built-in one). | **Note:** due to technical reasons, `DoDefault()` cannot be used inside a composite action - trying to do so will result in a run-time error. #### Composite Actions | | | | :----------------------------- | :------------------------------------------ | | `DoAll(a1, a2, ..., an)` | Do all actions `a1` to `an` and return the result of `an` in each invocation. The first `n - 1` sub-actions must return void. | | `IgnoreResult(a)` | Perform action `a` and ignore its result. `a` must not return void. | | `WithArg(a)` | Pass the `N`-th (0-based) argument of the mock function to action `a` and perform it. | | `WithArgs(a)` | Pass the selected (0-based) arguments of the mock function to action `a` and perform it. | | `WithoutArgs(a)` | Perform action `a` without any arguments. | #### Defining Actions
`struct SumAction {`
 `template <typename T>`
 `T operator()(T x, T y) { return x + y; }`
`};`
Defines a generic functor that can be used as an action summing its arguments.
| | | | :--------------------------------- | :-------------------------------------- | | `ACTION(Sum) { return arg0 + arg1; }` | Defines an action `Sum()` to return the sum of the mock function's argument #0 and #1. | | `ACTION_P(Plus, n) { return arg0 + n; }` | Defines an action `Plus(n)` to return the sum of the mock function's argument #0 and `n`. | | `ACTION_Pk(Foo, p1, ..., pk) { statements; }` | Defines a parameterized action `Foo(p1, ..., pk)` to execute the given `statements`. | The `ACTION*` macros cannot be used inside a function or class. ### Cardinalities {#CardinalityList} These are used in `Times()` to specify how many times a mock function will be called: | | | | :---------------- | :----------------------------------------------------- | | `AnyNumber()` | The function can be called any number of times. | | `AtLeast(n)` | The call is expected at least `n` times. | | `AtMost(n)` | The call is expected at most `n` times. | | `Between(m, n)` | The call is expected between `m` and `n` (inclusive) times. | | `Exactly(n) or n` | The call is expected exactly `n` times. In particular, the call should never happen when `n` is 0. | ### Expectation Order By default, the expectations can be matched in *any* order. If some or all expectations must be matched in a given order, there are two ways to specify it. They can be used either independently or together. #### The After Clause {#AfterClause} ```cpp using ::testing::Expectation; ... Expectation init_x = EXPECT_CALL(foo, InitX()); Expectation init_y = EXPECT_CALL(foo, InitY()); EXPECT_CALL(foo, Bar()) .After(init_x, init_y); ``` says that `Bar()` can be called only after both `InitX()` and `InitY()` have been called. If you don't know how many pre-requisites an expectation has when you write it, you can use an `ExpectationSet` to collect them: ```cpp using ::testing::ExpectationSet; ... 
ExpectationSet all_inits; for (int i = 0; i < element_count; i++) { all_inits += EXPECT_CALL(foo, InitElement(i)); } EXPECT_CALL(foo, Bar()) .After(all_inits); ``` says that `Bar()` can be called only after all elements have been initialized (but we don't care about which elements get initialized before the others). Modifying an `ExpectationSet` after using it in an `.After()` doesn't affect the meaning of the `.After()`. #### Sequences {#UsingSequences} When you have a long chain of sequential expectations, it's easier to specify the order using **sequences**, which don't require you to given each expectation in the chain a different name. *All expected calls* in the same sequence must occur in the order they are specified. ```cpp using ::testing::Return; using ::testing::Sequence; Sequence s1, s2; ... EXPECT_CALL(foo, Reset()) .InSequence(s1, s2) .WillOnce(Return(true)); EXPECT_CALL(foo, GetSize()) .InSequence(s1) .WillOnce(Return(1)); EXPECT_CALL(foo, Describe(A())) .InSequence(s2) .WillOnce(Return("dummy")); ``` says that `Reset()` must be called before *both* `GetSize()` *and* `Describe()`, and the latter two can occur in any order. To put many expectations in a sequence conveniently: ```cpp using ::testing::InSequence; { InSequence seq; EXPECT_CALL(...)...; EXPECT_CALL(...)...; ... EXPECT_CALL(...)...; } ``` says that all expected calls in the scope of `seq` must occur in strict order. The name `seq` is irrelevant. ### Verifying and Resetting a Mock gMock will verify the expectations on a mock object when it is destructed, or you can do it earlier: ```cpp using ::testing::Mock; ... // Verifies and removes the expectations on mock_obj; // returns true if and only if successful. Mock::VerifyAndClearExpectations(&mock_obj); ... // Verifies and removes the expectations on mock_obj; // also removes the default actions set by ON_CALL(); // returns true if and only if successful. 
Mock::VerifyAndClear(&mock_obj); ``` You can also tell gMock that a mock object can be leaked and doesn't need to be verified: ```cpp Mock::AllowLeak(&mock_obj); ``` ### Mock Classes gMock defines a convenient mock class template ```cpp class MockFunction { public: MOCK_METHOD(R, Call, (A1, ..., An)); }; ``` See this [recipe](cook_book.md#using-check-points) for one application of it. ### Flags | Flag | Description | | :----------------------------- | :---------------------------------------- | | `--gmock_catch_leaked_mocks=0` | Don't report leaked mock objects as failures. | | `--gmock_verbose=LEVEL` | Sets the default verbosity level (`info`, `warning`, or `error`) of Google Mock messages. | LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/docs/cook_book.md000066400000000000000000004361401456444476200256010ustar00rootroot00000000000000# gMock Cookbook You can find recipes for using gMock here. If you haven't yet, please read [this](for_dummies.md) first to make sure you understand the basics. **Note:** gMock lives in the `testing` name space. For readability, it is recommended to write `using ::testing::Foo;` once in your file before using the name `Foo` defined by gMock. We omit such `using` statements in this section for brevity, but you should do it in your own code. ## Creating Mock Classes Mock classes are defined as normal classes, using the `MOCK_METHOD` macro to generate mocked methods. The macro gets 3 or 4 parameters: ```cpp class MyMock { public: MOCK_METHOD(ReturnType, MethodName, (Args...)); MOCK_METHOD(ReturnType, MethodName, (Args...), (Specs...)); }; ``` The first 3 parameters are simply the method declaration, split into 3 parts. The 4th parameter accepts a closed list of qualifiers, which affect the generated method: * **`const`** - Makes the mocked method a `const` method. Required if overriding a `const` method. * **`override`** - Marks the method with `override`. Recommended if overriding a `virtual` method. 
* **`noexcept`** - Marks the method with `noexcept`. Required if overriding a `noexcept` method. * **`Calltype(...)`** - Sets the call type for the method (e.g. to `STDMETHODCALLTYPE`), useful in Windows. ### Dealing with unprotected commas Unprotected commas, i.e. commas which are not surrounded by parentheses, prevent `MOCK_METHOD` from parsing its arguments correctly: ```cpp {.bad} class MockFoo { public: MOCK_METHOD(std::pair, GetPair, ()); // Won't compile! MOCK_METHOD(bool, CheckMap, (std::map, bool)); // Won't compile! }; ``` Solution 1 - wrap with parentheses: ```cpp {.good} class MockFoo { public: MOCK_METHOD((std::pair), GetPair, ()); MOCK_METHOD(bool, CheckMap, ((std::map), bool)); }; ``` Note that wrapping a return or argument type with parentheses is, in general, invalid C++. `MOCK_METHOD` removes the parentheses. Solution 2 - define an alias: ```cpp {.good} class MockFoo { public: using BoolAndInt = std::pair; MOCK_METHOD(BoolAndInt, GetPair, ()); using MapIntDouble = std::map; MOCK_METHOD(bool, CheckMap, (MapIntDouble, bool)); }; ``` ### Mocking Private or Protected Methods You must always put a mock method definition (`MOCK_METHOD`) in a `public:` section of the mock class, regardless of the method being mocked being `public`, `protected`, or `private` in the base class. This allows `ON_CALL` and `EXPECT_CALL` to reference the mock function from outside of the mock class. (Yes, C++ allows a subclass to change the access level of a virtual function in the base class.) Example: ```cpp class Foo { public: ... virtual bool Transform(Gadget* g) = 0; protected: virtual void Resume(); private: virtual int GetTimeOut(); }; class MockFoo : public Foo { public: ... MOCK_METHOD(bool, Transform, (Gadget* g), (override)); // The following must be in the public section, even though the // methods are protected or private in the base class. 
MOCK_METHOD(void, Resume, (), (override)); MOCK_METHOD(int, GetTimeOut, (), (override)); }; ``` ### Mocking Overloaded Methods You can mock overloaded functions as usual. No special attention is required: ```cpp class Foo { ... // Must be virtual as we'll inherit from Foo. virtual ~Foo(); // Overloaded on the types and/or numbers of arguments. virtual int Add(Element x); virtual int Add(int times, Element x); // Overloaded on the const-ness of this object. virtual Bar& GetBar(); virtual const Bar& GetBar() const; }; class MockFoo : public Foo { ... MOCK_METHOD(int, Add, (Element x), (override)); MOCK_METHOD(int, Add, (int times, Element x), (override)); MOCK_METHOD(Bar&, GetBar, (), (override)); MOCK_METHOD(const Bar&, GetBar, (), (const, override)); }; ``` **Note:** if you don't mock all versions of the overloaded method, the compiler will give you a warning about some methods in the base class being hidden. To fix that, use `using` to bring them in scope: ```cpp class MockFoo : public Foo { ... using Foo::Add; MOCK_METHOD(int, Add, (Element x), (override)); // We don't want to mock int Add(int times, Element x); ... }; ``` ### Mocking Class Templates You can mock class templates just like any class. ```cpp template class StackInterface { ... // Must be virtual as we'll inherit from StackInterface. virtual ~StackInterface(); virtual int GetSize() const = 0; virtual void Push(const Elem& x) = 0; }; template class MockStack : public StackInterface { ... MOCK_METHOD(int, GetSize, (), (override)); MOCK_METHOD(void, Push, (const Elem& x), (override)); }; ``` ### Mocking Non-virtual Methods {#MockingNonVirtualMethods} gMock can mock non-virtual functions to be used in Hi-perf dependency injection. In this case, instead of sharing a common base class with the real class, your mock class will be *unrelated* to the real class, but contain methods with the same signatures. 
The syntax for mocking non-virtual methods is the *same* as mocking virtual methods (just don't add `override`): ```cpp // A simple packet stream class. None of its members is virtual. class ConcretePacketStream { public: void AppendPacket(Packet* new_packet); const Packet* GetPacket(size_t packet_number) const; size_t NumberOfPackets() const; ... }; // A mock packet stream class. It inherits from no other, but defines // GetPacket() and NumberOfPackets(). class MockPacketStream { public: MOCK_METHOD(const Packet*, GetPacket, (size_t packet_number), (const)); MOCK_METHOD(size_t, NumberOfPackets, (), (const)); ... }; ``` Note that the mock class doesn't define `AppendPacket()`, unlike the real class. That's fine as long as the test doesn't need to call it. Next, you need a way to say that you want to use `ConcretePacketStream` in production code, and use `MockPacketStream` in tests. Since the functions are not virtual and the two classes are unrelated, you must specify your choice at *compile time* (as opposed to run time). One way to do it is to templatize your code that needs to use a packet stream. More specifically, you will give your code a template type argument for the type of the packet stream. In production, you will instantiate your template with `ConcretePacketStream` as the type argument. In tests, you will instantiate the same template with `MockPacketStream`. For example, you may write: ```cpp template void CreateConnection(PacketStream* stream) { ... } template class PacketReader { public: void ReadPackets(PacketStream* stream, size_t packet_num); }; ``` Then you can use `CreateConnection()` and `PacketReader` in production code, and use `CreateConnection()` and `PacketReader` in tests. ```cpp MockPacketStream mock_stream; EXPECT_CALL(mock_stream, ...)...; .. set more expectations on mock_stream ... PacketReader reader(&mock_stream); ... exercise reader ... ``` ### Mocking Free Functions It's possible to use gMock to mock a free function (i.e. 
a C-style function or a static method). You just need to rewrite your code to use an interface (abstract class). Instead of calling a free function (say, `OpenFile`) directly, introduce an interface for it and have a concrete subclass that calls the free function: ```cpp class FileInterface { public: ... virtual bool Open(const char* path, const char* mode) = 0; }; class File : public FileInterface { public: ... virtual bool Open(const char* path, const char* mode) { return OpenFile(path, mode); } }; ``` Your code should talk to `FileInterface` to open a file. Now it's easy to mock out the function. This may seem like a lot of hassle, but in practice you often have multiple related functions that you can put in the same interface, so the per-function syntactic overhead will be much lower. If you are concerned about the performance overhead incurred by virtual functions, and profiling confirms your concern, you can combine this with the recipe for [mocking non-virtual methods](#MockingNonVirtualMethods). ### Old-Style `MOCK_METHODn` Macros Before the generic `MOCK_METHOD` macro was introduced, mocks where created using a family of macros collectively called `MOCK_METHODn`. These macros are still supported, though migration to the new `MOCK_METHOD` is recommended. The macros in the `MOCK_METHODn` family differ from `MOCK_METHOD`: * The general structure is `MOCK_METHODn(MethodName, ReturnType(Args))`, instead of `MOCK_METHOD(ReturnType, MethodName, (Args))`. * The number `n` must equal the number of arguments. * When mocking a const method, one must use `MOCK_CONST_METHODn`. * When mocking a class template, the macro name must be suffixed with `_T`. * In order to specify the call type, the macro name must be suffixed with `_WITH_CALLTYPE`, and the call type is the first macro argument. Old macros and their new equivalents:
Simple
Old `MOCK_METHOD1(Foo, bool(int))`
New `MOCK_METHOD(bool, Foo, (int))`
Const Method
Old `MOCK_CONST_METHOD1(Foo, bool(int))`
New `MOCK_METHOD(bool, Foo, (int), (const))`
Method in a Class Template
Old `MOCK_METHOD1_T(Foo, bool(int))`
New `MOCK_METHOD(bool, Foo, (int))`
Const Method in a Class Template
Old `MOCK_CONST_METHOD1_T(Foo, bool(int))`
New `MOCK_METHOD(bool, Foo, (int), (const))`
Method with Call Type
Old `MOCK_METHOD1_WITH_CALLTYPE(STDMETHODCALLTYPE, Foo, bool(int))`
New `MOCK_METHOD(bool, Foo, (int), (Calltype(STDMETHODCALLTYPE)))`
Const Method with Call Type
Old `MOCK_CONST_METHOD1_WITH_CALLTYPE(STDMETHODCALLTYPE, Foo, bool(int))`
New `MOCK_METHOD(bool, Foo, (int), (const, Calltype(STDMETHODCALLTYPE)))`
Method with Call Type in a Class Template
Old `MOCK_METHOD1_T_WITH_CALLTYPE(STDMETHODCALLTYPE, Foo, bool(int))`
New `MOCK_METHOD(bool, Foo, (int), (Calltype(STDMETHODCALLTYPE)))`
Const Method with Call Type in a Class Template
Old `MOCK_CONST_METHOD1_T_WITH_CALLTYPE(STDMETHODCALLTYPE, Foo, bool(int))`
New `MOCK_METHOD(bool, Foo, (int), (const, Calltype(STDMETHODCALLTYPE)))`
### The Nice, the Strict, and the Naggy {#NiceStrictNaggy} If a mock method has no `EXPECT_CALL` spec but is called, we say that it's an "uninteresting call", and the default action (which can be specified using `ON_CALL()`) of the method will be taken. Currently, an uninteresting call will also by default cause gMock to print a warning. (In the future, we might remove this warning by default.) However, sometimes you may want to ignore these uninteresting calls, and sometimes you may want to treat them as errors. gMock lets you make the decision on a per-mock-object basis. Suppose your test uses a mock class `MockFoo`: ```cpp TEST(...) { MockFoo mock_foo; EXPECT_CALL(mock_foo, DoThis()); ... code that uses mock_foo ... } ``` If a method of `mock_foo` other than `DoThis()` is called, you will get a warning. However, if you rewrite your test to use `NiceMock` instead, you can suppress the warning: ```cpp using ::testing::NiceMock; TEST(...) { NiceMock mock_foo; EXPECT_CALL(mock_foo, DoThis()); ... code that uses mock_foo ... } ``` `NiceMock` is a subclass of `MockFoo`, so it can be used wherever `MockFoo` is accepted. It also works if `MockFoo`'s constructor takes some arguments, as `NiceMock` "inherits" `MockFoo`'s constructors: ```cpp using ::testing::NiceMock; TEST(...) { NiceMock mock_foo(5, "hi"); // Calls MockFoo(5, "hi"). EXPECT_CALL(mock_foo, DoThis()); ... code that uses mock_foo ... } ``` The usage of `StrictMock` is similar, except that it makes all uninteresting calls failures: ```cpp using ::testing::StrictMock; TEST(...) { StrictMock mock_foo; EXPECT_CALL(mock_foo, DoThis()); ... code that uses mock_foo ... // The test will fail if a method of mock_foo other than DoThis() // is called. } ``` NOTE: `NiceMock` and `StrictMock` only affects *uninteresting* calls (calls of *methods* with no expectations); they do not affect *unexpected* calls (calls of methods with expectations, but they don't match). 
See [Understanding Uninteresting vs Unexpected Calls](#uninteresting-vs-unexpected). There are some caveats though (I dislike them just as much as the next guy, but sadly they are side effects of C++'s limitations): 1. `NiceMock` and `StrictMock` only work for mock methods defined using the `MOCK_METHOD` macro **directly** in the `MockFoo` class. If a mock method is defined in a **base class** of `MockFoo`, the "nice" or "strict" modifier may not affect it, depending on the compiler. In particular, nesting `NiceMock` and `StrictMock` (e.g. `NiceMock >`) is **not** supported. 2. `NiceMock` and `StrictMock` may not work correctly if the destructor of `MockFoo` is not virtual. We would like to fix this, but it requires cleaning up existing tests. http://b/28934720 tracks the issue. 3. During the constructor or destructor of `MockFoo`, the mock object is *not* nice or strict. This may cause surprises if the constructor or destructor calls a mock method on `this` object. (This behavior, however, is consistent with C++'s general rule: if a constructor or destructor calls a virtual method of `this` object, that method is treated as non-virtual. In other words, to the base class's constructor or destructor, `this` object behaves like an instance of the base class, not the derived class. This rule is required for safety. Otherwise a base constructor may use members of a derived class before they are initialized, or a base destructor may use members of a derived class after they have been destroyed.) Finally, you should be **very cautious** about when to use naggy or strict mocks, as they tend to make tests more brittle and harder to maintain. When you refactor your code without changing its externally visible behavior, ideally you shouldn't need to update any tests. If your code interacts with a naggy mock, however, you may start to get spammed with warnings as the result of your change. 
Worse, if your code interacts with a strict mock, your tests may start to fail and you'll be forced to fix them. Our general recommendation is to use nice mocks (not yet the default) most of the time, use naggy mocks (the current default) when developing or debugging tests, and use strict mocks only as the last resort. ### Simplifying the Interface without Breaking Existing Code {#SimplerInterfaces} Sometimes a method has a long list of arguments that is mostly uninteresting. For example: ```cpp class LogSink { public: ... virtual void send(LogSeverity severity, const char* full_filename, const char* base_filename, int line, const struct tm* tm_time, const char* message, size_t message_len) = 0; }; ``` This method's argument list is lengthy and hard to work with (the `message` argument is not even 0-terminated). If we mock it as is, using the mock will be awkward. If, however, we try to simplify this interface, we'll need to fix all clients depending on it, which is often infeasible. The trick is to redispatch the method in the mock class: ```cpp class ScopedMockLog : public LogSink { public: ... virtual void send(LogSeverity severity, const char* full_filename, const char* base_filename, int line, const tm* tm_time, const char* message, size_t message_len) { // We are only interested in the log severity, full file name, and // log message. Log(severity, full_filename, std::string(message, message_len)); } // Implements the mock method: // // void Log(LogSeverity severity, // const string& file_path, // const string& message); MOCK_METHOD(void, Log, (LogSeverity severity, const string& file_path, const string& message)); }; ``` By defining a new mock method with a trimmed argument list, we make the mock class more user-friendly. This technique may also be applied to make overloaded methods more amenable to mocking. 
For example, when overloads have been used to implement default arguments: ```cpp class MockTurtleFactory : public TurtleFactory { public: Turtle* MakeTurtle(int length, int weight) override { ... } Turtle* MakeTurtle(int length, int weight, int speed) override { ... } // the above methods delegate to this one: MOCK_METHOD(Turtle*, DoMakeTurtle, ()); }; ``` This allows tests that don't care which overload was invoked to avoid specifying argument matchers: ```cpp ON_CALL(factory, DoMakeTurtle) .WillByDefault(MakeMockTurtle()); ``` ### Alternative to Mocking Concrete Classes Often you may find yourself using classes that don't implement interfaces. In order to test your code that uses such a class (let's call it `Concrete`), you may be tempted to make the methods of `Concrete` virtual and then mock it. Try not to do that. Making a non-virtual function virtual is a big decision. It creates an extension point where subclasses can tweak your class' behavior. This weakens your control on the class because now it's harder to maintain the class invariants. You should make a function virtual only when there is a valid reason for a subclass to override it. Mocking concrete classes directly is problematic as it creates a tight coupling between the class and the tests - any small change in the class may invalidate your tests and make test maintenance a pain. To avoid such problems, many programmers have been practicing "coding to interfaces": instead of talking to the `Concrete` class, your code would define an interface and talk to it. Then you implement that interface as an adaptor on top of `Concrete`. In tests, you can easily mock that interface to observe how your code is doing. This technique incurs some overhead: * You pay the cost of virtual function calls (usually not a problem). * There is more abstraction for the programmers to learn. 
However, it can also bring significant benefits in addition to better testability: * `Concrete`'s API may not fit your problem domain very well, as you may not be the only client it tries to serve. By designing your own interface, you have a chance to tailor it to your need - you may add higher-level functionalities, rename stuff, etc instead of just trimming the class. This allows you to write your code (user of the interface) in a more natural way, which means it will be more readable, more maintainable, and you'll be more productive. * If `Concrete`'s implementation ever has to change, you don't have to rewrite everywhere it is used. Instead, you can absorb the change in your implementation of the interface, and your other code and tests will be insulated from this change. Some people worry that if everyone is practicing this technique, they will end up writing lots of redundant code. This concern is totally understandable. However, there are two reasons why it may not be the case: * Different projects may need to use `Concrete` in different ways, so the best interfaces for them will be different. Therefore, each of them will have its own domain-specific interface on top of `Concrete`, and they will not be the same code. * If enough projects want to use the same interface, they can always share it, just like they have been sharing `Concrete`. You can check in the interface and the adaptor somewhere near `Concrete` (perhaps in a `contrib` sub-directory) and let many projects use it. You need to weigh the pros and cons carefully for your particular problem, but I'd like to assure you that the Java community has been practicing this for a long time and it's a proven effective technique applicable in a wide variety of situations. :-) ### Delegating Calls to a Fake {#DelegatingToFake} Some times you have a non-trivial fake implementation of an interface. 
For example: ```cpp class Foo { public: virtual ~Foo() {} virtual char DoThis(int n) = 0; virtual void DoThat(const char* s, int* p) = 0; }; class FakeFoo : public Foo { public: char DoThis(int n) override { return (n > 0) ? '+' : (n < 0) ? '-' : '0'; } void DoThat(const char* s, int* p) override { *p = strlen(s); } }; ``` Now you want to mock this interface such that you can set expectations on it. However, you also want to use `FakeFoo` for the default behavior, as duplicating it in the mock object is, well, a lot of work. When you define the mock class using gMock, you can have it delegate its default action to a fake class you already have, using this pattern: ```cpp class MockFoo : public Foo { public: // Normal mock method definitions using gMock. MOCK_METHOD(char, DoThis, (int n), (override)); MOCK_METHOD(void, DoThat, (const char* s, int* p), (override)); // Delegates the default actions of the methods to a FakeFoo object. // This must be called *before* the custom ON_CALL() statements. void DelegateToFake() { ON_CALL(*this, DoThis).WillByDefault([this](int n) { return fake_.DoThis(n); }); ON_CALL(*this, DoThat).WillByDefault([this](const char* s, int* p) { fake_.DoThat(s, p); }); } private: FakeFoo fake_; // Keeps an instance of the fake in the mock. }; ``` With that, you can use `MockFoo` in your tests as usual. Just remember that if you don't explicitly set an action in an `ON_CALL()` or `EXPECT_CALL()`, the fake will be called upon to do it.: ```cpp using ::testing::_; TEST(AbcTest, Xyz) { MockFoo foo; foo.DelegateToFake(); // Enables the fake for delegation. // Put your ON_CALL(foo, ...)s here, if any. // No action specified, meaning to use the default action. EXPECT_CALL(foo, DoThis(5)); EXPECT_CALL(foo, DoThat(_, _)); int n = 0; EXPECT_EQ('+', foo.DoThis(5)); // FakeFoo::DoThis() is invoked. foo.DoThat("Hi", &n); // FakeFoo::DoThat() is invoked. 
EXPECT_EQ(2, n); } ``` **Some tips:** * If you want, you can still override the default action by providing your own `ON_CALL()` or using `.WillOnce()` / `.WillRepeatedly()` in `EXPECT_CALL()`. * In `DelegateToFake()`, you only need to delegate the methods whose fake implementation you intend to use. * The general technique discussed here works for overloaded methods, but you'll need to tell the compiler which version you mean. To disambiguate a mock function (the one you specify inside the parentheses of `ON_CALL()`), use [this technique](#SelectOverload); to disambiguate a fake function (the one you place inside `Invoke()`), use a `static_cast` to specify the function's type. For instance, if class `Foo` has methods `char DoThis(int n)` and `bool DoThis(double x) const`, and you want to invoke the latter, you need to write `Invoke(&fake_, static_cast(&FakeFoo::DoThis))` instead of `Invoke(&fake_, &FakeFoo::DoThis)` (The strange-looking thing inside the angled brackets of `static_cast` is the type of a function pointer to the second `DoThis()` method.). * Having to mix a mock and a fake is often a sign of something gone wrong. Perhaps you haven't got used to the interaction-based way of testing yet. Or perhaps your interface is taking on too many roles and should be split up. Therefore, **don't abuse this**. We would only recommend to do it as an intermediate step when you are refactoring your code. Regarding the tip on mixing a mock and a fake, here's an example on why it may be a bad sign: Suppose you have a class `System` for low-level system operations. In particular, it does file and I/O operations. And suppose you want to test how your code uses `System` to do I/O, and you just want the file operations to work normally. If you mock out the entire `System` class, you'll have to provide a fake implementation for the file operation part, which suggests that `System` is taking on too many roles. 
Instead, you can define a `FileOps` interface and an `IOOps` interface and split `System`'s functionalities into the two. Then you can mock `IOOps` without mocking `FileOps`. ### Delegating Calls to a Real Object When using testing doubles (mocks, fakes, stubs, and etc), sometimes their behaviors will differ from those of the real objects. This difference could be either intentional (as in simulating an error such that you can test the error handling code) or unintentional. If your mocks have different behaviors than the real objects by mistake, you could end up with code that passes the tests but fails in production. You can use the *delegating-to-real* technique to ensure that your mock has the same behavior as the real object while retaining the ability to validate calls. This technique is very similar to the [delegating-to-fake](#DelegatingToFake) technique, the difference being that we use a real object instead of a fake. Here's an example: ```cpp using ::testing::AtLeast; class MockFoo : public Foo { public: MockFoo() { // By default, all calls are delegated to the real object. ON_CALL(*this, DoThis).WillByDefault([this](int n) { return real_.DoThis(n); }); ON_CALL(*this, DoThat).WillByDefault([this](const char* s, int* p) { real_.DoThat(s, p); }); ... } MOCK_METHOD(char, DoThis, ...); MOCK_METHOD(void, DoThat, ...); ... private: Foo real_; }; ... MockFoo mock; EXPECT_CALL(mock, DoThis()) .Times(3); EXPECT_CALL(mock, DoThat("Hi")) .Times(AtLeast(1)); ... use mock in test ... ``` With this, gMock will verify that your code made the right calls (with the right arguments, in the right order, called the right number of times, etc), and a real object will answer the calls (so the behavior will be the same as in production). This gives you the best of both worlds. ### Delegating Calls to a Parent Class Ideally, you should code to interfaces, whose methods are all pure virtual. 
In reality, sometimes you do need to mock a virtual method that is not pure (i.e, it already has an implementation). For example: ```cpp class Foo { public: virtual ~Foo(); virtual void Pure(int n) = 0; virtual int Concrete(const char* str) { ... } }; class MockFoo : public Foo { public: // Mocking a pure method. MOCK_METHOD(void, Pure, (int n), (override)); // Mocking a concrete method. Foo::Concrete() is shadowed. MOCK_METHOD(int, Concrete, (const char* str), (override)); }; ``` Sometimes you may want to call `Foo::Concrete()` instead of `MockFoo::Concrete()`. Perhaps you want to do it as part of a stub action, or perhaps your test doesn't need to mock `Concrete()` at all (but it would be oh-so painful to have to define a new mock class whenever you don't need to mock one of its methods). The trick is to leave a back door in your mock class for accessing the real methods in the base class: ```cpp class MockFoo : public Foo { public: // Mocking a pure method. MOCK_METHOD(void, Pure, (int n), (override)); // Mocking a concrete method. Foo::Concrete() is shadowed. MOCK_METHOD(int, Concrete, (const char* str), (override)); // Use this to call Concrete() defined in Foo. int FooConcrete(const char* str) { return Foo::Concrete(str); } }; ``` Now, you can call `Foo::Concrete()` inside an action by: ```cpp ... EXPECT_CALL(foo, Concrete).WillOnce([&foo](const char* str) { return foo.FooConcrete(str); }); ``` or tell the mock object that you don't want to mock `Concrete()`: ```cpp ... ON_CALL(foo, Concrete).WillByDefault([&foo](const char* str) { return foo.FooConcrete(str); }); ``` (Why don't we just write `{ return foo.Concrete(str); }`? If you do that, `MockFoo::Concrete()` will be called (and cause an infinite recursion) since `Foo::Concrete()` is virtual. That's just how C++ works.) ## Using Matchers ### Matching Argument Values Exactly You can specify exactly which arguments a mock method is expecting: ```cpp using ::testing::Return; ... 
EXPECT_CALL(foo, DoThis(5)) .WillOnce(Return('a')); EXPECT_CALL(foo, DoThat("Hello", bar)); ``` ### Using Simple Matchers You can use matchers to match arguments that have a certain property: ```cpp using ::testing::NotNull; using ::testing::Return; ... EXPECT_CALL(foo, DoThis(Ge(5))) // The argument must be >= 5. .WillOnce(Return('a')); EXPECT_CALL(foo, DoThat("Hello", NotNull())); // The second argument must not be NULL. ``` A frequently used matcher is `_`, which matches anything: ```cpp EXPECT_CALL(foo, DoThat(_, NotNull())); ``` ### Combining Matchers {#CombiningMatchers} You can build complex matchers from existing ones using `AllOf()`, `AllOfArray()`, `AnyOf()`, `AnyOfArray()` and `Not()`: ```cpp using ::testing::AllOf; using ::testing::Gt; using ::testing::HasSubstr; using ::testing::Ne; using ::testing::Not; ... // The argument must be > 5 and != 10. EXPECT_CALL(foo, DoThis(AllOf(Gt(5), Ne(10)))); // The first argument must not contain sub-string "blah". EXPECT_CALL(foo, DoThat(Not(HasSubstr("blah")), NULL)); ``` ### Casting Matchers {#SafeMatcherCast} gMock matchers are statically typed, meaning that the compiler can catch your mistake if you use a matcher of the wrong type (for example, if you use `Eq(5)` to match a `string` argument). Good for you! Sometimes, however, you know what you're doing and want the compiler to give you some slack. One example is that you have a matcher for `long` and the argument you want to match is `int`. While the two types aren't exactly the same, there is nothing really wrong with using a `Matcher` to match an `int` - after all, we can first convert the `int` argument to a `long` losslessly before giving it to the matcher. To support this need, gMock gives you the `SafeMatcherCast(m)` function. It casts a matcher `m` to type `Matcher`. To ensure safety, gMock checks that (let `U` be the type `m` accepts : 1. Type `T` can be *implicitly* cast to type `U`; 2. 
When both `T` and `U` are built-in arithmetic types (`bool`, integers, and floating-point numbers), the conversion from `T` to `U` is not lossy (in other words, any value representable by `T` can also be represented by `U`); and 3. When `U` is a reference, `T` must also be a reference (as the underlying matcher may be interested in the address of the `U` value). The code won't compile if any of these conditions isn't met. Here's one example: ```cpp using ::testing::SafeMatcherCast; // A base class and a child class. class Base { ... }; class Derived : public Base { ... }; class MockFoo : public Foo { public: MOCK_METHOD(void, DoThis, (Derived* derived), (override)); }; ... MockFoo foo; // m is a Matcher we got from somewhere. EXPECT_CALL(foo, DoThis(SafeMatcherCast(m))); ``` If you find `SafeMatcherCast(m)` too limiting, you can use a similar function `MatcherCast(m)`. The difference is that `MatcherCast` works as long as you can `static_cast` type `T` to type `U`. `MatcherCast` essentially lets you bypass C++'s type system (`static_cast` isn't always safe as it could throw away information, for example), so be careful not to misuse/abuse it. ### Selecting Between Overloaded Functions {#SelectOverload} If you expect an overloaded function to be called, the compiler may need some help on which overloaded version it is. To disambiguate functions overloaded on the const-ness of this object, use the `Const()` argument wrapper. ```cpp using ::testing::ReturnRef; class MockFoo : public Foo { ... MOCK_METHOD(Bar&, GetBar, (), (override)); MOCK_METHOD(const Bar&, GetBar, (), (const, override)); }; ... MockFoo foo; Bar bar1, bar2; EXPECT_CALL(foo, GetBar()) // The non-const GetBar(). .WillOnce(ReturnRef(bar1)); EXPECT_CALL(Const(foo), GetBar()) // The const GetBar(). .WillOnce(ReturnRef(bar2)); ``` (`Const()` is defined by gMock and returns a `const` reference to its argument.) 
To disambiguate overloaded functions with the same number of arguments but different argument types, you may need to specify the exact type of a matcher, either by wrapping your matcher in `Matcher()`, or using a matcher whose type is fixed (`TypedEq`, `An()`, etc): ```cpp using ::testing::An; using ::testing::Matcher; using ::testing::TypedEq; class MockPrinter : public Printer { public: MOCK_METHOD(void, Print, (int n), (override)); MOCK_METHOD(void, Print, (char c), (override)); }; TEST(PrinterTest, Print) { MockPrinter printer; EXPECT_CALL(printer, Print(An())); // void Print(int); EXPECT_CALL(printer, Print(Matcher(Lt(5)))); // void Print(int); EXPECT_CALL(printer, Print(TypedEq('a'))); // void Print(char); printer.Print(3); printer.Print(6); printer.Print('a'); } ``` ### Performing Different Actions Based on the Arguments When a mock method is called, the *last* matching expectation that's still active will be selected (think "newer overrides older"). So, you can make a method do different things depending on its argument values like this: ```cpp using ::testing::_; using ::testing::Lt; using ::testing::Return; ... // The default case. EXPECT_CALL(foo, DoThis(_)) .WillRepeatedly(Return('b')); // The more specific case. EXPECT_CALL(foo, DoThis(Lt(5))) .WillRepeatedly(Return('a')); ``` Now, if `foo.DoThis()` is called with a value less than 5, `'a'` will be returned; otherwise `'b'` will be returned. ### Matching Multiple Arguments as a Whole Sometimes it's not enough to match the arguments individually. For example, we may want to say that the first argument must be less than the second argument. The `With()` clause allows us to match all arguments of a mock function as a whole. For example, ```cpp using ::testing::_; using ::testing::Ne; using ::testing::Lt; ... EXPECT_CALL(foo, InRange(Ne(0), _)) .With(Lt()); ``` says that the first argument of `InRange()` must not be 0, and must be less than the second argument. 
The expression inside `With()` must be a matcher of type `Matcher< ::std::tuple >`, where `A1`, ..., `An` are the types of the function arguments. You can also write `AllArgs(m)` instead of `m` inside `.With()`. The two forms are equivalent, but `.With(AllArgs(Lt()))` is more readable than `.With(Lt())`. You can use `Args(m)` to match the `n` selected arguments (as a tuple) against `m`. For example, ```cpp using ::testing::_; using ::testing::AllOf; using ::testing::Args; using ::testing::Lt; ... EXPECT_CALL(foo, Blah) .With(AllOf(Args<0, 1>(Lt()), Args<1, 2>(Lt()))); ``` says that `Blah` will be called with arguments `x`, `y`, and `z` where `x < y < z`. Note that in this example, it wasn't necessary specify the positional matchers. As a convenience and example, gMock provides some matchers for 2-tuples, including the `Lt()` matcher above. See [here](#MultiArgMatchers) for the complete list. Note that if you want to pass the arguments to a predicate of your own (e.g. `.With(Args<0, 1>(Truly(&MyPredicate)))`), that predicate MUST be written to take a `::std::tuple` as its argument; gMock will pass the `n` selected arguments as *one* single tuple to the predicate. ### Using Matchers as Predicates Have you noticed that a matcher is just a fancy predicate that also knows how to describe itself? Many existing algorithms take predicates as arguments (e.g. those defined in STL's `` header), and it would be a shame if gMock matchers were not allowed to participate. Luckily, you can use a matcher where a unary predicate functor is expected by wrapping it inside the `Matches()` function. For example, ```cpp #include #include using ::testing::Matches; using ::testing::Ge; vector v; ... // How many elements in v are >= 10? 
const int count = count_if(v.begin(), v.end(), Matches(Ge(10))); ``` Since you can build complex matchers from simpler ones easily using gMock, this gives you a way to conveniently construct composite predicates (doing the same using STL's `` header is just painful). For example, here's a predicate that's satisfied by any number that is >= 0, <= 100, and != 50: ```cpp using testing::AllOf; using testing::Ge; using testing::Le; using testing::Matches; using testing::Ne; ... Matches(AllOf(Ge(0), Le(100), Ne(50))) ``` ### Using Matchers in googletest Assertions Since matchers are basically predicates that also know how to describe themselves, there is a way to take advantage of them in googletest assertions. It's called `ASSERT_THAT` and `EXPECT_THAT`: ```cpp ASSERT_THAT(value, matcher); // Asserts that value matches matcher. EXPECT_THAT(value, matcher); // The non-fatal version. ``` For example, in a googletest test you can write: ```cpp #include "gmock/gmock.h" using ::testing::AllOf; using ::testing::Ge; using ::testing::Le; using ::testing::MatchesRegex; using ::testing::StartsWith; ... EXPECT_THAT(Foo(), StartsWith("Hello")); EXPECT_THAT(Bar(), MatchesRegex("Line \\d+")); ASSERT_THAT(Baz(), AllOf(Ge(5), Le(10))); ``` which (as you can probably guess) executes `Foo()`, `Bar()`, and `Baz()`, and verifies that: * `Foo()` returns a string that starts with `"Hello"`. * `Bar()` returns a string that matches regular expression `"Line \\d+"`. * `Baz()` returns a number in the range [5, 10]. The nice thing about these macros is that *they read like English*. They generate informative messages too. For example, if the first `EXPECT_THAT()` above fails, the message will be something like: ```cpp Value of: Foo() Actual: "Hi, world!" Expected: starts with "Hello" ``` **Credit:** The idea of `(ASSERT|EXPECT)_THAT` was borrowed from Joe Walnes' Hamcrest project, which adds `assertThat()` to JUnit. 
### Using Predicates as Matchers gMock provides a [built-in set](#MatcherList) of matchers. In case you find them lacking, you can use an arbitrary unary predicate function or functor as a matcher - as long as the predicate accepts a value of the type you want. You do this by wrapping the predicate inside the `Truly()` function, for example: ```cpp using ::testing::Truly; int IsEven(int n) { return (n % 2) == 0 ? 1 : 0; } ... // Bar() must be called with an even number. EXPECT_CALL(foo, Bar(Truly(IsEven))); ``` Note that the predicate function / functor doesn't have to return `bool`. It works as long as the return value can be used as the condition in in statement `if (condition) ...`. ### Matching Arguments that Are Not Copyable When you do an `EXPECT_CALL(mock_obj, Foo(bar))`, gMock saves away a copy of `bar`. When `Foo()` is called later, gMock compares the argument to `Foo()` with the saved copy of `bar`. This way, you don't need to worry about `bar` being modified or destroyed after the `EXPECT_CALL()` is executed. The same is true when you use matchers like `Eq(bar)`, `Le(bar)`, and so on. But what if `bar` cannot be copied (i.e. has no copy constructor)? You could define your own matcher function or callback and use it with `Truly()`, as the previous couple of recipes have shown. Or, you may be able to get away from it if you can guarantee that `bar` won't be changed after the `EXPECT_CALL()` is executed. Just tell gMock that it should save a reference to `bar`, instead of a copy of it. Here's how: ```cpp using ::testing::ByRef; using ::testing::Eq; using ::testing::Lt; ... // Expects that Foo()'s argument == bar. EXPECT_CALL(mock_obj, Foo(Eq(ByRef(bar)))); // Expects that Foo()'s argument < bar. EXPECT_CALL(mock_obj, Foo(Lt(ByRef(bar)))); ``` Remember: if you do this, don't change `bar` after the `EXPECT_CALL()`, or the result is undefined. ### Validating a Member of an Object Often a mock function takes a reference to object as an argument. 
When matching the argument, you may not want to compare the entire object against a fixed object, as that may be over-specification. Instead, you may need to validate a certain member variable or the result of a certain getter method of the object. You can do this with `Field()` and `Property()`. More specifically, ```cpp Field(&Foo::bar, m) ``` is a matcher that matches a `Foo` object whose `bar` member variable satisfies matcher `m`. ```cpp Property(&Foo::baz, m) ``` is a matcher that matches a `Foo` object whose `baz()` method returns a value that satisfies matcher `m`. For example: | Expression | Description | | :--------------------------- | :--------------------------------------- | | `Field(&Foo::number, Ge(3))` | Matches `x` where `x.number >= 3`. | | `Property(&Foo::name, StartsWith("John "))` | Matches `x` where `x.name()` starts with `"John "`. | Note that in `Property(&Foo::baz, ...)`, method `baz()` must take no argument and be declared as `const`. BTW, `Field()` and `Property()` can also match plain pointers to objects. For instance, ```cpp using ::testing::Field; using ::testing::Ge; ... Field(&Foo::number, Ge(3)) ``` matches a plain pointer `p` where `p->number >= 3`. If `p` is `NULL`, the match will always fail regardless of the inner matcher. What if you want to validate more than one members at the same time? Remember that there are [`AllOf()` and `AllOfArray()`](#CombiningMatchers). Finally `Field()` and `Property()` provide overloads that take the field or property names as the first argument to include it in the error message. This can be useful when creating combined matchers. 
```cpp using ::testing::AllOf; using ::testing::Field; using ::testing::Matcher; using ::testing::SafeMatcherCast; Matcher IsFoo(const Foo& foo) { return AllOf(Field("some_field", &Foo::some_field, foo.some_field), Field("other_field", &Foo::other_field, foo.other_field), Field("last_field", &Foo::last_field, foo.last_field)); } ``` ### Validating the Value Pointed to by a Pointer Argument C++ functions often take pointers as arguments. You can use matchers like `IsNull()`, `NotNull()`, and other comparison matchers to match a pointer, but what if you want to make sure the value *pointed to* by the pointer, instead of the pointer itself, has a certain property? Well, you can use the `Pointee(m)` matcher. `Pointee(m)` matches a pointer if and only if `m` matches the value the pointer points to. For example: ```cpp using ::testing::Ge; using ::testing::Pointee; ... EXPECT_CALL(foo, Bar(Pointee(Ge(3)))); ``` expects `foo.Bar()` to be called with a pointer that points to a value greater than or equal to 3. One nice thing about `Pointee()` is that it treats a `NULL` pointer as a match failure, so you can write `Pointee(m)` instead of ```cpp using ::testing::AllOf; using ::testing::NotNull; using ::testing::Pointee; ... AllOf(NotNull(), Pointee(m)) ``` without worrying that a `NULL` pointer will crash your test. Also, did we tell you that `Pointee()` works with both raw pointers **and** smart pointers (`std::unique_ptr`, `std::shared_ptr`, etc)? What if you have a pointer to pointer? You guessed it - you can use nested `Pointee()` to probe deeper inside the value. For example, `Pointee(Pointee(Lt(3)))` matches a pointer that points to a pointer that points to a number less than 3 (what a mouthful...). ### Testing a Certain Property of an Object Sometimes you want to specify that an object argument has a certain property, but there is no existing matcher that does this. If you want good error messages, you should [define a matcher](#NewMatchers). 
If you want to do it quick and dirty, you could get away with writing an ordinary function. Let's say you have a mock function that takes an object of type `Foo`, which has an `int bar()` method and an `int baz()` method, and you want to constrain that the argument's `bar()` value plus its `baz()` value is a given number. Here's how you can define a matcher to do it: ```cpp using ::testing::Matcher; using ::testing::MatcherInterface; using ::testing::MatchResultListener; class BarPlusBazEqMatcher : public MatcherInterface { public: explicit BarPlusBazEqMatcher(int expected_sum) : expected_sum_(expected_sum) {} bool MatchAndExplain(const Foo& foo, MatchResultListener* /* listener */) const override { return (foo.bar() + foo.baz()) == expected_sum_; } void DescribeTo(::std::ostream* os) const override { *os << "bar() + baz() equals " << expected_sum_; } void DescribeNegationTo(::std::ostream* os) const override { *os << "bar() + baz() does not equal " << expected_sum_; } private: const int expected_sum_; }; Matcher BarPlusBazEq(int expected_sum) { return MakeMatcher(new BarPlusBazEqMatcher(expected_sum)); } ... EXPECT_CALL(..., DoThis(BarPlusBazEq(5)))...; ``` ### Matching Containers Sometimes an STL container (e.g. list, vector, map, ...) is passed to a mock function and you may want to validate it. Since most STL containers support the `==` operator, you can write `Eq(expected_container)` or simply `expected_container` to match a container exactly. Sometimes, though, you may want to be more flexible (for example, the first element must be an exact match, but the second element can be any positive number, and so on). Also, containers used in tests often have a small number of elements, and having to define the expected container out-of-line is a bit of a hassle. You can use the `ElementsAre()` or `UnorderedElementsAre()` matcher in such cases: ```cpp using ::testing::_; using ::testing::ElementsAre; using ::testing::Gt; ... 
MOCK_METHOD(void, Foo, (const vector& numbers), (override)); ... EXPECT_CALL(mock, Foo(ElementsAre(1, Gt(0), _, 5))); ``` The above matcher says that the container must have 4 elements, which must be 1, greater than 0, anything, and 5 respectively. If you instead write: ```cpp using ::testing::_; using ::testing::Gt; using ::testing::UnorderedElementsAre; ... MOCK_METHOD(void, Foo, (const vector& numbers), (override)); ... EXPECT_CALL(mock, Foo(UnorderedElementsAre(1, Gt(0), _, 5))); ``` It means that the container must have 4 elements, which (under some permutation) must be 1, greater than 0, anything, and 5 respectively. As an alternative you can place the arguments in a C-style array and use `ElementsAreArray()` or `UnorderedElementsAreArray()` instead: ```cpp using ::testing::ElementsAreArray; ... // ElementsAreArray accepts an array of element values. const int expected_vector1[] = {1, 5, 2, 4, ...}; EXPECT_CALL(mock, Foo(ElementsAreArray(expected_vector1))); // Or, an array of element matchers. Matcher expected_vector2[] = {1, Gt(2), _, 3, ...}; EXPECT_CALL(mock, Foo(ElementsAreArray(expected_vector2))); ``` In case the array needs to be dynamically created (and therefore the array size cannot be inferred by the compiler), you can give `ElementsAreArray()` an additional argument to specify the array size: ```cpp using ::testing::ElementsAreArray; ... int* const expected_vector3 = new int[count]; ... fill expected_vector3 with values ... EXPECT_CALL(mock, Foo(ElementsAreArray(expected_vector3, count))); ``` Use `Pair` when comparing maps or other associative containers. ```cpp using testing::ElementsAre; using testing::Pair; ... std::map m = {{"a", 1}, {"b", 2}, {"c", 3}}; EXPECT_THAT(m, ElementsAre(Pair("a", 1), Pair("b", 2), Pair("c", 3))); ``` **Tips:** * `ElementsAre*()` can be used to match *any* container that implements the STL iterator pattern (i.e. it has a `const_iterator` type and supports `begin()/end()`), not just the ones defined in STL. 
It will even work with container types yet to be written - as long as they follow the above pattern.
There are basically two constructs for defining the behavior of a mock object: `ON_CALL` and `EXPECT_CALL`. The difference? `ON_CALL` defines what happens when a mock method is called, but doesn't imply any expectation on the method being called. `EXPECT_CALL` not only defines the behavior, but also sets an expectation that the method will be called with the given arguments, for the given number of times (and *in the given order* when you specify the order too). Since `EXPECT_CALL` does more, isn't it better than `ON_CALL`? Not really. Every `EXPECT_CALL` adds a constraint on the behavior of the code under test. Having more constraints than necessary is *baaad* - even worse than not having enough constraints. This may be counter-intuitive. How could tests that verify more be worse than tests that verify less? Isn't verification the whole point of tests? The answer lies in *what* a test should verify. **A good test verifies the contract of the code.** If a test over-specifies, it doesn't leave enough freedom to the implementation. As a result, changing the implementation without breaking the contract (e.g. refactoring and optimization), which should be perfectly fine to do, can break such tests. Then you have to spend time fixing them, only to see them broken again the next time the implementation is changed. Keep in mind that one doesn't have to verify more than one property in one test. In fact, **it's a good style to verify only one thing in one test.** If you do that, a bug will likely break only one or two tests instead of dozens (which case would you rather debug?). If you are also in the habit of giving tests descriptive names that tell what they verify, you can often easily guess what's wrong just from the test log itself. So use `ON_CALL` by default, and only use `EXPECT_CALL` when you actually intend to verify that the call is made. 
For example, you may have a bunch of `ON_CALL`s in your test fixture to set the common mock behavior shared by all tests in the same group, and write (scarcely) different `EXPECT_CALL`s in different `TEST_F`s to verify different aspects of the code's behavior. Compared with the style where each `TEST` has many `EXPECT_CALL`s, this leads to tests that are more resilient to implementational changes (and thus less likely to require maintenance) and makes the intent of the tests more obvious (so they are easier to maintain when you do need to maintain them). If you are bothered by the "Uninteresting mock function call" message printed when a mock method without an `EXPECT_CALL` is called, you may use a `NiceMock` instead to suppress all such messages for the mock object, or suppress the message for specific methods by adding `EXPECT_CALL(...).Times(AnyNumber())`. DO NOT suppress it by blindly adding an `EXPECT_CALL(...)`, or you'll have a test that's a pain to maintain. ### Ignoring Uninteresting Calls If you are not interested in how a mock method is called, just don't say anything about it. In this case, if the method is ever called, gMock will perform its default action to allow the test program to continue. If you are not happy with the default action taken by gMock, you can override it using `DefaultValue::Set()` (described [here](#DefaultValue)) or `ON_CALL()`. Please note that once you expressed interest in a particular mock method (via `EXPECT_CALL()`), all invocations to it must match some expectation. If this function is called but the arguments don't match any `EXPECT_CALL()` statement, it will be an error. ### Disallowing Unexpected Calls If a mock method shouldn't be called at all, explicitly say so: ```cpp using ::testing::_; ... EXPECT_CALL(foo, Bar(_)) .Times(0); ``` If some calls to the method are allowed, but the rest are not, just list all the expected calls: ```cpp using ::testing::AnyNumber; using ::testing::Gt; ... 
EXPECT_CALL(foo, Bar(5)); EXPECT_CALL(foo, Bar(Gt(10))) .Times(AnyNumber()); ``` A call to `foo.Bar()` that doesn't match any of the `EXPECT_CALL()` statements will be an error. ### Understanding Uninteresting vs Unexpected Calls {#uninteresting-vs-unexpected} *Uninteresting* calls and *unexpected* calls are different concepts in gMock. *Very* different. A call `x.Y(...)` is **uninteresting** if there's *not even a single* `EXPECT_CALL(x, Y(...))` set. In other words, the test isn't interested in the `x.Y()` method at all, as evident in that the test doesn't care to say anything about it. A call `x.Y(...)` is **unexpected** if there are *some* `EXPECT_CALL(x, Y(...))`s set, but none of them matches the call. Put another way, the test is interested in the `x.Y()` method (therefore it explicitly sets some `EXPECT_CALL` to verify how it's called); however, the verification fails as the test doesn't expect this particular call to happen. **An unexpected call is always an error,** as the code under test doesn't behave the way the test expects it to behave. **By default, an uninteresting call is not an error,** as it violates no constraint specified by the test. (gMock's philosophy is that saying nothing means there is no constraint.) However, it leads to a warning, as it *might* indicate a problem (e.g. the test author might have forgotten to specify a constraint). In gMock, `NiceMock` and `StrictMock` can be used to make a mock class "nice" or "strict". How does this affect uninteresting calls and unexpected calls? A **nice mock** suppresses uninteresting call *warnings*. It is less chatty than the default mock, but otherwise is the same. If a test fails with a default mock, it will also fail using a nice mock instead. And vice versa. Don't expect making a mock nice to change the test's result. A **strict mock** turns uninteresting call warnings into errors. So making a mock strict may change the test's result. Let's look at an example: ```cpp TEST(...) 
{ NiceMock<MockDomainRegistry> mock_registry;
You can work around this by supplying a [simpler mock interface](#SimplerInterfaces) than the mocked class provides. This pattern is also useful when the arguments are interesting, but match logic is substantially complex. You can leave the argument list unspecified and use SaveArg actions to [save the values for later verification](#SaveArgVerify). If you do that, you can easily differentiate calling the method the wrong number of times from calling it with the wrong arguments. ### Expecting Ordered Calls {#OrderedCalls} Although an `EXPECT_CALL()` statement defined earlier takes precedence when gMock tries to match a function call with an expectation, by default calls don't have to happen in the order `EXPECT_CALL()` statements are written. For example, if the arguments match the matchers in the third `EXPECT_CALL()`, but not those in the first two, then the third expectation will be used. If you would rather have all calls occur in the order of the expectations, put the `EXPECT_CALL()` statements in a block where you define a variable of type `InSequence`: ```cpp using ::testing::_; using ::testing::InSequence; { InSequence s; EXPECT_CALL(foo, DoThis(5)); EXPECT_CALL(bar, DoThat(_)) .Times(2); EXPECT_CALL(foo, DoThis(6)); } ``` In this example, we expect a call to `foo.DoThis(5)`, followed by two calls to `bar.DoThat()` where the argument can be anything, which are in turn followed by a call to `foo.DoThis(6)`. If a call occurred out-of-order, gMock will report an error. ### Expecting Partially Ordered Calls {#PartialOrder} Sometimes requiring everything to occur in a predetermined order can lead to brittle tests. For example, we may care about `A` occurring before both `B` and `C`, but aren't interested in the relative order of `B` and `C`. In this case, the test should reflect our real intent, instead of being overly constraining. gMock allows you to impose an arbitrary DAG (directed acyclic graph) on the calls. 
One way to express the DAG is to use the [After](#AfterClause) clause of `EXPECT_CALL`. Another way is via the `InSequence()` clause (not the same as the `InSequence` class), which we borrowed from jMock 2. It's less flexible than `After()`, but more convenient when you have long chains of sequential calls, as it doesn't require you to come up with different names for the expectations in the chains. Here's how it works: If we view `EXPECT_CALL()` statements as nodes in a graph, and add an edge from node A to node B wherever A must occur before B, we can get a DAG. We use the term "sequence" to mean a directed path in this DAG. Now, if we decompose the DAG into sequences, we just need to know which sequences each `EXPECT_CALL()` belongs to in order to be able to reconstruct the original DAG. So, to specify the partial order on the expectations we need to do two things: first to define some `Sequence` objects, and then for each `EXPECT_CALL()` say which `Sequence` objects it is part of. Expectations in the same sequence must occur in the order they are written. For example, ```cpp using ::testing::Sequence; ... Sequence s1, s2; EXPECT_CALL(foo, A()) .InSequence(s1, s2); EXPECT_CALL(bar, B()) .InSequence(s1); EXPECT_CALL(bar, C()) .InSequence(s2); EXPECT_CALL(foo, D()) .InSequence(s2); ``` specifies the following DAG (where `s1` is `A -> B`, and `s2` is `A -> C -> D`): ```text +---> B | A ---| | +---> C ---> D ``` This means that A must occur before B and C, and C must occur before D. There's no restriction about the order other than these. ### Controlling When an Expectation Retires When a mock method is called, gMock only considers expectations that are still active. An expectation is active when created, and becomes inactive (aka *retires*) when a call that has to occur later has occurred. For example, in ```cpp using ::testing::_; using ::testing::Sequence; ... 
Sequence s1, s2; EXPECT_CALL(log, Log(WARNING, _, "File too large.")) // #1 .Times(AnyNumber()) .InSequence(s1, s2); EXPECT_CALL(log, Log(WARNING, _, "Data set is empty.")) // #2 .InSequence(s1); EXPECT_CALL(log, Log(WARNING, _, "User not found.")) // #3 .InSequence(s2); ``` as soon as either #2 or #3 is matched, #1 will retire. If a warning `"File too large."` is logged after this, it will be an error. Note that an expectation doesn't retire automatically when it's saturated. For example, ```cpp using ::testing::_; ... EXPECT_CALL(log, Log(WARNING, _, _)); // #1 EXPECT_CALL(log, Log(WARNING, _, "File too large.")); // #2 ``` says that there will be exactly one warning with the message `"File too large."`. If the second warning contains this message too, #2 will match again and result in an upper-bound-violated error. If this is not what you want, you can ask an expectation to retire as soon as it becomes saturated: ```cpp using ::testing::_; ... EXPECT_CALL(log, Log(WARNING, _, _)); // #1 EXPECT_CALL(log, Log(WARNING, _, "File too large.")) // #2 .RetiresOnSaturation(); ``` Here #2 can be used only once, so if you have two warnings with the message `"File too large."`, the first will match #2 and the second will match #1 - there will be no error. ## Using Actions ### Returning References from Mock Methods If a mock function's return type is a reference, you need to use `ReturnRef()` instead of `Return()` to return a result: ```cpp using ::testing::ReturnRef; class MockFoo : public Foo { public: MOCK_METHOD(Bar&, GetBar, (), (override)); }; ... MockFoo foo; Bar bar; EXPECT_CALL(foo, GetBar()) .WillOnce(ReturnRef(bar)); ... ``` ### Returning Live Values from Mock Methods The `Return(x)` action saves a copy of `x` when the action is created, and always returns the same value whenever it's executed. Sometimes you may want to instead return the *live* value of `x` (i.e. its value at the time when the action is *executed*.). 
Use either `ReturnRef()` or `ReturnPointee()` for this purpose. If the mock function's return type is a reference, you can do it using `ReturnRef(x)`, as shown in the previous recipe ("Returning References from Mock Methods"). However, gMock doesn't let you use `ReturnRef()` in a mock function whose return type is not a reference, as doing that usually indicates a user error. So, what shall you do? Though you may be tempted, DO NOT use `ByRef()`: ```cpp using testing::ByRef; using testing::Return; class MockFoo : public Foo { public: MOCK_METHOD(int, GetValue, (), (override)); }; ... int x = 0; MockFoo foo; EXPECT_CALL(foo, GetValue()) .WillRepeatedly(Return(ByRef(x))); // Wrong! x = 42; EXPECT_EQ(42, foo.GetValue()); ``` Unfortunately, it doesn't work here. The above code will fail with error: ```text Value of: foo.GetValue() Actual: 0 Expected: 42 ``` The reason is that `Return(*value*)` converts `value` to the actual return type of the mock function at the time when the action is *created*, not when it is *executed*. (This behavior was chosen for the action to be safe when `value` is a proxy object that references some temporary objects.) As a result, `ByRef(x)` is converted to an `int` value (instead of a `const int&`) when the expectation is set, and `Return(ByRef(x))` will always return 0. `ReturnPointee(pointer)` was provided to solve this problem specifically. It returns the value pointed to by `pointer` at the time the action is *executed*: ```cpp using testing::ReturnPointee; ... int x = 0; MockFoo foo; EXPECT_CALL(foo, GetValue()) .WillRepeatedly(ReturnPointee(&x)); // Note the & here. x = 42; EXPECT_EQ(42, foo.GetValue()); // This will succeed now. ``` ### Combining Actions Want to do more than one thing when a function is called? That's fine. `DoAll()` allow you to do sequence of actions every time. Only the return value of the last action in the sequence will be used. 
```cpp using ::testing::_; using ::testing::DoAll; class MockFoo : public Foo { public: MOCK_METHOD(bool, Bar, (int n), (override)); }; ... EXPECT_CALL(foo, Bar(_)) .WillOnce(DoAll(action_1, action_2, ... action_n)); ``` ### Verifying Complex Arguments {#SaveArgVerify} If you want to verify that a method is called with a particular argument but the match criteria is complex, it can be difficult to distinguish between cardinality failures (calling the method the wrong number of times) and argument match failures. Similarly, if you are matching multiple parameters, it may not be easy to distinguishing which argument failed to match. For example: ```cpp // Not ideal: this could fail because of a problem with arg1 or arg2, or maybe // just the method wasn't called. EXPECT_CALL(foo, SendValues(_, ElementsAre(1, 4, 4, 7), EqualsProto( ... ))); ``` You can instead save the arguments and test them individually: ```cpp EXPECT_CALL(foo, SendValues) .WillOnce(DoAll(SaveArg<1>(&actual_array), SaveArg<2>(&actual_proto))); ... run the test EXPECT_THAT(actual_array, ElementsAre(1, 4, 4, 7)); EXPECT_THAT(actual_proto, EqualsProto( ... )); ``` ### Mocking Side Effects {#MockingSideEffects} Sometimes a method exhibits its effect not via returning a value but via side effects. For example, it may change some global state or modify an output argument. To mock side effects, in general you can define your own action by implementing `::testing::ActionInterface`. If all you need to do is to change an output argument, the built-in `SetArgPointee()` action is convenient: ```cpp using ::testing::_; using ::testing::SetArgPointee; class MockMutator : public Mutator { public: MOCK_METHOD(void, Mutate, (bool mutate, int* value), (override)); ... } ... MockMutator mutator; EXPECT_CALL(mutator, Mutate(true, _)) .WillOnce(SetArgPointee<1>(5)); ``` In this example, when `mutator.Mutate()` is called, we will assign 5 to the `int` variable pointed to by argument #1 (0-based). 
`SetArgPointee()` conveniently makes an internal copy of the value you pass to it, removing the need to keep the value in scope and alive. The implication however is that the value must have a copy constructor and assignment operator. If the mock method also needs to return a value as well, you can chain `SetArgPointee()` with `Return()` using `DoAll()`, remembering to put the `Return()` statement last: ```cpp using ::testing::_; using ::testing::Return; using ::testing::SetArgPointee; class MockMutator : public Mutator { public: ... MOCK_METHOD(bool, MutateInt, (int* value), (override)); } ... MockMutator mutator; EXPECT_CALL(mutator, MutateInt(_)) .WillOnce(DoAll(SetArgPointee<0>(5), Return(true))); ``` Note, however, that if you use the `ReturnOKWith()` method, it will override the values provided by `SetArgPointee()` in the response parameters of your function call. If the output argument is an array, use the `SetArrayArgument(first, last)` action instead. It copies the elements in source range `[first, last)` to the array pointed to by the `N`-th (0-based) argument: ```cpp using ::testing::NotNull; using ::testing::SetArrayArgument; class MockArrayMutator : public ArrayMutator { public: MOCK_METHOD(void, Mutate, (int* values, int num_values), (override)); ... } ... MockArrayMutator mutator; int values[5] = {1, 2, 3, 4, 5}; EXPECT_CALL(mutator, Mutate(NotNull(), 5)) .WillOnce(SetArrayArgument<0>(values, values + 5)); ``` This also works when the argument is an output iterator: ```cpp using ::testing::_; using ::testing::SetArrayArgument; class MockRolodex : public Rolodex { public: MOCK_METHOD(void, GetNames, (std::back_insert_iterator>), (override)); ... } ... 
MockRolodex rolodex; vector names; names.push_back("George"); names.push_back("John"); names.push_back("Thomas"); EXPECT_CALL(rolodex, GetNames(_)) .WillOnce(SetArrayArgument<0>(names.begin(), names.end())); ``` ### Changing a Mock Object's Behavior Based on the State If you expect a call to change the behavior of a mock object, you can use `::testing::InSequence` to specify different behaviors before and after the call: ```cpp using ::testing::InSequence; using ::testing::Return; ... { InSequence seq; EXPECT_CALL(my_mock, IsDirty()) .WillRepeatedly(Return(true)); EXPECT_CALL(my_mock, Flush()); EXPECT_CALL(my_mock, IsDirty()) .WillRepeatedly(Return(false)); } my_mock.FlushIfDirty(); ``` This makes `my_mock.IsDirty()` return `true` before `my_mock.Flush()` is called and return `false` afterwards. If the behavior change is more complex, you can store the effects in a variable and make a mock method get its return value from that variable: ```cpp using ::testing::_; using ::testing::SaveArg; using ::testing::Return; ACTION_P(ReturnPointee, p) { return *p; } ... int previous_value = 0; EXPECT_CALL(my_mock, GetPrevValue) .WillRepeatedly(ReturnPointee(&previous_value)); EXPECT_CALL(my_mock, UpdateValue) .WillRepeatedly(SaveArg<0>(&previous_value)); my_mock.DoSomethingToUpdateValue(); ``` Here `my_mock.GetPrevValue()` will always return the argument of the last `UpdateValue()` call. ### Setting the Default Value for a Return Type {#DefaultValue} If a mock method's return type is a built-in C++ type or pointer, by default it will return 0 when invoked. Also, in C++ 11 and above, a mock method whose return type has a default constructor will return a default-constructed value by default. You only need to specify an action if this default value doesn't work for you. Sometimes, you may want to change this default value, or you may want to specify a default value for types gMock doesn't know about. 
You can do this using the `::testing::DefaultValue` class template: ```cpp using ::testing::DefaultValue; class MockFoo : public Foo { public: MOCK_METHOD(Bar, CalculateBar, (), (override)); }; ... Bar default_bar; // Sets the default return value for type Bar. DefaultValue<Bar>::Set(default_bar); MockFoo foo; // We don't need to specify an action here, as the default // return value works for us. EXPECT_CALL(foo, CalculateBar()); foo.CalculateBar(); // This should return default_bar. // Unsets the default return value. DefaultValue<Bar>::Clear(); ``` Please note that changing the default value for a type can make your tests hard to understand. We recommend using this feature judiciously. For example, you may want to make sure the `Set()` and `Clear()` calls are right next to the code that uses your mock. ### Setting the Default Actions for a Mock Method You've learned how to change the default value of a given type. However, this may be too coarse for your purpose: perhaps you have two mock methods with the same return type and you want them to have different behaviors. The `ON_CALL()` macro allows you to customize your mock's behavior at the method level: ```cpp using ::testing::_; using ::testing::AnyNumber; using ::testing::Gt; using ::testing::Return; ... ON_CALL(foo, Sign(_)) .WillByDefault(Return(-1)); ON_CALL(foo, Sign(0)) .WillByDefault(Return(0)); ON_CALL(foo, Sign(Gt(0))) .WillByDefault(Return(1)); EXPECT_CALL(foo, Sign(_)) .Times(AnyNumber()); foo.Sign(5); // This should return 1. foo.Sign(-9); // This should return -1. foo.Sign(0); // This should return 0. ``` As you may have guessed, when there is more than one `ON_CALL()` statement, the newer ones in the order take precedence over the older ones. In other words, the **last** one that matches the function arguments will be used. This matching order allows you to set up the common behavior in a mock object's constructor or the test fixture's set-up phase and specialize the mock's behavior later. 
Note that both `ON_CALL` and `EXPECT_CALL` have the same "later statements take precedence" rule, but they don't interact. That is, `EXPECT_CALL`s have their own precedence order distinct from the `ON_CALL` precedence order. ### Using Functions/Methods/Functors/Lambdas as Actions {#FunctionsAsActions} If the built-in actions don't suit you, you can use an existing callable (function, `std::function`, method, functor, lambda) as an action. ```cpp using ::testing::_; using ::testing::Invoke; class MockFoo : public Foo { public: MOCK_METHOD(int, Sum, (int x, int y), (override)); MOCK_METHOD(bool, ComplexJob, (int x), (override)); }; int CalculateSum(int x, int y) { return x + y; } int Sum3(int x, int y, int z) { return x + y + z; } class Helper { public: bool ComplexJob(int x); }; ... MockFoo foo; Helper helper; EXPECT_CALL(foo, Sum(_, _)) .WillOnce(&CalculateSum) .WillRepeatedly(Invoke(NewPermanentCallback(Sum3, 1))); EXPECT_CALL(foo, ComplexJob(_)) .WillOnce(Invoke(&helper, &Helper::ComplexJob)) .WillRepeatedly([](int x) { return x > 0; }); foo.Sum(5, 6); // Invokes CalculateSum(5, 6). foo.Sum(2, 3); // Invokes Sum3(1, 2, 3). foo.ComplexJob(10); // Invokes helper.ComplexJob(10). foo.ComplexJob(-1); // Invokes the inline lambda. ``` The only requirement is that the type of the function, etc must be *compatible* with the signature of the mock function, meaning that the latter's arguments can be implicitly converted to the corresponding arguments of the former, and the former's return type can be implicitly converted to that of the latter. So, you can invoke something whose type is *not* exactly the same as the mock function, as long as it's safe to do so - nice, huh? **`Note:`{.escaped}** * The action takes ownership of the callback and will delete it when the action itself is destructed. * If the type of a callback is derived from a base callback type `C`, you need to implicitly cast it to `C` to resolve the overloading, e.g. ```cpp using ::testing::Invoke; ... 
ResultCallback* is_ok = ...; ... Invoke(is_ok) ...; // This works. BlockingClosure* done = new BlockingClosure; ... Invoke(implicit_cast(done)) ...; // The cast is necessary. ``` ### Using Functions with Extra Info as Actions The function or functor you call using `Invoke()` must have the same number of arguments as the mock function you use it for. Sometimes you may have a function that takes more arguments, and you are willing to pass in the extra arguments yourself to fill the gap. You can do this in gMock using callbacks with pre-bound arguments. Here's an example: ```cpp using ::testing::Invoke; class MockFoo : public Foo { public: MOCK_METHOD(char, DoThis, (int n), (override)); }; char SignOfSum(int x, int y) { const int sum = x + y; return (sum > 0) ? '+' : (sum < 0) ? '-' : '0'; } TEST_F(FooTest, Test) { MockFoo foo; EXPECT_CALL(foo, DoThis(2)) .WillOnce(Invoke(NewPermanentCallback(SignOfSum, 5))); EXPECT_EQ('+', foo.DoThis(2)); // Invokes SignOfSum(5, 2). } ``` ### Invoking a Function/Method/Functor/Lambda/Callback Without Arguments `Invoke()` is very useful for doing actions that are more complex. It passes the mock function's arguments to the function, etc being invoked such that the callee has the full context of the call to work with. If the invoked function is not interested in some or all of the arguments, it can simply ignore them. Yet, a common pattern is that a test author wants to invoke a function without the arguments of the mock function. `Invoke()` allows her to do that using a wrapper function that throws away the arguments before invoking an underlining nullary function. Needless to say, this can be tedious and obscures the intent of the test. `InvokeWithoutArgs()` solves this problem. It's like `Invoke()` except that it doesn't pass the mock function's arguments to the callee. 
Here's an example: ```cpp using ::testing::_; using ::testing::InvokeWithoutArgs; class MockFoo : public Foo { public: MOCK_METHOD(bool, ComplexJob, (int n), (override)); }; bool Job1() { ... } bool Job2(int n, char c) { ... } ... MockFoo foo; EXPECT_CALL(foo, ComplexJob(_)) .WillOnce(InvokeWithoutArgs(Job1)) .WillOnce(InvokeWithoutArgs(NewPermanentCallback(Job2, 5, 'a'))); foo.ComplexJob(10); // Invokes Job1(). foo.ComplexJob(20); // Invokes Job2(5, 'a'). ``` **`Note:`{.escaped}** * The action takes ownership of the callback and will delete it when the action itself is destructed. * If the type of a callback is derived from a base callback type `C`, you need to implicitly cast it to `C` to resolve the overloading, e.g. ```cpp using ::testing::InvokeWithoutArgs; ... ResultCallback* is_ok = ...; ... InvokeWithoutArgs(is_ok) ...; // This works. BlockingClosure* done = ...; ... InvokeWithoutArgs(implicit_cast(done)) ...; // The cast is necessary. ``` ### Invoking an Argument of the Mock Function Sometimes a mock function will receive a function pointer, a functor (in other words, a "callable") as an argument, e.g. ```cpp class MockFoo : public Foo { public: MOCK_METHOD(bool, DoThis, (int n, (ResultCallback1* callback)), (override)); }; ``` and you may want to invoke this callable argument: ```cpp using ::testing::_; ... MockFoo foo; EXPECT_CALL(foo, DoThis(_, _)) .WillOnce(...); // Will execute callback->Run(5), where callback is the // second argument DoThis() receives. ``` NOTE: The section below is legacy documentation from before C++ had lambdas: Arghh, you need to refer to a mock function argument but C++ has no lambda (yet), so you have to define your own action. :-( Or do you really? Well, gMock has an action to solve *exactly* this problem: ```cpp InvokeArgument(arg_1, arg_2, ..., arg_m) ``` will invoke the `N`-th (0-based) argument the mock function receives, with `arg_1`, `arg_2`, ..., and `arg_m`. 
No matter if the argument is a function pointer, a functor, or a callback. gMock handles them all. With that, you could write: ```cpp using ::testing::_; using ::testing::InvokeArgument; ... EXPECT_CALL(foo, DoThis(_, _)) .WillOnce(InvokeArgument<1>(5)); // Will execute callback->Run(5), where callback is the // second argument DoThis() receives. ``` What if the callable takes an argument by reference? No problem - just wrap it inside `ByRef()`: ```cpp ... MOCK_METHOD(bool, Bar, ((ResultCallback2* callback)), (override)); ... using ::testing::_; using ::testing::ByRef; using ::testing::InvokeArgument; ... MockFoo foo; Helper helper; ... EXPECT_CALL(foo, Bar(_)) .WillOnce(InvokeArgument<0>(5, ByRef(helper))); // ByRef(helper) guarantees that a reference to helper, not a copy of it, // will be passed to the callback. ``` What if the callable takes an argument by reference and we do **not** wrap the argument in `ByRef()`? Then `InvokeArgument()` will *make a copy* of the argument, and pass a *reference to the copy*, instead of a reference to the original value, to the callable. This is especially handy when the argument is a temporary value: ```cpp ... MOCK_METHOD(bool, DoThat, (bool (*f)(const double& x, const string& s)), (override)); ... using ::testing::_; using ::testing::InvokeArgument; ... MockFoo foo; ... EXPECT_CALL(foo, DoThat(_)) .WillOnce(InvokeArgument<0>(5.0, string("Hi"))); // Will execute (*f)(5.0, string("Hi")), where f is the function pointer // DoThat() receives. Note that the values 5.0 and string("Hi") are // temporary and dead once the EXPECT_CALL() statement finishes. Yet // it's fine to perform this action later, since a copy of the values // are kept inside the InvokeArgument action. 
``` ### Ignoring an Action's Result Sometimes you have an action that returns *something*, but you need an action that returns `void` (perhaps you want to use it in a mock function that returns `void`, or perhaps it needs to be used in `DoAll()` and it's not the last in the list). `IgnoreResult()` lets you do that. For example: ```cpp using ::testing::_; using ::testing::DoAll; using ::testing::IgnoreResult; using ::testing::Return; int Process(const MyData& data); string DoSomething(); class MockFoo : public Foo { public: MOCK_METHOD(void, Abc, (const MyData& data), (override)); MOCK_METHOD(bool, Xyz, (), (override)); }; ... MockFoo foo; EXPECT_CALL(foo, Abc(_)) // .WillOnce(Invoke(Process)); // The above line won't compile as Process() returns int but Abc() needs // to return void. .WillOnce(IgnoreResult(Process)); EXPECT_CALL(foo, Xyz()) .WillOnce(DoAll(IgnoreResult(DoSomething), // Ignores the string DoSomething() returns. Return(true))); ``` Note that you **cannot** use `IgnoreResult()` on an action that already returns `void`. Doing so will lead to ugly compiler errors. ### Selecting an Action's Arguments {#SelectingArgs} Say you have a mock function `Foo()` that takes seven arguments, and you have a custom action that you want to invoke when `Foo()` is called. Trouble is, the custom action only wants three arguments: ```cpp using ::testing::_; using ::testing::Invoke; ... MOCK_METHOD(bool, Foo, (bool visible, const string& name, int x, int y, (const map>), double& weight, double min_weight, double max_wight)); ... bool IsVisibleInQuadrant1(bool visible, int x, int y) { return visible && x >= 0 && y >= 0; } ... EXPECT_CALL(mock, Foo) .WillOnce(Invoke(IsVisibleInQuadrant1)); // Uh, won't compile. :-( ``` To please the compiler God, you need to define an "adaptor" that has the same signature as `Foo()` and calls the custom action with the right arguments: ```cpp using ::testing::_; using ::testing::Invoke; ... 
bool MyIsVisibleInQuadrant1(bool visible, const string& name, int x, int y, const map, double>& weight, double min_weight, double max_wight) { return IsVisibleInQuadrant1(visible, x, y); } ... EXPECT_CALL(mock, Foo) .WillOnce(Invoke(MyIsVisibleInQuadrant1)); // Now it works. ``` But isn't this awkward? gMock provides a generic *action adaptor*, so you can spend your time minding more important business than writing your own adaptors. Here's the syntax: ```cpp WithArgs(action) ``` creates an action that passes the arguments of the mock function at the given indices (0-based) to the inner `action` and performs it. Using `WithArgs`, our original example can be written as: ```cpp using ::testing::_; using ::testing::Invoke; using ::testing::WithArgs; ... EXPECT_CALL(mock, Foo) .WillOnce(WithArgs<0, 2, 3>(Invoke(IsVisibleInQuadrant1))); // No need to define your own adaptor. ``` For better readability, gMock also gives you: * `WithoutArgs(action)` when the inner `action` takes *no* argument, and * `WithArg(action)` (no `s` after `Arg`) when the inner `action` takes *one* argument. As you may have realized, `InvokeWithoutArgs(...)` is just syntactic sugar for `WithoutArgs(Invoke(...))`. Here are more tips: * The inner action used in `WithArgs` and friends does not have to be `Invoke()` -- it can be anything. * You can repeat an argument in the argument list if necessary, e.g. `WithArgs<2, 3, 3, 5>(...)`. * You can change the order of the arguments, e.g. `WithArgs<3, 2, 1>(...)`. * The types of the selected arguments do *not* have to match the signature of the inner action exactly. It works as long as they can be implicitly converted to the corresponding arguments of the inner action. For example, if the 4-th argument of the mock function is an `int` and `my_action` takes a `double`, `WithArg<4>(my_action)` will work. 
### Ignoring Arguments in Action Functions The [selecting-an-action's-arguments](#SelectingArgs) recipe showed us one way to make a mock function and an action with incompatible argument lists fit together. The downside is that wrapping the action in `WithArgs<...>()` can get tedious for people writing the tests. If you are defining a function (or method, functor, lambda, callback) to be used with `Invoke*()`, and you are not interested in some of its arguments, an alternative to `WithArgs` is to declare the uninteresting arguments as `Unused`. This makes the definition less cluttered and less fragile in case the types of the uninteresting arguments change. It could also increase the chance the action function can be reused. For example, given ```cpp public: MOCK_METHOD(double, Foo, double(const string& label, double x, double y), (override)); MOCK_METHOD(double, Bar, (int index, double x, double y), (override)); ``` instead of ```cpp using ::testing::_; using ::testing::Invoke; double DistanceToOriginWithLabel(const string& label, double x, double y) { return sqrt(x*x + y*y); } double DistanceToOriginWithIndex(int index, double x, double y) { return sqrt(x*x + y*y); } ... EXPECT_CALL(mock, Foo("abc", _, _)) .WillOnce(Invoke(DistanceToOriginWithLabel)); EXPECT_CALL(mock, Bar(5, _, _)) .WillOnce(Invoke(DistanceToOriginWithIndex)); ``` you could write ```cpp using ::testing::_; using ::testing::Invoke; using ::testing::Unused; double DistanceToOrigin(Unused, double x, double y) { return sqrt(x*x + y*y); } ... EXPECT_CALL(mock, Foo("abc", _, _)) .WillOnce(Invoke(DistanceToOrigin)); EXPECT_CALL(mock, Bar(5, _, _)) .WillOnce(Invoke(DistanceToOrigin)); ``` ### Sharing Actions Just like matchers, a gMock action object consists of a pointer to a ref-counted implementation object. Therefore copying actions is also allowed and very efficient. When the last action that references the implementation object dies, the implementation object will be deleted. 
If you have some complex action that you want to use again and again, you may not have to build it from scratch everytime. If the action doesn't have an internal state (i.e. if it always does the same thing no matter how many times it has been called), you can assign it to an action variable and use that variable repeatedly. For example: ```cpp using ::testing::Action; using ::testing::DoAll; using ::testing::Return; using ::testing::SetArgPointee; ... Action set_flag = DoAll(SetArgPointee<0>(5), Return(true)); ... use set_flag in .WillOnce() and .WillRepeatedly() ... ``` However, if the action has its own state, you may be surprised if you share the action object. Suppose you have an action factory `IncrementCounter(init)` which creates an action that increments and returns a counter whose initial value is `init`, using two actions created from the same expression and using a shared action will exhibit different behaviors. Example: ```cpp EXPECT_CALL(foo, DoThis()) .WillRepeatedly(IncrementCounter(0)); EXPECT_CALL(foo, DoThat()) .WillRepeatedly(IncrementCounter(0)); foo.DoThis(); // Returns 1. foo.DoThis(); // Returns 2. foo.DoThat(); // Returns 1 - Blah() uses a different // counter than Bar()'s. ``` versus ```cpp using ::testing::Action; ... Action increment = IncrementCounter(0); EXPECT_CALL(foo, DoThis()) .WillRepeatedly(increment); EXPECT_CALL(foo, DoThat()) .WillRepeatedly(increment); foo.DoThis(); // Returns 1. foo.DoThis(); // Returns 2. foo.DoThat(); // Returns 3 - the counter is shared. ``` ### Testing Asynchronous Behavior One oft-encountered problem with gMock is that it can be hard to test asynchronous behavior. Suppose you had a `EventQueue` class that you wanted to test, and you created a separate `EventDispatcher` interface so that you could easily mock it out. However, the implementation of the class fired all the events on a background thread, which made test timings difficult. 
You could just insert `sleep()` statements and hope for the best, but that makes your test behavior nondeterministic. A better way is to use gMock actions and `Notification` objects to force your asynchronous test to behave synchronously. ```cpp using ::testing::DoAll; using ::testing::InvokeWithoutArgs; using ::testing::Return; class MockEventDispatcher : public EventDispatcher { MOCK_METHOD(bool, DispatchEvent, (int32), (override)); }; ACTION_P(Notify, notification) { notification->Notify(); } TEST(EventQueueTest, EnqueueEventTest) { MockEventDispatcher mock_event_dispatcher; EventQueue event_queue(&mock_event_dispatcher); const int32 kEventId = 321; Notification done; EXPECT_CALL(mock_event_dispatcher, DispatchEvent(kEventId)) .WillOnce(Notify(&done)); event_queue.EnqueueEvent(kEventId); done.WaitForNotification(); } ``` In the example above, we set our normal gMock expectations, but then add an additional action to notify the `Notification` object. Now we can just call `Notification::WaitForNotification()` in the main thread to wait for the asynchronous call to finish. After that, our test suite is complete and we can safely exit. Note: this example has a downside: namely, if the expectation is not satisfied, our test will run forever. It will eventually time-out and fail, but it will take longer and be slightly harder to debug. To alleviate this problem, you can use `WaitForNotificationWithTimeout(ms)` instead of `WaitForNotification()`. ## Misc Recipes on Using gMock ### Mocking Methods That Use Move-Only Types C++11 introduced *move-only types*. A move-only-typed value can be moved from one object to another, but cannot be copied. `std::unique_ptr` is probably the most commonly used move-only type. Mocking a method that takes and/or returns move-only types presents some challenges, but nothing insurmountable. This recipe shows you how you can do it. 
Note that the support for move-only method arguments was only introduced to gMock in April 2017; in older code, you may find more complex [workarounds](#LegacyMoveOnly) for lack of this feature. Let's say we are working on a fictional project that lets one post and share snippets called "buzzes". Your code uses these types: ```cpp enum class AccessLevel { kInternal, kPublic }; class Buzz { public: explicit Buzz(AccessLevel access) { ... } ... }; class Buzzer { public: virtual ~Buzzer() {} virtual std::unique_ptr<Buzz> MakeBuzz(StringPiece text) = 0; virtual bool ShareBuzz(std::unique_ptr<Buzz> buzz, int64_t timestamp) = 0; ... }; ``` A `Buzz` object represents a snippet being posted. A class that implements the `Buzzer` interface is capable of creating and sharing `Buzz`es. Methods in `Buzzer` may return a `unique_ptr<Buzz>` or take a `unique_ptr<Buzz>`. Now we need to mock `Buzzer` in our tests. To mock a method that accepts or returns move-only types, you just use the familiar `MOCK_METHOD` syntax as usual: ```cpp class MockBuzzer : public Buzzer { public: MOCK_METHOD(std::unique_ptr<Buzz>, MakeBuzz, (StringPiece text), (override)); MOCK_METHOD(bool, ShareBuzz, (std::unique_ptr<Buzz> buzz, int64_t timestamp), (override)); }; ``` Now that we have the mock class defined, we can use it in tests. In the following code examples, we assume that we have defined a `MockBuzzer` object named `mock_buzzer_`: ```cpp MockBuzzer mock_buzzer_; ``` First let's see how we can set expectations on the `MakeBuzz()` method, which returns a `unique_ptr<Buzz>`. As usual, if you set an expectation without an action (i.e. the `.WillOnce()` or `.WillRepeatedly()` clause), when that expectation fires, the default action for that method will be taken. Since `unique_ptr<>` has a default constructor that returns a null `unique_ptr`, that's what you'll get if you don't specify an action: ```cpp // Use the default action. EXPECT_CALL(mock_buzzer_, MakeBuzz("hello")); // Triggers the previous EXPECT_CALL. 
EXPECT_EQ(nullptr, mock_buzzer_.MakeBuzz("hello")); ``` If you are not happy with the default action, you can tweak it as usual; see [Setting Default Actions](#OnCall). If you just need to return a pre-defined move-only value, you can use the `Return(ByMove(...))` action: ```cpp // When this fires, the unique_ptr<> specified by ByMove(...) will // be returned. EXPECT_CALL(mock_buzzer_, MakeBuzz("world")) .WillOnce(Return(ByMove(MakeUnique(AccessLevel::kInternal)))); EXPECT_NE(nullptr, mock_buzzer_.MakeBuzz("world")); ``` Note that `ByMove()` is essential here - if you drop it, the code won’t compile. Quiz time! What do you think will happen if a `Return(ByMove(...))` action is performed more than once (e.g. you write `... .WillRepeatedly(Return(ByMove(...)));`)? Come think of it, after the first time the action runs, the source value will be consumed (since it’s a move-only value), so the next time around, there’s no value to move from -- you’ll get a run-time error that `Return(ByMove(...))` can only be run once. If you need your mock method to do more than just moving a pre-defined value, remember that you can always use a lambda or a callable object, which can do pretty much anything you want: ```cpp EXPECT_CALL(mock_buzzer_, MakeBuzz("x")) .WillRepeatedly([](StringPiece text) { return MakeUnique(AccessLevel::kInternal); }); EXPECT_NE(nullptr, mock_buzzer_.MakeBuzz("x")); EXPECT_NE(nullptr, mock_buzzer_.MakeBuzz("x")); ``` Every time this `EXPECT_CALL` fires, a new `unique_ptr` will be created and returned. You cannot do this with `Return(ByMove(...))`. That covers returning move-only values; but how do we work with methods accepting move-only arguments? The answer is that they work normally, although some actions will not compile when any of method's arguments are move-only. 
You can always use `Return`, or a [lambda or functor](#FunctionsAsActions): ```cpp using ::testing::Unused; EXPECT_CALL(mock_buzzer_, ShareBuzz(NotNull(), _)).WillOnce(Return(true)); EXPECT_TRUE(mock_buzzer_.ShareBuzz(MakeUnique<Buzz>(AccessLevel::kInternal), 0)); EXPECT_CALL(mock_buzzer_, ShareBuzz(_, _)).WillOnce( [](std::unique_ptr<Buzz> buzz, Unused) { return buzz != nullptr; }); EXPECT_FALSE(mock_buzzer_.ShareBuzz(nullptr, 0)); ``` Many built-in actions (`WithArgs`, `WithoutArgs`, `DeleteArg`, `SaveArg`, ...) could in principle support move-only arguments, but the support for this is not implemented yet. If this is blocking you, please file a bug. A few actions (e.g. `DoAll`) copy their arguments internally, so they can never work with non-copyable objects; you'll have to use functors instead. #### Legacy workarounds for move-only types {#LegacyMoveOnly} Support for move-only function arguments was only introduced to gMock in April 2017. In older code, you may encounter the following workaround for the lack of this feature (it is no longer necessary - we're including it just for reference): ```cpp class MockBuzzer : public Buzzer { public: MOCK_METHOD(bool, DoShareBuzz, (Buzz* buzz, Time timestamp)); bool ShareBuzz(std::unique_ptr<Buzz> buzz, Time timestamp) override { return DoShareBuzz(buzz.get(), timestamp); } }; ``` The trick is to delegate the `ShareBuzz()` method to a mock method (let's call it `DoShareBuzz()`) that does not take move-only parameters. Then, instead of setting expectations on `ShareBuzz()`, you set them on the `DoShareBuzz()` mock method: ```cpp MockBuzzer mock_buzzer_; EXPECT_CALL(mock_buzzer_, DoShareBuzz(NotNull(), _)); // When one calls ShareBuzz() on the MockBuzzer like this, the call is // forwarded to DoShareBuzz(), which is mocked. Therefore this statement // will trigger the above EXPECT_CALL. 
mock_buzzer_.ShareBuzz(MakeUnique(AccessLevel::kInternal), 0); ``` ### Making the Compilation Faster Believe it or not, the *vast majority* of the time spent on compiling a mock class is in generating its constructor and destructor, as they perform non-trivial tasks (e.g. verification of the expectations). What's more, mock methods with different signatures have different types and thus their constructors/destructors need to be generated by the compiler separately. As a result, if you mock many different types of methods, compiling your mock class can get really slow. If you are experiencing slow compilation, you can move the definition of your mock class' constructor and destructor out of the class body and into a `.cc` file. This way, even if you `#include` your mock class in N files, the compiler only needs to generate its constructor and destructor once, resulting in a much faster compilation. Let's illustrate the idea using an example. Here's the definition of a mock class before applying this recipe: ```cpp // File mock_foo.h. ... class MockFoo : public Foo { public: // Since we don't declare the constructor or the destructor, // the compiler will generate them in every translation unit // where this mock class is used. MOCK_METHOD(int, DoThis, (), (override)); MOCK_METHOD(bool, DoThat, (const char* str), (override)); ... more mock methods ... }; ``` After the change, it would look like: ```cpp // File mock_foo.h. ... class MockFoo : public Foo { public: // The constructor and destructor are declared, but not defined, here. MockFoo(); virtual ~MockFoo(); MOCK_METHOD(int, DoThis, (), (override)); MOCK_METHOD(bool, DoThat, (const char* str), (override)); ... more mock methods ... }; ``` and ```cpp // File mock_foo.cc. #include "path/to/mock_foo.h" // The definitions may appear trivial, but the functions actually do a // lot of things through the constructors/destructors of the member // variables used to implement the mock methods. 
MockFoo::MockFoo() {} MockFoo::~MockFoo() {} ``` ### Forcing a Verification When it's being destroyed, your friendly mock object will automatically verify that all expectations on it have been satisfied, and will generate googletest failures if not. This is convenient as it leaves you with one less thing to worry about. That is, unless you are not sure if your mock object will be destroyed. How could it be that your mock object won't eventually be destroyed? Well, it might be created on the heap and owned by the code you are testing. Suppose there's a bug in that code and it doesn't delete the mock object properly - you could end up with a passing test when there's actually a bug. Using a heap checker is a good idea and can alleviate the concern, but its implementation is not 100% reliable. So, sometimes you do want to *force* gMock to verify a mock object before it is (hopefully) destructed. You can do this with `Mock::VerifyAndClearExpectations(&mock_object)`: ```cpp TEST(MyServerTest, ProcessesRequest) { using ::testing::Mock; MockFoo* const foo = new MockFoo; EXPECT_CALL(*foo, ...)...; // ... other expectations ... // server now owns foo. MyServer server(foo); server.ProcessRequest(...); // In case that server's destructor will forget to delete foo, // this will verify the expectations anyway. Mock::VerifyAndClearExpectations(foo); } // server is destroyed when it goes out of scope here. ``` **Tip:** The `Mock::VerifyAndClearExpectations()` function returns a `bool` to indicate whether the verification was successful (`true` for yes), so you can wrap that function call inside a `ASSERT_TRUE()` if there is no point going further when the verification has failed. ### Using Check Points {#UsingCheckPoints} Sometimes you may want to "reset" a mock object at various check points in your test: at each check point, you verify that all existing expectations on the mock object have been satisfied, and then you set some new expectations on it as if it's newly created. 
This allows you to work with a mock object in "phases" whose sizes are each manageable. One such scenario is that in your test's `SetUp()` function, you may want to put the object you are testing into a certain state, with the help from a mock object. Once in the desired state, you want to clear all expectations on the mock, such that in the `TEST_F` body you can set fresh expectations on it. As you may have figured out, the `Mock::VerifyAndClearExpectations()` function we saw in the previous recipe can help you here. Or, if you are using `ON_CALL()` to set default actions on the mock object and want to clear the default actions as well, use `Mock::VerifyAndClear(&mock_object)` instead. This function does what `Mock::VerifyAndClearExpectations(&mock_object)` does and returns the same `bool`, **plus** it clears the `ON_CALL()` statements on `mock_object` too. Another trick you can use to achieve the same effect is to put the expectations in sequences and insert calls to a dummy "check-point" function at specific places. Then you can verify that the mock function calls do happen at the right time. For example, if you are exercising code: ```cpp Foo(1); Foo(2); Foo(3); ``` and want to verify that `Foo(1)` and `Foo(3)` both invoke `mock.Bar("a")`, but `Foo(2)` doesn't invoke anything. You can write: ```cpp using ::testing::MockFunction; TEST(FooTest, InvokesBarCorrectly) { MyMock mock; // Class MockFunction has exactly one mock method. It is named // Call() and has type F. MockFunction check; { InSequence s; EXPECT_CALL(mock, Bar("a")); EXPECT_CALL(check, Call("1")); EXPECT_CALL(check, Call("2")); EXPECT_CALL(mock, Bar("a")); } Foo(1); check.Call("1"); Foo(2); check.Call("2"); Foo(3); } ``` The expectation spec says that the first `Bar("a")` must happen before check point "1", the second `Bar("a")` must happen after check point "2", and nothing should happen between the two check points. 
The explicit check points make it easy to tell which `Bar("a")` is called by which call to `Foo()`. ### Mocking Destructors Sometimes you want to make sure a mock object is destructed at the right time, e.g. after `bar->A()` is called but before `bar->B()` is called. We already know that you can specify constraints on the [order](#OrderedCalls) of mock function calls, so all we need to do is to mock the destructor of the mock function. This sounds simple, except for one problem: a destructor is a special function with special syntax and special semantics, and the `MOCK_METHOD` macro doesn't work for it: ```cpp MOCK_METHOD(void, ~MockFoo, ()); // Won't compile! ``` The good news is that you can use a simple pattern to achieve the same effect. First, add a mock function `Die()` to your mock class and call it in the destructor, like this: ```cpp class MockFoo : public Foo { ... // Add the following two lines to the mock class. MOCK_METHOD(void, Die, ()); virtual ~MockFoo() { Die(); } }; ``` (If the name `Die()` clashes with an existing symbol, choose another name.) Now, we have translated the problem of testing when a `MockFoo` object dies to testing when its `Die()` method is called: ```cpp MockFoo* foo = new MockFoo; MockBar* bar = new MockBar; ... { InSequence s; // Expects *foo to die after bar->A() and before bar->B(). EXPECT_CALL(*bar, A()); EXPECT_CALL(*foo, Die()); EXPECT_CALL(*bar, B()); } ``` And that's that. ### Using gMock and Threads {#UsingThreads} In a **unit** test, it's best if you could isolate and test a piece of code in a single-threaded context. That avoids race conditions and dead locks, and makes debugging your test much easier. Yet most programs are multi-threaded, and sometimes to test something we need to pound on it from more than one thread. gMock works for this purpose too. Remember the steps for using a mock: 1. Create a mock object `foo`. 2. Set its default actions and expectations using `ON_CALL()` and `EXPECT_CALL()`. 3. 
The code under test calls methods of `foo`. 4. Optionally, verify and reset the mock. 5. Destroy the mock yourself, or let the code under test destroy it. The destructor will automatically verify it. If you follow the following simple rules, your mocks and threads can live happily together: * Execute your *test code* (as opposed to the code being tested) in *one* thread. This makes your test easy to follow. * Obviously, you can do step #1 without locking. * When doing step #2 and #5, make sure no other thread is accessing `foo`. Obvious too, huh? * #3 and #4 can be done either in one thread or in multiple threads - anyway you want. gMock takes care of the locking, so you don't have to do any - unless required by your test logic. If you violate the rules (for example, if you set expectations on a mock while another thread is calling its methods), you get undefined behavior. That's not fun, so don't do it. gMock guarantees that the action for a mock function is done in the same thread that called the mock function. For example, in ```cpp EXPECT_CALL(mock, Foo(1)) .WillOnce(action1); EXPECT_CALL(mock, Foo(2)) .WillOnce(action2); ``` if `Foo(1)` is called in thread 1 and `Foo(2)` is called in thread 2, gMock will execute `action1` in thread 1 and `action2` in thread 2. gMock does *not* impose a sequence on actions performed in different threads (doing so may create deadlocks as the actions may need to cooperate). This means that the execution of `action1` and `action2` in the above example *may* interleave. If this is a problem, you should add proper synchronization logic to `action1` and `action2` to make the test thread-safe. Also, remember that `DefaultValue` is a global resource that potentially affects *all* living mock objects in your program. Naturally, you won't want to mess with it from multiple threads or when there still are mocks in action. 
### Controlling How Much Information gMock Prints When gMock sees something that has the potential of being an error (e.g. a mock function with no expectation is called, a.k.a. an uninteresting call, which is allowed but perhaps you forgot to explicitly ban the call), it prints some warning messages, including the arguments of the function, the return value, and the stack trace. Hopefully this will remind you to take a look and see if there is indeed a problem. Sometimes you are confident that your tests are correct and may not appreciate such friendly messages. Some other times, you are debugging your tests or learning about the behavior of the code you are testing, and wish you could observe every mock call that happens (including argument values, the return value, and the stack trace). Clearly, one size doesn't fit all. You can control how much gMock tells you using the `--gmock_verbose=LEVEL` command-line flag, where `LEVEL` is a string with three possible values: * `info`: gMock will print all informational messages, warnings, and errors (most verbose). At this setting, gMock will also log any calls to the `ON_CALL/EXPECT_CALL` macros. It will include a stack trace in "uninteresting call" warnings. * `warning`: gMock will print both warnings and errors (less verbose); it will omit the stack traces in "uninteresting call" warnings. This is the default. * `error`: gMock will print errors only (least verbose). Alternatively, you can adjust the value of that flag from within your tests like so: ```cpp ::testing::FLAGS_gmock_verbose = "error"; ``` If you find gMock printing too many stack frames with its informational or warning messages, remember that you can control their amount with the `--gtest_stack_trace_depth=max_depth` flag. Now, judiciously use the right flag to enable gMock serve you better! ### Gaining Super Vision into Mock Calls You have a test using gMock. It fails: gMock tells you some expectations aren't satisfied. 
However, you aren't sure why: Is there a typo somewhere in the matchers? Did you mess up the order of the `EXPECT_CALL`s? Or is the code under test doing something wrong? How can you find out the cause? Won't it be nice if you have X-ray vision and can actually see the trace of all `EXPECT_CALL`s and mock method calls as they are made? For each call, would you like to see its actual argument values and which `EXPECT_CALL` gMock thinks it matches? If you still need some help to figure out who made these calls, how about being able to see the complete stack trace at each mock call? You can unlock this power by running your test with the `--gmock_verbose=info` flag. For example, given the test program: ```cpp #include "gmock/gmock.h" using testing::_; using testing::HasSubstr; using testing::Return; class MockFoo { public: MOCK_METHOD(void, F, (const string& x, const string& y)); }; TEST(Foo, Bar) { MockFoo mock; EXPECT_CALL(mock, F(_, _)).WillRepeatedly(Return()); EXPECT_CALL(mock, F("a", "b")); EXPECT_CALL(mock, F("c", HasSubstr("d"))); mock.F("a", "good"); mock.F("a", "b"); } ``` if you run it with `--gmock_verbose=info`, you will see this output: ```shell [ RUN ] Foo.Bar foo_test.cc:14: EXPECT_CALL(mock, F(_, _)) invoked Stack trace: ... foo_test.cc:15: EXPECT_CALL(mock, F("a", "b")) invoked Stack trace: ... foo_test.cc:16: EXPECT_CALL(mock, F("c", HasSubstr("d"))) invoked Stack trace: ... foo_test.cc:14: Mock function call matches EXPECT_CALL(mock, F(_, _))... Function call: F(@0x7fff7c8dad40"a",@0x7fff7c8dad10"good") Stack trace: ... foo_test.cc:15: Mock function call matches EXPECT_CALL(mock, F("a", "b"))... Function call: F(@0x7fff7c8dada0"a",@0x7fff7c8dad70"b") Stack trace: ... foo_test.cc:16: Failure Actual function call count doesn't match EXPECT_CALL(mock, F("c", HasSubstr("d")))... 
Expected: to be called once Actual: never called - unsatisfied and active [ FAILED ] Foo.Bar ``` Suppose the bug is that the `"c"` in the third `EXPECT_CALL` is a typo and should actually be `"a"`. With the above message, you should see that the actual `F("a", "good")` call is matched by the first `EXPECT_CALL`, not the third as you thought. From that it should be obvious that the third `EXPECT_CALL` is written wrong. Case solved. If you are interested in the mock call trace but not the stack traces, you can combine `--gmock_verbose=info` with `--gtest_stack_trace_depth=0` on the test command line. ### Running Tests in Emacs If you build and run your tests in Emacs using the `M-x google-compile` command (as many googletest users do), the source file locations of gMock and googletest errors will be highlighted. Just press `` on one of them and you'll be taken to the offending line. Or, you can just type `C-x`` to jump to the next error. To make it even easier, you can add the following lines to your `~/.emacs` file: ```text (global-set-key "\M-m" 'google-compile) ; m is for make (global-set-key [M-down] 'next-error) (global-set-key [M-up] '(lambda () (interactive) (next-error -1))) ``` Then you can type `M-m` to start a build (if you want to run the test as well, just make sure `foo_test.run` or `runtests` is in the build command you supply after typing `M-m`), or `M-up`/`M-down` to move back and forth between errors. ## Extending gMock ### Writing New Matchers Quickly {#NewMatchers} WARNING: gMock does not guarantee when or how many times a matcher will be invoked. Therefore, all matchers must be functionally pure. See [this section](#PureMatchers) for more details. The `MATCHER*` family of macros can be used to define custom matchers easily. The syntax: ```cpp MATCHER(name, description_string_expression) { statements; } ``` will define a matcher with the given name that executes the statements, which must return a `bool` to indicate if the match succeeds. 
Inside the statements, you can refer to the value being matched by `arg`, and refer to its type by `arg_type`. The *description string* is a `string`-typed expression that documents what the matcher does, and is used to generate the failure message when the match fails. It can (and should) reference the special `bool` variable `negation`, and should evaluate to the description of the matcher when `negation` is `false`, or that of the matcher's negation when `negation` is `true`. For convenience, we allow the description string to be empty (`""`), in which case gMock will use the sequence of words in the matcher name as the description. For example: ```cpp MATCHER(IsDivisibleBy7, "") { return (arg % 7) == 0; } ``` allows you to write ```cpp // Expects mock_foo.Bar(n) to be called where n is divisible by 7. EXPECT_CALL(mock_foo, Bar(IsDivisibleBy7())); ``` or, ```cpp using ::testing::Not; ... // Verifies that two values are divisible by 7. EXPECT_THAT(some_expression, IsDivisibleBy7()); EXPECT_THAT(some_other_expression, Not(IsDivisibleBy7())); ``` If the above assertions fail, they will print something like: ```shell Value of: some_expression Expected: is divisible by 7 Actual: 27 ... Value of: some_other_expression Expected: not (is divisible by 7) Actual: 21 ``` where the descriptions `"is divisible by 7"` and `"not (is divisible by 7)"` are automatically calculated from the matcher name `IsDivisibleBy7`. As you may have noticed, the auto-generated descriptions (especially those for the negation) may not be so great. You can always override them with a `string` expression of your own: ```cpp MATCHER(IsDivisibleBy7, absl::StrCat(negation ? "isn't" : "is", " divisible by 7")) { return (arg % 7) == 0; } ``` Optionally, you can stream additional information to a hidden argument named `result_listener` to explain the match result. 
For example, a better definition of `IsDivisibleBy7` is: ```cpp MATCHER(IsDivisibleBy7, "") { if ((arg % 7) == 0) return true; *result_listener << "the remainder is " << (arg % 7); return false; } ``` With this definition, the above assertion will give a better message: ```shell Value of: some_expression Expected: is divisible by 7 Actual: 27 (the remainder is 6) ``` You should let `MatchAndExplain()` print *any additional information* that can help a user understand the match result. Note that it should explain why the match succeeds in case of a success (unless it's obvious) - this is useful when the matcher is used inside `Not()`. There is no need to print the argument value itself, as gMock already prints it for you. NOTE: The type of the value being matched (`arg_type`) is determined by the context in which you use the matcher and is supplied to you by the compiler, so you don't need to worry about declaring it (nor can you). This allows the matcher to be polymorphic. For example, `IsDivisibleBy7()` can be used to match any type where the value of `(arg % 7) == 0` can be implicitly converted to a `bool`. In the `Bar(IsDivisibleBy7())` example above, if method `Bar()` takes an `int`, `arg_type` will be `int`; if it takes an `unsigned long`, `arg_type` will be `unsigned long`; and so on. ### Writing New Parameterized Matchers Quickly Sometimes you'll want to define a matcher that has parameters. For that you can use the macro: ```cpp MATCHER_P(name, param_name, description_string) { statements; } ``` where the description string can be either `""` or a `string` expression that references `negation` and `param_name`. 
For example: ```cpp MATCHER_P(HasAbsoluteValue, value, "") { return abs(arg) == value; } ``` will allow you to write: ```cpp EXPECT_THAT(Blah("a"), HasAbsoluteValue(n)); ``` which may lead to this message (assuming `n` is 10): ```shell Value of: Blah("a") Expected: has absolute value 10 Actual: -9 ``` Note that both the matcher description and its parameter are printed, making the message human-friendly. In the matcher definition body, you can write `foo_type` to reference the type of a parameter named `foo`. For example, in the body of `MATCHER_P(HasAbsoluteValue, value)` above, you can write `value_type` to refer to the type of `value`. gMock also provides `MATCHER_P2`, `MATCHER_P3`, ..., up to `MATCHER_P10` to support multi-parameter matchers: ```cpp MATCHER_Pk(name, param_1, ..., param_k, description_string) { statements; } ``` Please note that the custom description string is for a particular *instance* of the matcher, where the parameters have been bound to actual values. Therefore usually you'll want the parameter values to be part of the description. gMock lets you do that by referencing the matcher parameters in the description string expression. For example, ```cpp using ::testing::PrintToString; MATCHER_P2(InClosedRange, low, hi, absl::StrFormat("%s in range [%s, %s]", negation ? "isn't" : "is", PrintToString(low), PrintToString(hi))) { return low <= arg && arg <= hi; } ... EXPECT_THAT(3, InClosedRange(4, 6)); ``` would generate a failure that contains the message: ```shell Expected: is in range [4, 6] ``` If you specify `""` as the description, the failure message will contain the sequence of words in the matcher name followed by the parameter values printed as a tuple. For example, ```cpp MATCHER_P2(InClosedRange, low, hi, "") { ... } ... 
EXPECT_THAT(3, InClosedRange(4, 6)); ``` would generate a failure that contains the text: ```shell Expected: in closed range (4, 6) ``` For the purpose of typing, you can view ```cpp MATCHER_Pk(Foo, p1, ..., pk, description_string) { ... } ``` as shorthand for ```cpp template FooMatcherPk Foo(p1_type p1, ..., pk_type pk) { ... } ``` When you write `Foo(v1, ..., vk)`, the compiler infers the types of the parameters `v1`, ..., and `vk` for you. If you are not happy with the result of the type inference, you can specify the types by explicitly instantiating the template, as in `Foo(5, false)`. As said earlier, you don't get to (or need to) specify `arg_type` as that's determined by the context in which the matcher is used. You can assign the result of expression `Foo(p1, ..., pk)` to a variable of type `FooMatcherPk`. This can be useful when composing matchers. Matchers that don't have a parameter or have only one parameter have special types: you can assign `Foo()` to a `FooMatcher`-typed variable, and assign `Foo(p)` to a `FooMatcherP`-typed variable. While you can instantiate a matcher template with reference types, passing the parameters by pointer usually makes your code more readable. If, however, you still want to pass a parameter by reference, be aware that in the failure message generated by the matcher you will see the value of the referenced object but not its address. You can overload matchers with different numbers of parameters: ```cpp MATCHER_P(Blah, a, description_string_1) { ... } MATCHER_P2(Blah, a, b, description_string_2) { ... } ``` While it's tempting to always use the `MATCHER*` macros when defining a new matcher, you should also consider implementing `MatcherInterface` or using `MakePolymorphicMatcher()` instead (see the recipes that follow), especially if you need to use the matcher a lot. 
While these approaches require more work, they give you more control on the types of the value being matched and the matcher parameters, which in general leads to better compiler error messages that pay off in the long run. They also allow overloading matchers based on parameter types (as opposed to just based on the number of parameters). ### Writing New Monomorphic Matchers A matcher of argument type `T` implements `::testing::MatcherInterface` and does two things: it tests whether a value of type `T` matches the matcher, and can describe what kind of values it matches. The latter ability is used for generating readable error messages when expectations are violated. The interface looks like this: ```cpp class MatchResultListener { public: ... // Streams x to the underlying ostream; does nothing if the ostream // is NULL. template MatchResultListener& operator<<(const T& x); // Returns the underlying ostream. ::std::ostream* stream(); }; template class MatcherInterface { public: virtual ~MatcherInterface(); // Returns true if and only if the matcher matches x; also explains the match // result to 'listener'. virtual bool MatchAndExplain(T x, MatchResultListener* listener) const = 0; // Describes this matcher to an ostream. virtual void DescribeTo(::std::ostream* os) const = 0; // Describes the negation of this matcher to an ostream. virtual void DescribeNegationTo(::std::ostream* os) const; }; ``` If you need a custom matcher but `Truly()` is not a good option (for example, you may not be happy with the way `Truly(predicate)` describes itself, or you may want your matcher to be polymorphic as `Eq(value)` is), you can define a matcher to do whatever you want in two steps: first implement the matcher interface, and then define a factory function to create a matcher instance. The second step is not strictly needed but it makes the syntax of using the matcher nicer. 
For example, you can define a matcher to test whether an `int` is divisible by 7 and then use it like this: ```cpp using ::testing::MakeMatcher; using ::testing::Matcher; using ::testing::MatcherInterface; using ::testing::MatchResultListener; class DivisibleBy7Matcher : public MatcherInterface { public: bool MatchAndExplain(int n, MatchResultListener* /* listener */) const override { return (n % 7) == 0; } void DescribeTo(::std::ostream* os) const override { *os << "is divisible by 7"; } void DescribeNegationTo(::std::ostream* os) const override { *os << "is not divisible by 7"; } }; Matcher DivisibleBy7() { return MakeMatcher(new DivisibleBy7Matcher); } ... EXPECT_CALL(foo, Bar(DivisibleBy7())); ``` You may improve the matcher message by streaming additional information to the `listener` argument in `MatchAndExplain()`: ```cpp class DivisibleBy7Matcher : public MatcherInterface { public: bool MatchAndExplain(int n, MatchResultListener* listener) const override { const int remainder = n % 7; if (remainder != 0) { *listener << "the remainder is " << remainder; } return remainder == 0; } ... }; ``` Then, `EXPECT_THAT(x, DivisibleBy7());` may generate a message like this: ```shell Value of: x Expected: is divisible by 7 Actual: 23 (the remainder is 2) ``` ### Writing New Polymorphic Matchers You've learned how to write your own matchers in the previous recipe. Just one problem: a matcher created using `MakeMatcher()` only works for one particular type of arguments. If you want a *polymorphic* matcher that works with arguments of several types (for instance, `Eq(x)` can be used to match a *`value`* as long as `value == x` compiles -- *`value`* and `x` don't have to share the same type), you can learn the trick from `testing/base/public/gmock-matchers.h` but it's a bit involved. Fortunately, most of the time you can define a polymorphic matcher easily with the help of `MakePolymorphicMatcher()`. 
Here's how you can define `NotNull()` as an example: ```cpp using ::testing::MakePolymorphicMatcher; using ::testing::MatchResultListener; using ::testing::PolymorphicMatcher; class NotNullMatcher { public: // To implement a polymorphic matcher, first define a COPYABLE class // that has three members MatchAndExplain(), DescribeTo(), and // DescribeNegationTo(), like the following. // In this example, we want to use NotNull() with any pointer, so // MatchAndExplain() accepts a pointer of any type as its first argument. // In general, you can define MatchAndExplain() as an ordinary method or // a method template, or even overload it. template bool MatchAndExplain(T* p, MatchResultListener* /* listener */) const { return p != NULL; } // Describes the property of a value matching this matcher. void DescribeTo(std::ostream* os) const { *os << "is not NULL"; } // Describes the property of a value NOT matching this matcher. void DescribeNegationTo(std::ostream* os) const { *os << "is NULL"; } }; // To construct a polymorphic matcher, pass an instance of the class // to MakePolymorphicMatcher(). Note the return type. PolymorphicMatcher NotNull() { return MakePolymorphicMatcher(NotNullMatcher()); } ... EXPECT_CALL(foo, Bar(NotNull())); // The argument must be a non-NULL pointer. ``` **Note:** Your polymorphic matcher class does **not** need to inherit from `MatcherInterface` or any other class, and its methods do **not** need to be virtual. Like in a monomorphic matcher, you may explain the match result by streaming additional information to the `listener` argument in `MatchAndExplain()`. ### Writing New Cardinalities A cardinality is used in `Times()` to tell gMock how many times you expect a call to occur. It doesn't have to be exact. For example, you can say `AtLeast(5)` or `Between(2, 4)`. 
If the [built-in set](cheat_sheet.md#CardinalityList) of cardinalities doesn't suit you, you are free to define your own by implementing the following interface (in namespace `testing`): ```cpp class CardinalityInterface { public: virtual ~CardinalityInterface(); // Returns true if and only if call_count calls will satisfy this cardinality. virtual bool IsSatisfiedByCallCount(int call_count) const = 0; // Returns true if and only if call_count calls will saturate this // cardinality. virtual bool IsSaturatedByCallCount(int call_count) const = 0; // Describes self to an ostream. virtual void DescribeTo(std::ostream* os) const = 0; }; ``` For example, to specify that a call must occur even number of times, you can write ```cpp using ::testing::Cardinality; using ::testing::CardinalityInterface; using ::testing::MakeCardinality; class EvenNumberCardinality : public CardinalityInterface { public: bool IsSatisfiedByCallCount(int call_count) const override { return (call_count % 2) == 0; } bool IsSaturatedByCallCount(int call_count) const override { return false; } void DescribeTo(std::ostream* os) const { *os << "called even number of times"; } }; Cardinality EvenNumber() { return MakeCardinality(new EvenNumberCardinality); } ... EXPECT_CALL(foo, Bar(3)) .Times(EvenNumber()); ``` ### Writing New Actions Quickly {#QuickNewActions} If the built-in actions don't work for you, you can easily define your own one. Just define a functor class with a (possibly templated) call operator, matching the signature of your action. 
```cpp struct Increment { template T operator()(T* arg) { return ++(*arg); } } ``` The same approach works with stateful functors (or any callable, really): ``` struct MultiplyBy { template T operator()(T arg) { return arg * multiplier; } int multiplier; } // Then use: // EXPECT_CALL(...).WillOnce(MultiplyBy{7}); ``` #### Legacy macro-based Actions Before C++11, the functor-based actions were not supported; the old way of writing actions was through a set of `ACTION*` macros. We suggest to avoid them in new code; they hide a lot of logic behind the macro, potentially leading to harder-to-understand compiler errors. Nevertheless, we cover them here for completeness. By writing ```cpp ACTION(name) { statements; } ``` in a namespace scope (i.e. not inside a class or function), you will define an action with the given name that executes the statements. The value returned by `statements` will be used as the return value of the action. Inside the statements, you can refer to the K-th (0-based) argument of the mock function as `argK`. For example: ```cpp ACTION(IncrementArg1) { return ++(*arg1); } ``` allows you to write ```cpp ... WillOnce(IncrementArg1()); ``` Note that you don't need to specify the types of the mock function arguments. Rest assured that your code is type-safe though: you'll get a compiler error if `*arg1` doesn't support the `++` operator, or if the type of `++(*arg1)` isn't compatible with the mock function's return type. Another example: ```cpp ACTION(Foo) { (*arg2)(5); Blah(); *arg1 = 0; return arg0; } ``` defines an action `Foo()` that invokes argument #2 (a function pointer) with 5, calls function `Blah()`, sets the value pointed to by argument #1 to 0, and returns argument #0. 
For more convenience and flexibility, you can also use the following pre-defined symbols in the body of `ACTION`: `argK_type` | The type of the K-th (0-based) argument of the mock function :-------------- | :----------------------------------------------------------- `args` | All arguments of the mock function as a tuple `args_type` | The type of all arguments of the mock function as a tuple `return_type` | The return type of the mock function `function_type` | The type of the mock function For example, when using an `ACTION` as a stub action for mock function: ```cpp int DoSomething(bool flag, int* ptr); ``` we have: Pre-defined Symbol | Is Bound To ------------------ | --------------------------------- `arg0` | the value of `flag` `arg0_type` | the type `bool` `arg1` | the value of `ptr` `arg1_type` | the type `int*` `args` | the tuple `(flag, ptr)` `args_type` | the type `std::tuple` `return_type` | the type `int` `function_type` | the type `int(bool, int*)` #### Legacy macro-based parameterized Actions Sometimes you'll want to parameterize an action you define. For that we have another macro ```cpp ACTION_P(name, param) { statements; } ``` For example, ```cpp ACTION_P(Add, n) { return arg0 + n; } ``` will allow you to write ```cpp // Returns argument #0 + 5. ... WillOnce(Add(5)); ``` For convenience, we use the term *arguments* for the values used to invoke the mock function, and the term *parameters* for the values used to instantiate an action. Note that you don't need to provide the type of the parameter either. Suppose the parameter is named `param`, you can also use the gMock-defined symbol `param_type` to refer to the type of the parameter as inferred by the compiler. For example, in the body of `ACTION_P(Add, n)` above, you can write `n_type` for the type of `n`. gMock also provides `ACTION_P2`, `ACTION_P3`, and etc to support multi-parameter actions. 
For example, ```cpp ACTION_P2(ReturnDistanceTo, x, y) { double dx = arg0 - x; double dy = arg1 - y; return sqrt(dx*dx + dy*dy); } ``` lets you write ```cpp ... WillOnce(ReturnDistanceTo(5.0, 26.5)); ``` You can view `ACTION` as a degenerated parameterized action where the number of parameters is 0. You can also easily define actions overloaded on the number of parameters: ```cpp ACTION_P(Plus, a) { ... } ACTION_P2(Plus, a, b) { ... } ``` ### Restricting the Type of an Argument or Parameter in an ACTION For maximum brevity and reusability, the `ACTION*` macros don't ask you to provide the types of the mock function arguments and the action parameters. Instead, we let the compiler infer the types for us. Sometimes, however, we may want to be more explicit about the types. There are several tricks to do that. For example: ```cpp ACTION(Foo) { // Makes sure arg0 can be converted to int. int n = arg0; ... use n instead of arg0 here ... } ACTION_P(Bar, param) { // Makes sure the type of arg1 is const char*. ::testing::StaticAssertTypeEq(); // Makes sure param can be converted to bool. bool flag = param; } ``` where `StaticAssertTypeEq` is a compile-time assertion in googletest that verifies two types are the same. ### Writing New Action Templates Quickly Sometimes you want to give an action explicit template parameters that cannot be inferred from its value parameters. `ACTION_TEMPLATE()` supports that and can be viewed as an extension to `ACTION()` and `ACTION_P*()`. The syntax: ```cpp ACTION_TEMPLATE(ActionName, HAS_m_TEMPLATE_PARAMS(kind1, name1, ..., kind_m, name_m), AND_n_VALUE_PARAMS(p1, ..., p_n)) { statements; } ``` defines an action template that takes *m* explicit template parameters and *n* value parameters, where *m* is in [1, 10] and *n* is in [0, 10]. `name_i` is the name of the *i*-th template parameter, and `kind_i` specifies whether it's a `typename`, an integral constant, or a template. `p_i` is the name of the *i*-th value parameter. 
Example: ```cpp // DuplicateArg(output) converts the k-th argument of the mock // function to type T and copies it to *output. ACTION_TEMPLATE(DuplicateArg, // Note the comma between int and k: HAS_2_TEMPLATE_PARAMS(int, k, typename, T), AND_1_VALUE_PARAMS(output)) { *output = T(::std::get(args)); } ``` To create an instance of an action template, write: ```cpp ActionName(v1, ..., v_n) ``` where the `t`s are the template arguments and the `v`s are the value arguments. The value argument types are inferred by the compiler. For example: ```cpp using ::testing::_; ... int n; EXPECT_CALL(mock, Foo).WillOnce(DuplicateArg<1, unsigned char>(&n)); ``` If you want to explicitly specify the value argument types, you can provide additional template arguments: ```cpp ActionName(v1, ..., v_n) ``` where `u_i` is the desired type of `v_i`. `ACTION_TEMPLATE` and `ACTION`/`ACTION_P*` can be overloaded on the number of value parameters, but not on the number of template parameters. Without the restriction, the meaning of the following is unclear: ```cpp OverloadedAction(x); ``` Are we using a single-template-parameter action where `bool` refers to the type of `x`, or a two-template-parameter action where the compiler is asked to infer the type of `x`? ### Using the ACTION Object's Type If you are writing a function that returns an `ACTION` object, you'll need to know its type. The type depends on the macro used to define the action and the parameter types. 
The rule is relatively simple:

| Given Definition | Expression | Has Type |
| ---------------- | ---------- | -------- |
| `ACTION(Foo)` | `Foo()` | `FooAction` |
| `ACTION_TEMPLATE(Foo, HAS_m_TEMPLATE_PARAMS(...), AND_0_VALUE_PARAMS())` | `Foo<t1, ..., t_m>()` | `FooAction<t1, ..., t_m>` |
| `ACTION_P(Bar, param)` | `Bar(int_value)` | `BarActionP<int>` |
| `ACTION_TEMPLATE(Bar, HAS_m_TEMPLATE_PARAMS(...), AND_1_VALUE_PARAMS(p1))` | `Bar<t1, ..., t_m>(int_value)` | `BarActionP<t1, ..., t_m, int>` |
| `ACTION_P2(Baz, p1, p2)` | `Baz(bool_value, int_value)` | `BazActionP2<bool, int>` |
| `ACTION_TEMPLATE(Baz, HAS_m_TEMPLATE_PARAMS(...), AND_2_VALUE_PARAMS(p1, p2))` | `Baz<t1, ..., t_m>(bool_value, int_value)` | `BazActionP2<t1, ..., t_m, bool, int>` |
| ... | ... | ... |

Note that we have to pick different suffixes (`Action`, `ActionP`, `ActionP2`, and etc) for actions with different numbers of value parameters, or the action definitions cannot be overloaded on the number of them. ### Writing New Monomorphic Actions {#NewMonoActions} While the `ACTION*` macros are very convenient, sometimes they are inappropriate. For example, despite the tricks shown in the previous recipes, they don't let you directly specify the types of the mock function arguments and the action parameters, which in general leads to unoptimized compiler error messages that can baffle unfamiliar users. They also don't allow overloading actions based on parameter types without jumping through some hoops. An alternative to the `ACTION*` macros is to implement `::testing::ActionInterface<F>`, where `F` is the type of the mock function in which the action will be used. For example: ```cpp template <typename F> class ActionInterface { public: virtual ~ActionInterface(); // Performs the action. Result is the return type of function type // F, and ArgumentTuple is the tuple of arguments of F. // // For example, if F is int(bool, const string&), then Result would // be int, and ArgumentTuple would be ::std::tuple<bool, const string&>.
virtual Result Perform(const ArgumentTuple& args) = 0; }; ``` ```cpp using ::testing::_; using ::testing::Action; using ::testing::ActionInterface; using ::testing::MakeAction; typedef int IncrementMethod(int*); class IncrementArgumentAction : public ActionInterface<IncrementMethod> { public: int Perform(const ::std::tuple<int*>& args) override { int* p = ::std::get<0>(args); // Grabs the first argument. return (*p)++; // Increments the value and returns the old one. } }; Action<IncrementMethod> IncrementArgument() { return MakeAction(new IncrementArgumentAction); } ... EXPECT_CALL(foo, Baz(_)) .WillOnce(IncrementArgument()); int n = 5; foo.Baz(&n); // Should return 5 and change n to 6. ``` ### Writing New Polymorphic Actions {#NewPolyActions} The previous recipe showed you how to define your own action. This is all good, except that you need to know the type of the function in which the action will be used. Sometimes that can be a problem. For example, if you want to use the action in functions with *different* types (e.g. like `Return()` and `SetArgPointee()`). If an action can be used in several types of mock functions, we say it's *polymorphic*. The `MakePolymorphicAction()` function template makes it easy to define such an action: ```cpp namespace testing { template <typename Impl> PolymorphicAction<Impl> MakePolymorphicAction(const Impl& impl); } // namespace testing ``` As an example, let's define an action that returns the second argument in the mock function's argument list. The first step is to define an implementation class: ```cpp class ReturnSecondArgumentAction { public: template <typename Result, typename ArgumentTuple> Result Perform(const ArgumentTuple& args) const { // To get the i-th (0-based) argument, use ::std::get<i>(args). return ::std::get<1>(args); } }; ``` This implementation class does *not* need to inherit from any particular class. What matters is that it must have a `Perform()` method template. This method template takes the mock function's arguments as a tuple in a **single** argument, and returns the result of the action.
It can be either `const` or not, but must be invocable with exactly one template argument, which is the result type. In other words, you must be able to call `Perform<R>(args)` where `R` is the mock function's return type and `args` is its arguments in a tuple. Next, we use `MakePolymorphicAction()` to turn an instance of the implementation class into the polymorphic action we need. It will be convenient to have a wrapper for this: ```cpp using ::testing::MakePolymorphicAction; using ::testing::PolymorphicAction; PolymorphicAction<ReturnSecondArgumentAction> ReturnSecondArgument() { return MakePolymorphicAction(ReturnSecondArgumentAction()); } ``` Now, you can use this polymorphic action the same way you use the built-in ones: ```cpp using ::testing::_; class MockFoo : public Foo { public: MOCK_METHOD(int, DoThis, (bool flag, int n), (override)); MOCK_METHOD(string, DoThat, (int x, const char* str1, const char* str2), (override)); }; ... MockFoo foo; EXPECT_CALL(foo, DoThis).WillOnce(ReturnSecondArgument()); EXPECT_CALL(foo, DoThat).WillOnce(ReturnSecondArgument()); ... foo.DoThis(true, 5); // Will return 5. foo.DoThat(1, "Hi", "Bye"); // Will return "Hi". ``` ### Teaching gMock How to Print Your Values When an uninteresting or unexpected call occurs, gMock prints the argument values and the stack trace to help you debug. Assertion macros like `EXPECT_THAT` and `EXPECT_EQ` also print the values in question when the assertion fails. gMock and googletest do this using googletest's user-extensible value printer. This printer knows how to print built-in C++ types, native arrays, STL containers, and any type that supports the `<<` operator. For other types, it prints the raw bytes in the value and hopes that you the user can figure it out. [googletest's advanced guide](../../googletest/docs/advanced.md#teaching-googletest-how-to-print-your-values) explains how to extend the printer to do a better job at printing your particular type than to dump the bytes.
## Useful Mocks Created Using gMock ### Mock std::function {#MockFunction} `std::function` is a general function type introduced in C++11. It is a preferred way of passing callbacks to new interfaces. Functions are copiable, and are not usually passed around by pointer, which makes them tricky to mock. But fear not - `MockFunction` can help you with that. `MockFunction` has a mock method `Call()` with the signature: ```cpp R Call(T1, ..., Tn); ``` It also has a `AsStdFunction()` method, which creates a `std::function` proxy forwarding to Call: ```cpp std::function AsStdFunction(); ``` To use `MockFunction`, first create `MockFunction` object and set up expectations on its `Call` method. Then pass proxy obtained from `AsStdFunction()` to the code you are testing. For example: ```cpp TEST(FooTest, RunsCallbackWithBarArgument) { // 1. Create a mock object. MockFunction mock_function; // 2. Set expectations on Call() method. EXPECT_CALL(mock_function, Call("bar")).WillOnce(Return(1)); // 3. Exercise code that uses std::function. Foo(mock_function.AsStdFunction()); // Foo's signature can be either of: // void Foo(const std::function& fun); // void Foo(std::function fun); // 4. All expectations will be verified when mock_function // goes out of scope and is destroyed. } ``` Remember that function objects created with `AsStdFunction()` are just forwarders. If you create multiple of them, they will share the same set of expectations. Although `std::function` supports unlimited number of arguments, `MockFunction` implementation is limited to ten. If you ever hit that limit... well, your callback has bigger problems than being mockable. :-) LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/docs/for_dummies.md000066400000000000000000000710061456444476200261410ustar00rootroot00000000000000## gMock for Dummies {#GMockForDummies} ### What Is gMock? When you write a prototype or test, often it's not feasible or wise to rely on real objects entirely. 
A **mock object** implements the same interface as a real object (so it can be used as one), but lets you specify at run time how it will be used and what it should do (which methods will be called? in which order? how many times? with what arguments? what will they return? etc). **Note:** It is easy to confuse the term *fake objects* with mock objects. Fakes and mocks actually mean very different things in the Test-Driven Development (TDD) community: * **Fake** objects have working implementations, but usually take some shortcut (perhaps to make the operations less expensive), which makes them not suitable for production. An in-memory file system would be an example of a fake. * **Mocks** are objects pre-programmed with *expectations*, which form a specification of the calls they are expected to receive. If all this seems too abstract for you, don't worry - the most important thing to remember is that a mock allows you to check the *interaction* between itself and code that uses it. The difference between fakes and mocks shall become much clearer once you start to use mocks. **gMock** is a library (sometimes we also call it a "framework" to make it sound cool) for creating mock classes and using them. It does to C++ what jMock/EasyMock does to Java (well, more or less). When using gMock, 1. first, you use some simple macros to describe the interface you want to mock, and they will expand to the implementation of your mock class; 2. next, you create some mock objects and specify its expectations and behavior using an intuitive syntax; 3. then you exercise code that uses the mock objects. gMock will catch any violation to the expectations as soon as it arises. ### Why gMock? While mock objects help you remove unnecessary dependencies in tests and make them fast and reliable, using mocks manually in C++ is *hard*: * Someone has to implement the mocks. The job is usually tedious and error-prone. No wonder people go great distance to avoid it. 
* The quality of those manually written mocks is a bit, uh, unpredictable. You may see some really polished ones, but you may also see some that were hacked up in a hurry and have all sorts of ad hoc restrictions. * The knowledge you gained from using one mock doesn't transfer to the next one. In contrast, Java and Python programmers have some fine mock frameworks (jMock, EasyMock, [Mox](http://wtf/mox), etc), which automate the creation of mocks. As a result, mocking is a proven effective technique and widely adopted practice in those communities. Having the right tool absolutely makes the difference. gMock was built to help C++ programmers. It was inspired by jMock and EasyMock, but designed with C++'s specifics in mind. It is your friend if any of the following problems is bothering you: * You are stuck with a sub-optimal design and wish you had done more prototyping before it was too late, but prototyping in C++ is by no means "rapid". * Your tests are slow as they depend on too many libraries or use expensive resources (e.g. a database). * Your tests are brittle as some resources they use are unreliable (e.g. the network). * You want to test how your code handles a failure (e.g. a file checksum error), but it's not easy to cause one. * You need to make sure that your module interacts with other modules in the right way, but it's hard to observe the interaction; therefore you resort to observing the side effects at the end of the action, but it's awkward at best. * You want to "mock out" your dependencies, except that they don't have mock implementations yet; and, frankly, you aren't thrilled by some of those hand-written mocks. We encourage you to use gMock as * a *design* tool, for it lets you experiment with your interface design early and often. More iterations lead to better designs! * a *testing* tool to cut your tests' outbound dependencies and probe the interaction between your module and its collaborators. 
### Getting Started gMock is bundled with googletest. ### A Case for Mock Turtles Let's look at an example. Suppose you are developing a graphics program that relies on a [LOGO](http://en.wikipedia.org/wiki/Logo_programming_language)-like API for drawing. How would you test that it does the right thing? Well, you can run it and compare the screen with a golden screen snapshot, but let's admit it: tests like this are expensive to run and fragile (What if you just upgraded to a shiny new graphics card that has better anti-aliasing? Suddenly you have to update all your golden images.). It would be too painful if all your tests are like this. Fortunately, you learned about [Dependency Injection](http://en.wikipedia.org/wiki/Dependency_injection) and know the right thing to do: instead of having your application talk to the system API directly, wrap the API in an interface (say, `Turtle`) and code to that interface: ```cpp class Turtle { ... virtual ~Turtle() {}; virtual void PenUp() = 0; virtual void PenDown() = 0; virtual void Forward(int distance) = 0; virtual void Turn(int degrees) = 0; virtual void GoTo(int x, int y) = 0; virtual int GetX() const = 0; virtual int GetY() const = 0; }; ``` (Note that the destructor of `Turtle` **must** be virtual, as is the case for **all** classes you intend to inherit from - otherwise the destructor of the derived class will not be called when you delete an object through a base pointer, and you'll get corrupted program states like memory leaks.) You can control whether the turtle's movement will leave a trace using `PenUp()` and `PenDown()`, and control its movement using `Forward()`, `Turn()`, and `GoTo()`. Finally, `GetX()` and `GetY()` tell you the current position of the turtle. Your program will normally use a real implementation of this interface. In tests, you can use a mock implementation instead. This allows you to easily check what drawing primitives your program is calling, with what arguments, and in which order. 
Tests written this way are much more robust (they won't break because your new machine does anti-aliasing differently), easier to read and maintain (the intent of a test is expressed in the code, not in some binary images), and run *much, much faster*. ### Writing the Mock Class If you are lucky, the mocks you need to use have already been implemented by some nice people. If, however, you find yourself in the position to write a mock class, relax - gMock turns this task into a fun game! (Well, almost.) #### How to Define It Using the `Turtle` interface as example, here are the simple steps you need to follow: * Derive a class `MockTurtle` from `Turtle`. * Take a *virtual* function of `Turtle` (while it's possible to [mock non-virtual methods using templates](cook_book.md#MockingNonVirtualMethods), it's much more involved). * In the `public:` section of the child class, write `MOCK_METHOD();` * Now comes the fun part: you take the function signature, cut-and-paste it into the macro, and add two commas - one between the return type and the name, another between the name and the argument list. * If you're mocking a const method, add a 4th parameter containing `(const)` (the parentheses are required). * Since you're overriding a virtual method, we suggest adding the `override` keyword. For const methods the 4th parameter becomes `(const, override)`, for non-const methods just `(override)`. This isn't mandatory. * Repeat until all virtual functions you want to mock are done. (It goes without saying that *all* pure virtual methods in your abstract class must be either mocked or overridden.) After the process, you should have something like: ```cpp #include "gmock/gmock.h" // Brings in gMock. class MockTurtle : public Turtle { public: ... 
MOCK_METHOD(void, PenUp, (), (override)); MOCK_METHOD(void, PenDown, (), (override)); MOCK_METHOD(void, Forward, (int distance), (override)); MOCK_METHOD(void, Turn, (int degrees), (override)); MOCK_METHOD(void, GoTo, (int x, int y), (override)); MOCK_METHOD(int, GetX, (), (const, override)); MOCK_METHOD(int, GetY, (), (const, override)); }; ``` You don't need to define these mock methods somewhere else - the `MOCK_METHOD` macro will generate the definitions for you. It's that simple! #### Where to Put It When you define a mock class, you need to decide where to put its definition. Some people put it in a `_test.cc`. This is fine when the interface being mocked (say, `Foo`) is owned by the same person or team. Otherwise, when the owner of `Foo` changes it, your test could break. (You can't really expect `Foo`'s maintainer to fix every test that uses `Foo`, can you?) So, the rule of thumb is: if you need to mock `Foo` and it's owned by others, define the mock class in `Foo`'s package (better, in a `testing` sub-package such that you can clearly separate production code and testing utilities), put it in a `.h` and a `cc_library`. Then everyone can reference them from their tests. If `Foo` ever changes, there is only one copy of `MockFoo` to change, and only tests that depend on the changed methods need to be fixed. Another way to do it: you can introduce a thin layer `FooAdaptor` on top of `Foo` and code to this new interface. Since you own `FooAdaptor`, you can absorb changes in `Foo` much more easily. While this is more work initially, carefully choosing the adaptor interface can make your code easier to write and more readable (a net win in the long run), as you can choose `FooAdaptor` to fit your specific domain much better than `Foo` does. ### Using Mocks in Tests Once you have a mock class, using it is easy. The typical work flow is: 1. 
Import the gMock names from the `testing` namespace such that you can use them unqualified (You only have to do it once per file. Remember that namespaces are a good idea.) 2. Create some mock objects. 3. Specify your expectations on them (How many times will a method be called? With what arguments? What should it do? etc.). 4. Exercise some code that uses the mocks; optionally, check the result using googletest assertions. If a mock method is called more than expected or with wrong arguments, you'll get an error immediately. 5. When a mock is destructed, gMock will automatically check whether all expectations on it have been satisfied. Here's an example: ```cpp #include "path/to/mock-turtle.h" #include "gmock/gmock.h" #include "gtest/gtest.h" using ::testing::AtLeast; // #1 TEST(PainterTest, CanDrawSomething) { MockTurtle turtle; // #2 EXPECT_CALL(turtle, PenDown()) // #3 .Times(AtLeast(1)); Painter painter(&turtle); // #4 EXPECT_TRUE(painter.DrawCircle(0, 0, 10)); // #5 } ``` As you might have guessed, this test checks that `PenDown()` is called at least once. If the `painter` object didn't call this method, your test will fail with a message like this: ```text path/to/my_test.cc:119: Failure Actual function call count doesn't match this expectation: Actually: never called; Expected: called at least once. Stack trace: ... ``` **Tip 1:** If you run the test from an Emacs buffer, you can hit `<Enter>` on the line number to jump right to the failed expectation. **Tip 2:** If your mock objects are never deleted, the final verification won't happen. Therefore it's a good idea to turn on the heap checker in your tests when you allocate mocks on the heap. You get that automatically if you use the `gtest_main` library already. **Important note:** gMock requires expectations to be set **before** the mock functions are called, otherwise the behavior is **undefined**. In particular, you mustn't interleave `EXPECT_CALL()s` and calls to the mock functions.
This means `EXPECT_CALL()` should be read as expecting that a call will occur *in the future*, not that a call has occurred. Why does gMock work like that? Well, specifying the expectation beforehand allows gMock to report a violation as soon as it arises, when the context (stack trace, etc) is still available. This makes debugging much easier. Admittedly, this test is contrived and doesn't do much. You can easily achieve the same effect without using gMock. However, as we shall reveal soon, gMock allows you to do *so much more* with the mocks. ### Setting Expectations The key to using a mock object successfully is to set the *right expectations* on it. If you set the expectations too strict, your test will fail as the result of unrelated changes. If you set them too loose, bugs can slip through. You want to do it just right such that your test can catch exactly the kind of bugs you intend it to catch. gMock provides the necessary means for you to do it "just right." #### General Syntax In gMock we use the `EXPECT_CALL()` macro to set an expectation on a mock method. The general syntax is: ```cpp EXPECT_CALL(mock_object, method(matchers)) .Times(cardinality) .WillOnce(action) .WillRepeatedly(action); ``` The macro has two arguments: first the mock object, and then the method and its arguments. Note that the two are separated by a comma (`,`), not a period (`.`). (Why use a comma? The answer is that it was necessary for technical reasons.) If the method is not overloaded, the macro can also be called without matchers: ```cpp EXPECT_CALL(mock_object, non-overloaded-method) .Times(cardinality) .WillOnce(action) .WillRepeatedly(action); ``` This syntax allows the test writer to specify "called with any arguments" without explicitly specifying the number or types of arguments.
To avoid unintended ambiguity, this syntax may only be used for methods which are not overloaded. Either form of the macro can be followed by some optional *clauses* that provide more information about the expectation. We'll discuss how each clause works in the coming sections. This syntax is designed to make an expectation read like English. For example, you can probably guess that ```cpp using ::testing::Return; ... EXPECT_CALL(turtle, GetX()) .Times(5) .WillOnce(Return(100)) .WillOnce(Return(150)) .WillRepeatedly(Return(200)); ``` says that the `turtle` object's `GetX()` method will be called five times, it will return 100 the first time, 150 the second time, and then 200 every time. Some people like to call this style of syntax a Domain-Specific Language (DSL). **Note:** Why do we use a macro to do this? Well it serves two purposes: first it makes expectations easily identifiable (either by `gsearch` or by a human reader), and second it allows gMock to include the source file location of a failed expectation in messages, making debugging easier. #### Matchers: What Arguments Do We Expect? When a mock function takes arguments, we may specify what arguments we are expecting, for example: ```cpp // Expects the turtle to move forward by 100 units. EXPECT_CALL(turtle, Forward(100)); ``` Oftentimes you do not want to be too specific. Remember that talk about tests being too rigid? Over specification leads to brittle tests and obscures the intent of tests. Therefore we encourage you to specify only what's necessary—no more, no less. If you aren't interested in the value of an argument, write `_` as the argument, which means "anything goes": ```cpp using ::testing::_; ... // Expects that the turtle jumps to somewhere on the x=50 line. EXPECT_CALL(turtle, GoTo(50, _)); ``` `_` is an instance of what we call **matchers**. A matcher is like a predicate and can test whether an argument is what we'd expect.
You can use a matcher inside `EXPECT_CALL()` wherever a function argument is expected. `_` is a convenient way of saying "any value". In the above examples, `100` and `50` are also matchers; implicitly, they are the same as `Eq(100)` and `Eq(50)`, which specify that the argument must be equal (using `operator==`) to the matcher argument. There are many [built-in matchers](#MatcherList) for common types (as well as [custom matchers](cook_book.md#NewMatchers)); for example: ```cpp using ::testing::Ge; ... // Expects the turtle moves forward by at least 100. EXPECT_CALL(turtle, Forward(Ge(100))); ``` If you don't care about *any* arguments, rather than specify `_` for each of them you may instead omit the parameter list: ```cpp // Expects the turtle to move forward. EXPECT_CALL(turtle, Forward); // Expects the turtle to jump somewhere. EXPECT_CALL(turtle, GoTo); ``` This works for all non-overloaded methods; if a method is overloaded, you need to help gMock resolve which overload is expected by specifying the number of arguments and possibly also the [types of the arguments](cook_book.md#SelectOverload). #### Cardinalities: How Many Times Will It Be Called? The first clause we can specify following an `EXPECT_CALL()` is `Times()`. We call its argument a **cardinality** as it tells *how many times* the call should occur. It allows us to repeat an expectation many times without actually writing it as many times. More importantly, a cardinality can be "fuzzy", just like a matcher can be. This allows a user to express the intent of a test exactly. An interesting special case is when we say `Times(0)`. You may have guessed - it means that the function shouldn't be called with the given arguments at all, and gMock will report a googletest failure whenever the function is (wrongfully) called. We've seen `AtLeast(n)` as an example of fuzzy cardinalities earlier. For the list of built-in cardinalities you can use, see [here](cheat_sheet.md#CardinalityList). 
The `Times()` clause can be omitted. **If you omit `Times()`, gMock will infer the cardinality for you.** The rules are easy to remember: * If **neither** `WillOnce()` **nor** `WillRepeatedly()` is in the `EXPECT_CALL()`, the inferred cardinality is `Times(1)`. * If there are *n* `WillOnce()`'s but **no** `WillRepeatedly()`, where *n* >= 1, the cardinality is `Times(n)`. * If there are *n* `WillOnce()`'s and **one** `WillRepeatedly()`, where *n* >= 0, the cardinality is `Times(AtLeast(n))`. **Quick quiz:** what do you think will happen if a function is expected to be called twice but actually called four times? #### Actions: What Should It Do? Remember that a mock object doesn't really have a working implementation? We as users have to tell it what to do when a method is invoked. This is easy in gMock. First, if the return type of a mock function is a built-in type or a pointer, the function has a **default action** (a `void` function will just return, a `bool` function will return `false`, and other functions will return 0). In addition, in C++ 11 and above, a mock function whose return type is default-constructible (i.e. has a default constructor) has a default action of returning a default-constructed value. If you don't say anything, this behavior will be used. Second, if a mock function doesn't have a default action, or the default action doesn't suit you, you can specify the action to be taken each time the expectation matches using a series of `WillOnce()` clauses followed by an optional `WillRepeatedly()`. For example, ```cpp using ::testing::Return; ... EXPECT_CALL(turtle, GetX()) .WillOnce(Return(100)) .WillOnce(Return(200)) .WillOnce(Return(300)); ``` says that `turtle.GetX()` will be called *exactly three times* (gMock inferred this from how many `WillOnce()` clauses we've written, since we didn't explicitly write `Times()`), and will return 100, 200, and 300 respectively. ```cpp using ::testing::Return; ... 
EXPECT_CALL(turtle, GetY()) .WillOnce(Return(100)) .WillOnce(Return(200)) .WillRepeatedly(Return(300)); ``` says that `turtle.GetY()` will be called *at least twice* (gMock knows this as we've written two `WillOnce()` clauses and a `WillRepeatedly()` while having no explicit `Times()`), will return 100 and 200 respectively the first two times, and 300 from the third time on. Of course, if you explicitly write a `Times()`, gMock will not try to infer the cardinality itself. What if the number you specified is larger than there are `WillOnce()` clauses? Well, after all `WillOnce()`s are used up, gMock will do the *default* action for the function every time (unless, of course, you have a `WillRepeatedly()`.). What can we do inside `WillOnce()` besides `Return()`? You can return a reference using `ReturnRef(*variable*)`, or invoke a pre-defined function, among [others](cook_book.md#using-actions). **Important note:** The `EXPECT_CALL()` statement evaluates the action clause only once, even though the action may be performed many times. Therefore you must be careful about side effects. The following may not do what you want: ```cpp using ::testing::Return; ... int n = 100; EXPECT_CALL(turtle, GetX()) .Times(4) .WillRepeatedly(Return(n++)); ``` Instead of returning 100, 101, 102, ..., consecutively, this mock function will always return 100 as `n++` is only evaluated once. Similarly, `Return(new Foo)` will create a new `Foo` object when the `EXPECT_CALL()` is executed, and will return the same pointer every time. If you want the side effect to happen every time, you need to define a custom action, which we'll teach in the [cook book](cook_book.md). Time for another quiz! What do you think the following means? ```cpp using ::testing::Return; ... EXPECT_CALL(turtle, GetY()) .Times(4) .WillOnce(Return(100)); ``` Obviously `turtle.GetY()` is expected to be called four times. But if you think it will return 100 every time, think twice!
Remember that one `WillOnce()` clause will be consumed each time the function is invoked and the default action will be taken afterwards. So the right answer is that `turtle.GetY()` will return 100 the first time, but **return 0 from the second time on**, as returning 0 is the default action for `int` functions. #### Using Multiple Expectations {#MultiExpectations} So far we've only shown examples where you have a single expectation. More realistically, you'll specify expectations on multiple mock methods which may be from multiple mock objects. By default, when a mock method is invoked, gMock will search the expectations in the **reverse order** they are defined, and stop when an active expectation that matches the arguments is found (you can think of it as "newer rules override older ones."). If the matching expectation cannot take any more calls, you will get an upper-bound-violated failure. Here's an example: ```cpp using ::testing::_; ... EXPECT_CALL(turtle, Forward(_)); // #1 EXPECT_CALL(turtle, Forward(10)) // #2 .Times(2); ``` If `Forward(10)` is called three times in a row, the third time it will be an error, as the last matching expectation (#2) has been saturated. If, however, the third `Forward(10)` call is replaced by `Forward(20)`, then it would be OK, as now #1 will be the matching expectation. **Note:** Why does gMock search for a match in the *reverse* order of the expectations? The reason is that this allows a user to set up the default expectations in a mock object's constructor or the test fixture's set-up phase and then customize the mock by writing more specific expectations in the test body. So, if you have two expectations on the same method, you want to put the one with more specific matchers **after** the other, or the more specific rule would be shadowed by the more general one that comes after it. 
**Tip:** It is very common to start with a catch-all expectation for a method and `Times(AnyNumber())` (omitting arguments, or with `_` for all arguments, if overloaded). This makes any calls to the method expected. This is not necessary for methods that are not mentioned at all (these are "uninteresting"), but is useful for methods that have some expectations, but for which other calls are ok. See [Understanding Uninteresting vs Unexpected Calls](cook_book.md#uninteresting-vs-unexpected). #### Ordered vs Unordered Calls {#OrderedCalls} By default, an expectation can match a call even though an earlier expectation hasn't been satisfied. In other words, the calls don't have to occur in the order the expectations are specified. Sometimes, you may want all the expected calls to occur in a strict order. To say this in gMock is easy: ```cpp using ::testing::InSequence; ... TEST(FooTest, DrawsLineSegment) { ... { InSequence seq; EXPECT_CALL(turtle, PenDown()); EXPECT_CALL(turtle, Forward(100)); EXPECT_CALL(turtle, PenUp()); } Foo(); } ``` By creating an object of type `InSequence`, all expectations in its scope are put into a *sequence* and have to occur *sequentially*. Since we are just relying on the constructor and destructor of this object to do the actual work, its name is really irrelevant. In this example, we test that `Foo()` calls the three expected functions in the order as written. If a call is made out-of-order, it will be an error. (What if you care about the relative order of some of the calls, but not all of them? Can you specify an arbitrary partial order? The answer is ... yes! The details can be found [here](cook_book.md#OrderedCalls).) #### All Expectations Are Sticky (Unless Said Otherwise) {#StickyExpectations} Now let's do a quick quiz to see how well you can use this mock stuff already. How would you test that the turtle is asked to go to the origin *exactly twice* (you want to ignore any other instructions it receives)? 
After you've come up with your answer, take a look at ours and compare notes (solve it yourself first - don't cheat!): ```cpp using ::testing::_; using ::testing::AnyNumber; ... EXPECT_CALL(turtle, GoTo(_, _)) // #1 .Times(AnyNumber()); EXPECT_CALL(turtle, GoTo(0, 0)) // #2 .Times(2); ``` Suppose `turtle.GoTo(0, 0)` is called three times. In the third time, gMock will see that the arguments match expectation #2 (remember that we always pick the last matching expectation). Now, since we said that there should be only two such calls, gMock will report an error immediately. This is basically what we've told you in the [Using Multiple Expectations](#MultiExpectations) section above. This example shows that **expectations in gMock are "sticky" by default**, in the sense that they remain active even after we have reached their invocation upper bounds. This is an important rule to remember, as it affects the meaning of the spec, and is **different** to how it's done in many other mocking frameworks (Why'd we do that? Because we think our rule makes the common cases easier to express and understand.). Simple? Let's see if you've really understood it: what does the following code say? ```cpp using ::testing::Return; ... for (int i = n; i > 0; i--) { EXPECT_CALL(turtle, GetX()) .WillOnce(Return(10*i)); } ``` If you think it says that `turtle.GetX()` will be called `n` times and will return 10, 20, 30, ..., consecutively, think twice! The problem is that, as we said, expectations are sticky. So, the second time `turtle.GetX()` is called, the last (latest) `EXPECT_CALL()` statement will match, and will immediately lead to an "upper bound violated" error - this piece of code is not very useful! One correct way of saying that `turtle.GetX()` will return 10, 20, 30, ..., is to explicitly say that the expectations are *not* sticky. In other words, they should *retire* as soon as they are saturated: ```cpp using ::testing::Return; ... 
for (int i = n; i > 0; i--) { EXPECT_CALL(turtle, GetX()) .WillOnce(Return(10*i)) .RetiresOnSaturation(); } ``` And, there's a better way to do it: in this case, we expect the calls to occur in a specific order, and we line up the actions to match the order. Since the order is important here, we should make it explicit using a sequence: ```cpp using ::testing::InSequence; using ::testing::Return; ... { InSequence s; for (int i = 1; i <= n; i++) { EXPECT_CALL(turtle, GetX()) .WillOnce(Return(10*i)) .RetiresOnSaturation(); } } ``` By the way, the other situation where an expectation may *not* be sticky is when it's in a sequence - as soon as another expectation that comes after it in the sequence has been used, it automatically retires (and will never be used to match any call). #### Uninteresting Calls A mock object may have many methods, and not all of them are that interesting. For example, in some tests we may not care about how many times `GetX()` and `GetY()` get called. In gMock, if you are not interested in a method, just don't say anything about it. If a call to this method occurs, you'll see a warning in the test output, but it won't be a failure. This is called "naggy" behavior; to change, see [The Nice, the Strict, and the Naggy](cook_book.md#NiceStrictNaggy). LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/docs/gmock_faq.md000066400000000000000000000354131456444476200255610ustar00rootroot00000000000000## Legacy gMock FAQ {#GMockFaq} ### When I call a method on my mock object, the method for the real object is invoked instead. What's the problem? In order for a method to be mocked, it must be *virtual*, unless you use the [high-perf dependency injection technique](#MockingNonVirtualMethods). ### Can I mock a variadic function? You cannot mock a variadic function (i.e. a function taking ellipsis (`...`) arguments) directly in gMock. 
The problem is that in general, there is *no way* for a mock object to know how many arguments are passed to the variadic method, and what the arguments' types are. Only the *author of the base class* knows the protocol, and we cannot look into his or her head. Therefore, to mock such a function, the *user* must teach the mock object how to figure out the number of arguments and their types. One way to do it is to provide overloaded versions of the function. Ellipsis arguments are inherited from C and not really a C++ feature. They are unsafe to use and don't work with arguments that have constructors or destructors. Therefore we recommend to avoid them in C++ as much as possible. ### MSVC gives me warning C4301 or C4373 when I define a mock method with a const parameter. Why? If you compile this using Microsoft Visual C++ 2005 SP1: ```cpp class Foo { ... virtual void Bar(const int i) = 0; }; class MockFoo : public Foo { ... MOCK_METHOD(void, Bar, (const int i), (override)); }; ``` You may get the following warning: ```shell warning C4301: 'MockFoo::Bar': overriding virtual function only differs from 'Foo::Bar' by const/volatile qualifier ``` This is a MSVC bug. The same code compiles fine with gcc, for example. If you use Visual C++ 2008 SP1, you would get the warning: ```shell warning C4373: 'MockFoo::Bar': virtual function overrides 'Foo::Bar', previous versions of the compiler did not override when parameters only differed by const/volatile qualifiers ``` In C++, if you *declare* a function with a `const` parameter, the `const` modifier is ignored. Therefore, the `Foo` base class above is equivalent to: ```cpp class Foo { ... virtual void Bar(int i) = 0; // int or const int? Makes no difference. }; ``` In fact, you can *declare* `Bar()` with an `int` parameter, and define it with a `const int` parameter. The compiler will still match them up. 
Since making a parameter `const` is meaningless in the method declaration, we recommend to remove it in both `Foo` and `MockFoo`. That should workaround the VC bug. Note that we are talking about the *top-level* `const` modifier here. If the function parameter is passed by pointer or reference, declaring the pointee or referee as `const` is still meaningful. For example, the following two declarations are *not* equivalent: ```cpp void Bar(int* p); // Neither p nor *p is const. void Bar(const int* p); // p is not const, but *p is. ``` ### I can't figure out why gMock thinks my expectations are not satisfied. What should I do? You might want to run your test with `--gmock_verbose=info`. This flag lets gMock print a trace of every mock function call it receives. By studying the trace, you'll gain insights on why the expectations you set are not met. If you see the message "The mock function has no default action set, and its return type has no default value set.", then try [adding a default action](for_dummies.md#DefaultValue). Due to a known issue, unexpected calls on mocks without default actions don't print out a detailed comparison between the actual arguments and the expected arguments. ### My program crashed and `ScopedMockLog` spit out tons of messages. Is it a gMock bug? gMock and `ScopedMockLog` are likely doing the right thing here. When a test crashes, the failure signal handler will try to log a lot of information (the stack trace, and the address map, for example). The messages are compounded if you have many threads with depth stacks. When `ScopedMockLog` intercepts these messages and finds that they don't match any expectations, it prints an error for each of them. You can learn to ignore the errors, or you can rewrite your expectations to make your test more robust, for example, by adding something like: ```cpp using ::testing::AnyNumber; using ::testing::Not; ... // Ignores any log not done by us. 
EXPECT_CALL(log, Log(_, Not(EndsWith("/my_file.cc")), _)) .Times(AnyNumber()); ``` ### How can I assert that a function is NEVER called? ```cpp using ::testing::_; ... EXPECT_CALL(foo, Bar(_)) .Times(0); ``` ### I have a failed test where gMock tells me TWICE that a particular expectation is not satisfied. Isn't this redundant? When gMock detects a failure, it prints relevant information (the mock function arguments, the state of relevant expectations, and etc) to help the user debug. If another failure is detected, gMock will do the same, including printing the state of relevant expectations. Sometimes an expectation's state didn't change between two failures, and you'll see the same description of the state twice. They are however *not* redundant, as they refer to *different points in time*. The fact they are the same *is* interesting information. ### I get a heapcheck failure when using a mock object, but using a real object is fine. What can be wrong? Does the class (hopefully a pure interface) you are mocking have a virtual destructor? Whenever you derive from a base class, make sure its destructor is virtual. Otherwise Bad Things will happen. Consider the following code: ```cpp class Base { public: // Not virtual, but should be. ~Base() { ... } ... }; class Derived : public Base { public: ... private: std::string value_; }; ... Base* p = new Derived; ... delete p; // Surprise! ~Base() will be called, but ~Derived() will not // - value_ is leaked. ``` By changing `~Base()` to virtual, `~Derived()` will be correctly called when `delete p` is executed, and the heap checker will be happy. ### The "newer expectations override older ones" rule makes writing expectations awkward. Why does gMock do that? When people complain about this, often they are referring to code like: ```cpp using ::testing::Return; ... // foo.Bar() should be called twice, return 1 the first time, and return // 2 the second time. 
However, I have to write the expectations in the // reverse order. This sucks big time!!! EXPECT_CALL(foo, Bar()) .WillOnce(Return(2)) .RetiresOnSaturation(); EXPECT_CALL(foo, Bar()) .WillOnce(Return(1)) .RetiresOnSaturation(); ``` The problem, is that they didn't pick the **best** way to express the test's intent. By default, expectations don't have to be matched in *any* particular order. If you want them to match in a certain order, you need to be explicit. This is gMock's (and jMock's) fundamental philosophy: it's easy to accidentally over-specify your tests, and we want to make it harder to do so. There are two better ways to write the test spec. You could either put the expectations in sequence: ```cpp using ::testing::Return; ... // foo.Bar() should be called twice, return 1 the first time, and return // 2 the second time. Using a sequence, we can write the expectations // in their natural order. { InSequence s; EXPECT_CALL(foo, Bar()) .WillOnce(Return(1)) .RetiresOnSaturation(); EXPECT_CALL(foo, Bar()) .WillOnce(Return(2)) .RetiresOnSaturation(); } ``` or you can put the sequence of actions in the same expectation: ```cpp using ::testing::Return; ... // foo.Bar() should be called twice, return 1 the first time, and return // 2 the second time. EXPECT_CALL(foo, Bar()) .WillOnce(Return(1)) .WillOnce(Return(2)) .RetiresOnSaturation(); ``` Back to the original questions: why does gMock search the expectations (and `ON_CALL`s) from back to front? Because this allows a user to set up a mock's behavior for the common case early (e.g. in the mock's constructor or the test fixture's set-up phase) and customize it with more specific rules later. If gMock searches from front to back, this very useful pattern won't be possible. ### gMock prints a warning when a function without EXPECT_CALL is called, even if I have set its behavior using ON_CALL. Would it be reasonable not to show the warning in this case? 
When choosing between being neat and being safe, we lean toward the latter. So the answer is that we think it's better to show the warning. Often people write `ON_CALL`s in the mock object's constructor or `SetUp()`, as the default behavior rarely changes from test to test. Then in the test body they set the expectations, which are often different for each test. Having an `ON_CALL` in the set-up part of a test doesn't mean that the calls are expected. If there's no `EXPECT_CALL` and the method is called, it's possibly an error. If we quietly let the call go through without notifying the user, bugs may creep in unnoticed. If, however, you are sure that the calls are OK, you can write ```cpp using ::testing::_; ... EXPECT_CALL(foo, Bar(_)) .WillRepeatedly(...); ``` instead of ```cpp using ::testing::_; ... ON_CALL(foo, Bar(_)) .WillByDefault(...); ``` This tells gMock that you do expect the calls and no warning should be printed. Also, you can control the verbosity by specifying `--gmock_verbose=error`. Other values are `info` and `warning`. If you find the output too noisy when debugging, just choose a less verbose level. ### How can I delete the mock function's argument in an action? If your mock function takes a pointer argument and you want to delete that argument, you can use testing::DeleteArg() to delete the N'th (zero-indexed) argument: ```cpp using ::testing::_; ... MOCK_METHOD(void, Bar, (X* x, const Y& y)); ... EXPECT_CALL(mock_foo_, Bar(_, _)) .WillOnce(testing::DeleteArg<0>())); ``` ### How can I perform an arbitrary action on a mock function's argument? If you find yourself needing to perform some action that's not supported by gMock directly, remember that you can define your own actions using [`MakeAction()`](#NewMonoActions) or [`MakePolymorphicAction()`](#NewPolyActions), or you can write a stub function and invoke it using [`Invoke()`](#FunctionsAsActions). ```cpp using ::testing::_; using ::testing::Invoke; ... MOCK_METHOD(void, Bar, (X* p)); ... 
EXPECT_CALL(mock_foo_, Bar(_)) .WillOnce(Invoke(MyAction(...))); ``` ### My code calls a static/global function. Can I mock it? You can, but you need to make some changes. In general, if you find yourself needing to mock a static function, it's a sign that your modules are too tightly coupled (and less flexible, less reusable, less testable, etc). You are probably better off defining a small interface and call the function through that interface, which then can be easily mocked. It's a bit of work initially, but usually pays for itself quickly. This Google Testing Blog [post](https://testing.googleblog.com/2008/06/defeat-static-cling.html) says it excellently. Check it out. ### My mock object needs to do complex stuff. It's a lot of pain to specify the actions. gMock sucks! I know it's not a question, but you get an answer for free any way. :-) With gMock, you can create mocks in C++ easily. And people might be tempted to use them everywhere. Sometimes they work great, and sometimes you may find them, well, a pain to use. So, what's wrong in the latter case? When you write a test without using mocks, you exercise the code and assert that it returns the correct value or that the system is in an expected state. This is sometimes called "state-based testing". Mocks are great for what some call "interaction-based" testing: instead of checking the system state at the very end, mock objects verify that they are invoked the right way and report an error as soon as it arises, giving you a handle on the precise context in which the error was triggered. This is often more effective and economical to do than state-based testing. If you are doing state-based testing and using a test double just to simulate the real object, you are probably better off using a fake. Using a mock in this case causes pain, as it's not a strong point for mocks to perform complex actions. If you experience this and think that mocks suck, you are just not using the right tool for your problem. 
Or, you might be trying to solve the wrong problem. :-) ### I got a warning "Uninteresting function call encountered - default action taken.." Should I panic? By all means, NO! It's just an FYI. :-) What it means is that you have a mock function, you haven't set any expectations on it (by gMock's rule this means that you are not interested in calls to this function and therefore it can be called any number of times), and it is called. That's OK - you didn't say it's not OK to call the function! What if you actually meant to disallow this function to be called, but forgot to write `EXPECT_CALL(foo, Bar()).Times(0)`? While one can argue that it's the user's fault, gMock tries to be nice and prints you a note. So, when you see the message and believe that there shouldn't be any uninteresting calls, you should investigate what's going on. To make your life easier, gMock dumps the stack trace when an uninteresting call is encountered. From that you can figure out which mock function it is, and how it is called. ### I want to define a custom action. Should I use Invoke() or implement the ActionInterface interface? Either way is fine - you want to choose the one that's more convenient for your circumstance. Usually, if your action is for a particular function type, defining it using `Invoke()` should be easier; if your action can be used in functions of different types (e.g. if you are defining `Return(*value*)`), `MakePolymorphicAction()` is easiest. Sometimes you want precise control on what types of functions the action can be used in, and implementing `ActionInterface` is the way to go here. See the implementation of `Return()` in `testing/base/public/gmock-actions.h` for an example. ### I use SetArgPointee() in WillOnce(), but gcc complains about "conflicting return type specified". What does it mean? You got this error as gMock has no idea what value it should return when the mock method is called. 
`SetArgPointee()` says what the side effect is, but doesn't say what the return value should be. You need `DoAll()` to chain a `SetArgPointee()` with a `Return()` that provides a value appropriate to the API being mocked. See this [recipe](cook_book.md#mocking-side-effects) for more details and an example. ### I have a huge mock class, and Microsoft Visual C++ runs out of memory when compiling it. What can I do? We've noticed that when the `/clr` compiler flag is used, Visual C++ uses 5~6 times as much memory when compiling a mock class. We suggest to avoid `/clr` when compiling native C++ mocks. LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/include/000077500000000000000000000000001456444476200237755ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/include/gmock/000077500000000000000000000000001456444476200250755ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/include/gmock/gmock-actions.h000066400000000000000000001154321456444476200300120ustar00rootroot00000000000000// Copyright 2007, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. 
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Google Mock - a framework for writing C++ mock classes.
//
// This file implements some commonly used actions.

// GOOGLETEST_CM0002 DO NOT DELETE

#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_
#define GMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_

#ifndef _WIN32_WCE
# include <errno.h>
#endif

#include <algorithm>
#include <functional>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>

#include "gmock/internal/gmock-internal-utils.h"
#include "gmock/internal/gmock-port.h"

#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable:4100)
#endif

namespace testing {

// To implement an action Foo, define:
//   1. a class FooAction that implements the ActionInterface interface, and
//   2. a factory function that creates an Action object from a
//      const FooAction*.
//
// The two-level delegation design follows that of Matcher, providing
// consistency for extension developers.  It also eases ownership
// management as Action objects can now be copied like plain values.

namespace internal {

// BuiltInDefaultValueGetter<T, true>::Get() returns a
// default-constructed T value.  BuiltInDefaultValueGetter<T,
// false>::Get() crashes with an error.
//
// This primary template is used when kDefaultConstructible is true.
template struct BuiltInDefaultValueGetter { static T Get() { return T(); } }; template struct BuiltInDefaultValueGetter { static T Get() { Assert(false, __FILE__, __LINE__, "Default action undefined for the function return type."); return internal::Invalid(); // The above statement will never be reached, but is required in // order for this function to compile. } }; // BuiltInDefaultValue::Get() returns the "built-in" default value // for type T, which is NULL when T is a raw pointer type, 0 when T is // a numeric type, false when T is bool, or "" when T is string or // std::string. In addition, in C++11 and above, it turns a // default-constructed T value if T is default constructible. For any // other type T, the built-in default T value is undefined, and the // function will abort the process. template class BuiltInDefaultValue { public: // This function returns true if and only if type T has a built-in default // value. static bool Exists() { return ::std::is_default_constructible::value; } static T Get() { return BuiltInDefaultValueGetter< T, ::std::is_default_constructible::value>::Get(); } }; // This partial specialization says that we use the same built-in // default value for T and const T. template class BuiltInDefaultValue { public: static bool Exists() { return BuiltInDefaultValue::Exists(); } static T Get() { return BuiltInDefaultValue::Get(); } }; // This partial specialization defines the default values for pointer // types. template class BuiltInDefaultValue { public: static bool Exists() { return true; } static T* Get() { return nullptr; } }; // The following specializations define the default values for // specific types we care about. 
#define GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(type, value) \ template <> \ class BuiltInDefaultValue { \ public: \ static bool Exists() { return true; } \ static type Get() { return value; } \ } GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(void, ); // NOLINT GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(::std::string, ""); GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(bool, false); GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned char, '\0'); GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed char, '\0'); GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(char, '\0'); // There's no need for a default action for signed wchar_t, as that // type is the same as wchar_t for gcc, and invalid for MSVC. // // There's also no need for a default action for unsigned wchar_t, as // that type is the same as unsigned int for gcc, and invalid for // MSVC. #if GMOCK_WCHAR_T_IS_NATIVE_ GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(wchar_t, 0U); // NOLINT #endif GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned short, 0U); // NOLINT GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed short, 0); // NOLINT GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned int, 0U); GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed int, 0); GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned long, 0UL); // NOLINT GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed long, 0L); // NOLINT GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(UInt64, 0); GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(Int64, 0); GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(float, 0); GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(double, 0); #undef GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_ } // namespace internal // When an unexpected function call is encountered, Google Mock will // let it return a default value if the user has specified one for its // return type, or if the return type has a built-in default value; // otherwise Google Mock won't know what value to return and will have // to abort the process. 
// // The DefaultValue class allows a user to specify the // default value for a type T that is both copyable and publicly // destructible (i.e. anything that can be used as a function return // type). The usage is: // // // Sets the default value for type T to be foo. // DefaultValue::Set(foo); template class DefaultValue { public: // Sets the default value for type T; requires T to be // copy-constructable and have a public destructor. static void Set(T x) { delete producer_; producer_ = new FixedValueProducer(x); } // Provides a factory function to be called to generate the default value. // This method can be used even if T is only move-constructible, but it is not // limited to that case. typedef T (*FactoryFunction)(); static void SetFactory(FactoryFunction factory) { delete producer_; producer_ = new FactoryValueProducer(factory); } // Unsets the default value for type T. static void Clear() { delete producer_; producer_ = nullptr; } // Returns true if and only if the user has set the default value for type T. static bool IsSet() { return producer_ != nullptr; } // Returns true if T has a default return value set by the user or there // exists a built-in default value. static bool Exists() { return IsSet() || internal::BuiltInDefaultValue::Exists(); } // Returns the default value for type T if the user has set one; // otherwise returns the built-in default value. Requires that Exists() // is true, which ensures that the return value is well-defined. static T Get() { return producer_ == nullptr ? 
internal::BuiltInDefaultValue::Get() : producer_->Produce(); } private: class ValueProducer { public: virtual ~ValueProducer() {} virtual T Produce() = 0; }; class FixedValueProducer : public ValueProducer { public: explicit FixedValueProducer(T value) : value_(value) {} T Produce() override { return value_; } private: const T value_; GTEST_DISALLOW_COPY_AND_ASSIGN_(FixedValueProducer); }; class FactoryValueProducer : public ValueProducer { public: explicit FactoryValueProducer(FactoryFunction factory) : factory_(factory) {} T Produce() override { return factory_(); } private: const FactoryFunction factory_; GTEST_DISALLOW_COPY_AND_ASSIGN_(FactoryValueProducer); }; static ValueProducer* producer_; }; // This partial specialization allows a user to set default values for // reference types. template class DefaultValue { public: // Sets the default value for type T&. static void Set(T& x) { // NOLINT address_ = &x; } // Unsets the default value for type T&. static void Clear() { address_ = nullptr; } // Returns true if and only if the user has set the default value for type T&. static bool IsSet() { return address_ != nullptr; } // Returns true if T has a default return value set by the user or there // exists a built-in default value. static bool Exists() { return IsSet() || internal::BuiltInDefaultValue::Exists(); } // Returns the default value for type T& if the user has set one; // otherwise returns the built-in default value if there is one; // otherwise aborts the process. static T& Get() { return address_ == nullptr ? internal::BuiltInDefaultValue::Get() : *address_; } private: static T* address_; }; // This specialization allows DefaultValue::Get() to // compile. template <> class DefaultValue { public: static bool Exists() { return true; } static void Get() {} }; // Points to the user-set default value for type T. template typename DefaultValue::ValueProducer* DefaultValue::producer_ = nullptr; // Points to the user-set default value for type T&. 
template T* DefaultValue::address_ = nullptr; // Implement this interface to define an action for function type F. template class ActionInterface { public: typedef typename internal::Function::Result Result; typedef typename internal::Function::ArgumentTuple ArgumentTuple; ActionInterface() {} virtual ~ActionInterface() {} // Performs the action. This method is not const, as in general an // action can have side effects and be stateful. For example, a // get-the-next-element-from-the-collection action will need to // remember the current element. virtual Result Perform(const ArgumentTuple& args) = 0; private: GTEST_DISALLOW_COPY_AND_ASSIGN_(ActionInterface); }; // An Action is a copyable and IMMUTABLE (except by assignment) // object that represents an action to be taken when a mock function // of type F is called. The implementation of Action is just a // std::shared_ptr to const ActionInterface. Don't inherit from Action! // You can view an object implementing ActionInterface as a // concrete action (including its current state), and an Action // object as a handle to it. template class Action { // Adapter class to allow constructing Action from a legacy ActionInterface. // New code should create Actions from functors instead. struct ActionAdapter { // Adapter must be copyable to satisfy std::function requirements. ::std::shared_ptr> impl_; template typename internal::Function::Result operator()(Args&&... args) { return impl_->Perform( ::std::forward_as_tuple(::std::forward(args)...)); } }; public: typedef typename internal::Function::Result Result; typedef typename internal::Function::ArgumentTuple ArgumentTuple; // Constructs a null Action. Needed for storing Action objects in // STL containers. Action() {} // Construct an Action from a specified callable. // This cannot take std::function directly, because then Action would not be // directly constructible from lambda (it would require two conversions). 
template , G>::value>::type> Action(G&& fun) : fun_(::std::forward(fun)) {} // NOLINT // Constructs an Action from its implementation. explicit Action(ActionInterface* impl) : fun_(ActionAdapter{::std::shared_ptr>(impl)}) {} // This constructor allows us to turn an Action object into an // Action, as long as F's arguments can be implicitly converted // to Func's and Func's return type can be implicitly converted to F's. template explicit Action(const Action& action) : fun_(action.fun_) {} // Returns true if and only if this is the DoDefault() action. bool IsDoDefault() const { return fun_ == nullptr; } // Performs the action. Note that this method is const even though // the corresponding method in ActionInterface is not. The reason // is that a const Action means that it cannot be re-bound to // another concrete action, not that the concrete action it binds to // cannot change state. (Think of the difference between a const // pointer and a pointer to const.) Result Perform(ArgumentTuple args) const { if (IsDoDefault()) { internal::IllegalDoDefault(__FILE__, __LINE__); } return internal::Apply(fun_, ::std::move(args)); } private: template friend class Action; // fun_ is an empty function if and only if this is the DoDefault() action. ::std::function fun_; }; // The PolymorphicAction class template makes it easy to implement a // polymorphic action (i.e. an action that can be used in mock // functions of than one type, e.g. Return()). // // To define a polymorphic action, a user first provides a COPYABLE // implementation class that has a Perform() method template: // // class FooAction { // public: // template // Result Perform(const ArgumentTuple& args) const { // // Processes the arguments and returns a result, using // // std::get(args) to get the N-th (0-based) argument in the tuple. // } // ... // }; // // Then the user creates the polymorphic action using // MakePolymorphicAction(object) where object has type FooAction. 
See // the definition of Return(void) and SetArgumentPointee(value) for // complete examples. template class PolymorphicAction { public: explicit PolymorphicAction(const Impl& impl) : impl_(impl) {} template operator Action() const { return Action(new MonomorphicImpl(impl_)); } private: template class MonomorphicImpl : public ActionInterface { public: typedef typename internal::Function::Result Result; typedef typename internal::Function::ArgumentTuple ArgumentTuple; explicit MonomorphicImpl(const Impl& impl) : impl_(impl) {} Result Perform(const ArgumentTuple& args) override { return impl_.template Perform(args); } private: Impl impl_; GTEST_DISALLOW_ASSIGN_(MonomorphicImpl); }; Impl impl_; GTEST_DISALLOW_ASSIGN_(PolymorphicAction); }; // Creates an Action from its implementation and returns it. The // created Action object owns the implementation. template Action MakeAction(ActionInterface* impl) { return Action(impl); } // Creates a polymorphic action from its implementation. This is // easier to use than the PolymorphicAction constructor as it // doesn't require you to explicitly write the template argument, e.g. // // MakePolymorphicAction(foo); // vs // PolymorphicAction(foo); template inline PolymorphicAction MakePolymorphicAction(const Impl& impl) { return PolymorphicAction(impl); } namespace internal { // Helper struct to specialize ReturnAction to execute a move instead of a copy // on return. Useful for move-only types, but could be used on any type. template struct ByMoveWrapper { explicit ByMoveWrapper(T value) : payload(std::move(value)) {} T payload; }; // Implements the polymorphic Return(x) action, which can be used in // any function that returns the type of x, regardless of the argument // types. // // Note: The value passed into Return must be converted into // Function::Result when this action is cast to Action rather than // when that action is performed. This is important in scenarios like // // MOCK_METHOD1(Method, T(U)); // ... 
// { // Foo foo; // X x(&foo); // EXPECT_CALL(mock, Method(_)).WillOnce(Return(x)); // } // // In the example above the variable x holds reference to foo which leaves // scope and gets destroyed. If copying X just copies a reference to foo, // that copy will be left with a hanging reference. If conversion to T // makes a copy of foo, the above code is safe. To support that scenario, we // need to make sure that the type conversion happens inside the EXPECT_CALL // statement, and conversion of the result of Return to Action is a // good place for that. // // The real life example of the above scenario happens when an invocation // of gtl::Container() is passed into Return. // template class ReturnAction { public: // Constructs a ReturnAction object from the value to be returned. // 'value' is passed by value instead of by const reference in order // to allow Return("string literal") to compile. explicit ReturnAction(R value) : value_(new R(std::move(value))) {} // This template type conversion operator allows Return(x) to be // used in ANY function that returns x's type. template operator Action() const { // NOLINT // Assert statement belongs here because this is the best place to verify // conditions on F. It produces the clearest error messages // in most compilers. // Impl really belongs in this scope as a local class but can't // because MSVC produces duplicate symbols in different translation units // in this case. Until MS fixes that bug we put Impl into the class scope // and put the typedef both here (for use in assert statement) and // in the Impl class. But both definitions must be the same. typedef typename Function::Result Result; GTEST_COMPILE_ASSERT_( !std::is_reference::value, use_ReturnRef_instead_of_Return_to_return_a_reference); static_assert(!std::is_void::value, "Can't use Return() on an action expected to return `void`."); return Action(new Impl(value_)); } private: // Implements the Return(x) action for a particular function type F. 
template class Impl : public ActionInterface { public: typedef typename Function::Result Result; typedef typename Function::ArgumentTuple ArgumentTuple; // The implicit cast is necessary when Result has more than one // single-argument constructor (e.g. Result is std::vector) and R // has a type conversion operator template. In that case, value_(value) // won't compile as the compiler doesn't known which constructor of // Result to call. ImplicitCast_ forces the compiler to convert R to // Result without considering explicit constructors, thus resolving the // ambiguity. value_ is then initialized using its copy constructor. explicit Impl(const std::shared_ptr& value) : value_before_cast_(*value), value_(ImplicitCast_(value_before_cast_)) {} Result Perform(const ArgumentTuple&) override { return value_; } private: GTEST_COMPILE_ASSERT_(!std::is_reference::value, Result_cannot_be_a_reference_type); // We save the value before casting just in case it is being cast to a // wrapper type. R value_before_cast_; Result value_; GTEST_DISALLOW_COPY_AND_ASSIGN_(Impl); }; // Partially specialize for ByMoveWrapper. This version of ReturnAction will // move its contents instead. template class Impl, F> : public ActionInterface { public: typedef typename Function::Result Result; typedef typename Function::ArgumentTuple ArgumentTuple; explicit Impl(const std::shared_ptr& wrapper) : performed_(false), wrapper_(wrapper) {} Result Perform(const ArgumentTuple&) override { GTEST_CHECK_(!performed_) << "A ByMove() action should only be performed once."; performed_ = true; return std::move(wrapper_->payload); } private: bool performed_; const std::shared_ptr wrapper_; GTEST_DISALLOW_ASSIGN_(Impl); }; const std::shared_ptr value_; GTEST_DISALLOW_ASSIGN_(ReturnAction); }; // Implements the ReturnNull() action. class ReturnNullAction { public: // Allows ReturnNull() to be used in any pointer-returning function. 
In C++11 // this is enforced by returning nullptr, and in non-C++11 by asserting a // pointer type on compile time. template static Result Perform(const ArgumentTuple&) { return nullptr; } }; // Implements the Return() action. class ReturnVoidAction { public: // Allows Return() to be used in any void-returning function. template static void Perform(const ArgumentTuple&) { static_assert(std::is_void::value, "Result should be void."); } }; // Implements the polymorphic ReturnRef(x) action, which can be used // in any function that returns a reference to the type of x, // regardless of the argument types. template class ReturnRefAction { public: // Constructs a ReturnRefAction object from the reference to be returned. explicit ReturnRefAction(T& ref) : ref_(ref) {} // NOLINT // This template type conversion operator allows ReturnRef(x) to be // used in ANY function that returns a reference to x's type. template operator Action() const { typedef typename Function::Result Result; // Asserts that the function return type is a reference. This // catches the user error of using ReturnRef(x) when Return(x) // should be used, and generates some helpful error message. GTEST_COMPILE_ASSERT_(std::is_reference::value, use_Return_instead_of_ReturnRef_to_return_a_value); return Action(new Impl(ref_)); } private: // Implements the ReturnRef(x) action for a particular function type F. template class Impl : public ActionInterface { public: typedef typename Function::Result Result; typedef typename Function::ArgumentTuple ArgumentTuple; explicit Impl(T& ref) : ref_(ref) {} // NOLINT Result Perform(const ArgumentTuple&) override { return ref_; } private: T& ref_; GTEST_DISALLOW_ASSIGN_(Impl); }; T& ref_; GTEST_DISALLOW_ASSIGN_(ReturnRefAction); }; // Implements the polymorphic ReturnRefOfCopy(x) action, which can be // used in any function that returns a reference to the type of x, // regardless of the argument types. 
template class ReturnRefOfCopyAction { public: // Constructs a ReturnRefOfCopyAction object from the reference to // be returned. explicit ReturnRefOfCopyAction(const T& value) : value_(value) {} // NOLINT // This template type conversion operator allows ReturnRefOfCopy(x) to be // used in ANY function that returns a reference to x's type. template operator Action() const { typedef typename Function::Result Result; // Asserts that the function return type is a reference. This // catches the user error of using ReturnRefOfCopy(x) when Return(x) // should be used, and generates some helpful error message. GTEST_COMPILE_ASSERT_( std::is_reference::value, use_Return_instead_of_ReturnRefOfCopy_to_return_a_value); return Action(new Impl(value_)); } private: // Implements the ReturnRefOfCopy(x) action for a particular function type F. template class Impl : public ActionInterface { public: typedef typename Function::Result Result; typedef typename Function::ArgumentTuple ArgumentTuple; explicit Impl(const T& value) : value_(value) {} // NOLINT Result Perform(const ArgumentTuple&) override { return value_; } private: T value_; GTEST_DISALLOW_ASSIGN_(Impl); }; const T value_; GTEST_DISALLOW_ASSIGN_(ReturnRefOfCopyAction); }; // Implements the polymorphic DoDefault() action. class DoDefaultAction { public: // This template type conversion operator allows DoDefault() to be // used in any function. template operator Action() const { return Action(); } // NOLINT }; // Implements the Assign action to set a given pointer referent to a // particular value. template class AssignAction { public: AssignAction(T1* ptr, T2 value) : ptr_(ptr), value_(value) {} template void Perform(const ArgumentTuple& /* args */) const { *ptr_ = value_; } private: T1* const ptr_; const T2 value_; GTEST_DISALLOW_ASSIGN_(AssignAction); }; #if !GTEST_OS_WINDOWS_MOBILE // Implements the SetErrnoAndReturn action to simulate return from // various system calls and libc functions. 
template class SetErrnoAndReturnAction { public: SetErrnoAndReturnAction(int errno_value, T result) : errno_(errno_value), result_(result) {} template Result Perform(const ArgumentTuple& /* args */) const { errno = errno_; return result_; } private: const int errno_; const T result_; GTEST_DISALLOW_ASSIGN_(SetErrnoAndReturnAction); }; #endif // !GTEST_OS_WINDOWS_MOBILE // Implements the SetArgumentPointee(x) action for any function // whose N-th argument (0-based) is a pointer to x's type. template struct SetArgumentPointeeAction { A value; template void operator()(const Args&... args) const { *::std::get(std::tie(args...)) = value; } }; // Implements the Invoke(object_ptr, &Class::Method) action. template struct InvokeMethodAction { Class* const obj_ptr; const MethodPtr method_ptr; template auto operator()(Args&&... args) const -> decltype((obj_ptr->*method_ptr)(std::forward(args)...)) { return (obj_ptr->*method_ptr)(std::forward(args)...); } }; // Implements the InvokeWithoutArgs(f) action. The template argument // FunctionImpl is the implementation type of f, which can be either a // function pointer or a functor. InvokeWithoutArgs(f) can be used as an // Action as long as f's type is compatible with F. template struct InvokeWithoutArgsAction { FunctionImpl function_impl; // Allows InvokeWithoutArgs(f) to be used as any action whose type is // compatible with f. template auto operator()(const Args&...) -> decltype(function_impl()) { return function_impl(); } }; // Implements the InvokeWithoutArgs(object_ptr, &Class::Method) action. template struct InvokeMethodWithoutArgsAction { Class* const obj_ptr; const MethodPtr method_ptr; using ReturnType = typename std::result_of::type; template ReturnType operator()(const Args&...) const { return (obj_ptr->*method_ptr)(); } }; // Implements the IgnoreResult(action) action. 
template class IgnoreResultAction { public: explicit IgnoreResultAction(const A& action) : action_(action) {} template operator Action() const { // Assert statement belongs here because this is the best place to verify // conditions on F. It produces the clearest error messages // in most compilers. // Impl really belongs in this scope as a local class but can't // because MSVC produces duplicate symbols in different translation units // in this case. Until MS fixes that bug we put Impl into the class scope // and put the typedef both here (for use in assert statement) and // in the Impl class. But both definitions must be the same. typedef typename internal::Function::Result Result; // Asserts at compile time that F returns void. static_assert(std::is_void::value, "Result type should be void."); return Action(new Impl(action_)); } private: template class Impl : public ActionInterface { public: typedef typename internal::Function::Result Result; typedef typename internal::Function::ArgumentTuple ArgumentTuple; explicit Impl(const A& action) : action_(action) {} void Perform(const ArgumentTuple& args) override { // Performs the action and ignores its result. action_.Perform(args); } private: // Type OriginalFunction is the same as F except that its return // type is IgnoredValue. typedef typename internal::Function::MakeResultIgnoredValue OriginalFunction; const Action action_; GTEST_DISALLOW_ASSIGN_(Impl); }; const A action_; GTEST_DISALLOW_ASSIGN_(IgnoreResultAction); }; template struct WithArgsAction { InnerAction action; // The inner action could be anything convertible to Action. // We use the conversion operator to detect the signature of the inner Action. template operator Action() const { // NOLINT Action>::type...)> converted(action); return [converted](Args... 
args) -> R { return converted.Perform(std::forward_as_tuple( std::get(std::forward_as_tuple(std::forward(args)...))...)); }; } }; template struct DoAllAction { private: template std::vector> Convert(IndexSequence) const { return {std::get(actions)...}; } public: std::tuple actions; template operator Action() const { // NOLINT struct Op { std::vector> converted; Action last; R operator()(Args... args) const { auto tuple_args = std::forward_as_tuple(std::forward(args)...); for (auto& a : converted) { a.Perform(tuple_args); } return last.Perform(tuple_args); } }; return Op{Convert(MakeIndexSequence()), std::get(actions)}; } }; } // namespace internal // An Unused object can be implicitly constructed from ANY value. // This is handy when defining actions that ignore some or all of the // mock function arguments. For example, given // // MOCK_METHOD3(Foo, double(const string& label, double x, double y)); // MOCK_METHOD3(Bar, double(int index, double x, double y)); // // instead of // // double DistanceToOriginWithLabel(const string& label, double x, double y) { // return sqrt(x*x + y*y); // } // double DistanceToOriginWithIndex(int index, double x, double y) { // return sqrt(x*x + y*y); // } // ... // EXPECT_CALL(mock, Foo("abc", _, _)) // .WillOnce(Invoke(DistanceToOriginWithLabel)); // EXPECT_CALL(mock, Bar(5, _, _)) // .WillOnce(Invoke(DistanceToOriginWithIndex)); // // you could write // // // We can declare any uninteresting argument as Unused. // double DistanceToOrigin(Unused, double x, double y) { // return sqrt(x*x + y*y); // } // ... // EXPECT_CALL(mock, Foo("abc", _, _)).WillOnce(Invoke(DistanceToOrigin)); // EXPECT_CALL(mock, Bar(5, _, _)).WillOnce(Invoke(DistanceToOrigin)); typedef internal::IgnoredValue Unused; // Creates an action that does actions a1, a2, ..., sequentially in // each invocation. template internal::DoAllAction::type...> DoAll( Action&&... 
action) { return {std::forward_as_tuple(std::forward(action)...)}; } // WithArg(an_action) creates an action that passes the k-th // (0-based) argument of the mock function to an_action and performs // it. It adapts an action accepting one argument to one that accepts // multiple arguments. For convenience, we also provide // WithArgs(an_action) (defined below) as a synonym. template internal::WithArgsAction::type, k> WithArg(InnerAction&& action) { return {std::forward(action)}; } // WithArgs(an_action) creates an action that passes // the selected arguments of the mock function to an_action and // performs it. It serves as an adaptor between actions with // different argument lists. template internal::WithArgsAction::type, k, ks...> WithArgs(InnerAction&& action) { return {std::forward(action)}; } // WithoutArgs(inner_action) can be used in a mock function with a // non-empty argument list to perform inner_action, which takes no // argument. In other words, it adapts an action accepting no // argument to one that accepts (and ignores) arguments. template internal::WithArgsAction::type> WithoutArgs(InnerAction&& action) { return {std::forward(action)}; } // Creates an action that returns 'value'. 'value' is passed by value // instead of const reference - otherwise Return("string literal") // will trigger a compiler error about using array as initializer. template internal::ReturnAction Return(R value) { return internal::ReturnAction(std::move(value)); } // Creates an action that returns NULL. inline PolymorphicAction ReturnNull() { return MakePolymorphicAction(internal::ReturnNullAction()); } // Creates an action that returns from a void function. inline PolymorphicAction Return() { return MakePolymorphicAction(internal::ReturnVoidAction()); } // Creates an action that returns the reference to a variable. 
template inline internal::ReturnRefAction ReturnRef(R& x) { // NOLINT return internal::ReturnRefAction(x); } // Creates an action that returns the reference to a copy of the // argument. The copy is created when the action is constructed and // lives as long as the action. template inline internal::ReturnRefOfCopyAction ReturnRefOfCopy(const R& x) { return internal::ReturnRefOfCopyAction(x); } // Modifies the parent action (a Return() action) to perform a move of the // argument instead of a copy. // Return(ByMove()) actions can only be executed once and will assert this // invariant. template internal::ByMoveWrapper ByMove(R x) { return internal::ByMoveWrapper(std::move(x)); } // Creates an action that does the default action for the give mock function. inline internal::DoDefaultAction DoDefault() { return internal::DoDefaultAction(); } // Creates an action that sets the variable pointed by the N-th // (0-based) function argument to 'value'. template internal::SetArgumentPointeeAction SetArgPointee(T x) { return {std::move(x)}; } // The following version is DEPRECATED. template internal::SetArgumentPointeeAction SetArgumentPointee(T x) { return {std::move(x)}; } // Creates an action that sets a pointer referent to a given value. template PolymorphicAction > Assign(T1* ptr, T2 val) { return MakePolymorphicAction(internal::AssignAction(ptr, val)); } #if !GTEST_OS_WINDOWS_MOBILE // Creates an action that sets errno and returns the appropriate error. template PolymorphicAction > SetErrnoAndReturn(int errval, T result) { return MakePolymorphicAction( internal::SetErrnoAndReturnAction(errval, result)); } #endif // !GTEST_OS_WINDOWS_MOBILE // Various overloads for Invoke(). // Legacy function. // Actions can now be implicitly constructed from callables. No need to create // wrapper objects. // This function exists for backwards compatibility. 
template typename std::decay::type Invoke(FunctionImpl&& function_impl) { return std::forward(function_impl); } // Creates an action that invokes the given method on the given object // with the mock function's arguments. template internal::InvokeMethodAction Invoke(Class* obj_ptr, MethodPtr method_ptr) { return {obj_ptr, method_ptr}; } // Creates an action that invokes 'function_impl' with no argument. template internal::InvokeWithoutArgsAction::type> InvokeWithoutArgs(FunctionImpl function_impl) { return {std::move(function_impl)}; } // Creates an action that invokes the given method on the given object // with no argument. template internal::InvokeMethodWithoutArgsAction InvokeWithoutArgs( Class* obj_ptr, MethodPtr method_ptr) { return {obj_ptr, method_ptr}; } // Creates an action that performs an_action and throws away its // result. In other words, it changes the return type of an_action to // void. an_action MUST NOT return void, or the code won't compile. template inline internal::IgnoreResultAction IgnoreResult(const A& an_action) { return internal::IgnoreResultAction(an_action); } // Creates a reference wrapper for the given L-value. If necessary, // you can explicitly specify the type of the reference. For example, // suppose 'derived' is an object of type Derived, ByRef(derived) // would wrap a Derived&. If you want to wrap a const Base& instead, // where Base is a base class of Derived, just write: // // ByRef(derived) // // N.B. ByRef is redundant with std::ref, std::cref and std::reference_wrapper. // However, it may still be used for consistency with ByMove(). 
template inline ::std::reference_wrapper ByRef(T& l_value) { // NOLINT return ::std::reference_wrapper(l_value); } } // namespace testing #ifdef _MSC_VER # pragma warning(pop) #endif #endif // GMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_ LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/include/gmock/gmock-cardinalities.h000066400000000000000000000136251456444476200311660ustar00rootroot00000000000000// Copyright 2007, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Google Mock - a framework for writing C++ mock classes. 
// // This file implements some commonly used cardinalities. More // cardinalities can be defined by the user implementing the // CardinalityInterface interface if necessary. // GOOGLETEST_CM0002 DO NOT DELETE #ifndef GMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_ #define GMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_ #include #include #include // NOLINT #include "gmock/internal/gmock-port.h" #include "gtest/gtest.h" GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ /* class A needs to have dll-interface to be used by clients of class B */) namespace testing { // To implement a cardinality Foo, define: // 1. a class FooCardinality that implements the // CardinalityInterface interface, and // 2. a factory function that creates a Cardinality object from a // const FooCardinality*. // // The two-level delegation design follows that of Matcher, providing // consistency for extension developers. It also eases ownership // management as Cardinality objects can now be copied like plain values. // The implementation of a cardinality. class CardinalityInterface { public: virtual ~CardinalityInterface() {} // Conservative estimate on the lower/upper bound of the number of // calls allowed. virtual int ConservativeLowerBound() const { return 0; } virtual int ConservativeUpperBound() const { return INT_MAX; } // Returns true if and only if call_count calls will satisfy this // cardinality. virtual bool IsSatisfiedByCallCount(int call_count) const = 0; // Returns true if and only if call_count calls will saturate this // cardinality. virtual bool IsSaturatedByCallCount(int call_count) const = 0; // Describes self to an ostream. virtual void DescribeTo(::std::ostream* os) const = 0; }; // A Cardinality is a copyable and IMMUTABLE (except by assignment) // object that specifies how many times a mock function is expected to // be called. The implementation of Cardinality is just a std::shared_ptr // to const CardinalityInterface. Don't inherit from Cardinality! 
class GTEST_API_ Cardinality { public: // Constructs a null cardinality. Needed for storing Cardinality // objects in STL containers. Cardinality() {} // Constructs a Cardinality from its implementation. explicit Cardinality(const CardinalityInterface* impl) : impl_(impl) {} // Conservative estimate on the lower/upper bound of the number of // calls allowed. int ConservativeLowerBound() const { return impl_->ConservativeLowerBound(); } int ConservativeUpperBound() const { return impl_->ConservativeUpperBound(); } // Returns true if and only if call_count calls will satisfy this // cardinality. bool IsSatisfiedByCallCount(int call_count) const { return impl_->IsSatisfiedByCallCount(call_count); } // Returns true if and only if call_count calls will saturate this // cardinality. bool IsSaturatedByCallCount(int call_count) const { return impl_->IsSaturatedByCallCount(call_count); } // Returns true if and only if call_count calls will over-saturate this // cardinality, i.e. exceed the maximum number of allowed calls. bool IsOverSaturatedByCallCount(int call_count) const { return impl_->IsSaturatedByCallCount(call_count) && !impl_->IsSatisfiedByCallCount(call_count); } // Describes self to an ostream void DescribeTo(::std::ostream* os) const { impl_->DescribeTo(os); } // Describes the given actual call count to an ostream. static void DescribeActualCallCountTo(int actual_call_count, ::std::ostream* os); private: std::shared_ptr impl_; }; // Creates a cardinality that allows at least n calls. GTEST_API_ Cardinality AtLeast(int n); // Creates a cardinality that allows at most n calls. GTEST_API_ Cardinality AtMost(int n); // Creates a cardinality that allows any number of calls. GTEST_API_ Cardinality AnyNumber(); // Creates a cardinality that allows between min and max calls. GTEST_API_ Cardinality Between(int min, int max); // Creates a cardinality that allows exactly n calls. GTEST_API_ Cardinality Exactly(int n); // Creates a cardinality from its implementation. 
inline Cardinality MakeCardinality(const CardinalityInterface* c) { return Cardinality(c); } } // namespace testing GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 #endif // GMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_ LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/include/gmock/gmock-function-mocker.h000066400000000000000000000314171456444476200314550ustar00rootroot00000000000000// Copyright 2007, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Google Mock - a framework for writing C++ mock classes. 
// // This file implements MOCK_METHOD. // GOOGLETEST_CM0002 DO NOT DELETE #ifndef THIRD_PARTY_GOOGLETEST_GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_FUNCTION_MOCKER_H_ // NOLINT #define THIRD_PARTY_GOOGLETEST_GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_FUNCTION_MOCKER_H_ // NOLINT #include "gmock/gmock-generated-function-mockers.h" // NOLINT #include "gmock/internal/gmock-pp.h" #define MOCK_METHOD(...) \ GMOCK_PP_VARIADIC_CALL(GMOCK_INTERNAL_MOCK_METHOD_ARG_, __VA_ARGS__) #define GMOCK_INTERNAL_MOCK_METHOD_ARG_1(...) \ GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) #define GMOCK_INTERNAL_MOCK_METHOD_ARG_2(...) \ GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) #define GMOCK_INTERNAL_MOCK_METHOD_ARG_3(_Ret, _MethodName, _Args) \ GMOCK_INTERNAL_MOCK_METHOD_ARG_4(_Ret, _MethodName, _Args, ()) #define GMOCK_INTERNAL_MOCK_METHOD_ARG_4(_Ret, _MethodName, _Args, _Spec) \ GMOCK_INTERNAL_ASSERT_PARENTHESIS(_Args); \ GMOCK_INTERNAL_ASSERT_PARENTHESIS(_Spec); \ GMOCK_INTERNAL_ASSERT_VALID_SIGNATURE( \ GMOCK_PP_NARG0 _Args, GMOCK_INTERNAL_SIGNATURE(_Ret, _Args)); \ GMOCK_INTERNAL_ASSERT_VALID_SPEC(_Spec) \ GMOCK_INTERNAL_MOCK_METHOD_IMPL( \ GMOCK_PP_NARG0 _Args, _MethodName, GMOCK_INTERNAL_HAS_CONST(_Spec), \ GMOCK_INTERNAL_HAS_OVERRIDE(_Spec), GMOCK_INTERNAL_HAS_FINAL(_Spec), \ GMOCK_INTERNAL_HAS_NOEXCEPT(_Spec), GMOCK_INTERNAL_GET_CALLTYPE(_Spec), \ (GMOCK_INTERNAL_SIGNATURE(_Ret, _Args))) #define GMOCK_INTERNAL_MOCK_METHOD_ARG_5(...) \ GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) #define GMOCK_INTERNAL_MOCK_METHOD_ARG_6(...) \ GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) #define GMOCK_INTERNAL_MOCK_METHOD_ARG_7(...) \ GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) #define GMOCK_INTERNAL_WRONG_ARITY(...) \ static_assert( \ false, \ "MOCK_METHOD must be called with 3 or 4 arguments. _Ret, " \ "_MethodName, _Args and optionally _Spec. _Args and _Spec must be " \ "enclosed in parentheses. 
If _Ret is a type with unprotected commas, " \ "it must also be enclosed in parentheses.") #define GMOCK_INTERNAL_ASSERT_PARENTHESIS(_Tuple) \ static_assert( \ GMOCK_PP_IS_ENCLOSED_PARENS(_Tuple), \ GMOCK_PP_STRINGIZE(_Tuple) " should be enclosed in parentheses.") #define GMOCK_INTERNAL_ASSERT_VALID_SIGNATURE(_N, ...) \ static_assert( \ std::is_function<__VA_ARGS__>::value, \ "Signature must be a function type, maybe return type contains " \ "unprotected comma."); \ static_assert( \ ::testing::tuple_size::ArgumentTuple>::value == _N, \ "This method does not take " GMOCK_PP_STRINGIZE( \ _N) " arguments. Parenthesize all types with unproctected commas.") #define GMOCK_INTERNAL_ASSERT_VALID_SPEC(_Spec) \ GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_ASSERT_VALID_SPEC_ELEMENT, ~, _Spec) #define GMOCK_INTERNAL_MOCK_METHOD_IMPL(_N, _MethodName, _Constness, \ _Override, _Final, _Noexcept, \ _CallType, _Signature) \ typename ::testing::internal::Function::Result \ GMOCK_INTERNAL_EXPAND(_CallType) \ _MethodName(GMOCK_PP_REPEAT(GMOCK_INTERNAL_PARAMETER, _Signature, _N)) \ GMOCK_PP_IF(_Constness, const, ) GMOCK_PP_IF(_Noexcept, noexcept, ) \ GMOCK_PP_IF(_Override, override, ) \ GMOCK_PP_IF(_Final, final, ) { \ GMOCK_MOCKER_(_N, _Constness, _MethodName) \ .SetOwnerAndName(this, #_MethodName); \ return GMOCK_MOCKER_(_N, _Constness, _MethodName) \ .Invoke(GMOCK_PP_REPEAT(GMOCK_INTERNAL_FORWARD_ARG, _Signature, _N)); \ } \ ::testing::MockSpec gmock_##_MethodName( \ GMOCK_PP_REPEAT(GMOCK_INTERNAL_MATCHER_PARAMETER, _Signature, _N)) \ GMOCK_PP_IF(_Constness, const, ) { \ GMOCK_MOCKER_(_N, _Constness, _MethodName).RegisterOwner(this); \ return GMOCK_MOCKER_(_N, _Constness, _MethodName) \ .With(GMOCK_PP_REPEAT(GMOCK_INTERNAL_MATCHER_ARGUMENT, , _N)); \ } \ ::testing::MockSpec gmock_##_MethodName( \ const ::testing::internal::WithoutMatchers&, \ GMOCK_PP_IF(_Constness, const, )::testing::internal::Function< \ GMOCK_PP_REMOVE_PARENS(_Signature)>*) \ const GMOCK_PP_IF(_Noexcept, noexcept, ) { \ 
return GMOCK_PP_CAT(::testing::internal::AdjustConstness_, \ GMOCK_PP_IF(_Constness, const, ))(this) \ ->gmock_##_MethodName(GMOCK_PP_REPEAT( \ GMOCK_INTERNAL_A_MATCHER_ARGUMENT, _Signature, _N)); \ } \ mutable ::testing::FunctionMocker \ GMOCK_MOCKER_(_N, _Constness, _MethodName) #define GMOCK_INTERNAL_EXPAND(...) __VA_ARGS__ // Five Valid modifiers. #define GMOCK_INTERNAL_HAS_CONST(_Tuple) \ GMOCK_PP_HAS_COMMA(GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_DETECT_CONST, ~, _Tuple)) #define GMOCK_INTERNAL_HAS_OVERRIDE(_Tuple) \ GMOCK_PP_HAS_COMMA( \ GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_DETECT_OVERRIDE, ~, _Tuple)) #define GMOCK_INTERNAL_HAS_FINAL(_Tuple) \ GMOCK_PP_HAS_COMMA(GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_DETECT_FINAL, ~, _Tuple)) #define GMOCK_INTERNAL_HAS_NOEXCEPT(_Tuple) \ GMOCK_PP_HAS_COMMA( \ GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_DETECT_NOEXCEPT, ~, _Tuple)) #define GMOCK_INTERNAL_GET_CALLTYPE(_Tuple) \ GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_GET_CALLTYPE_IMPL, ~, _Tuple) #define GMOCK_INTERNAL_ASSERT_VALID_SPEC_ELEMENT(_i, _, _elem) \ static_assert( \ (GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_CONST(_i, _, _elem)) + \ GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_OVERRIDE(_i, _, _elem)) + \ GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_FINAL(_i, _, _elem)) + \ GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_NOEXCEPT(_i, _, _elem)) + \ GMOCK_INTERNAL_IS_CALLTYPE(_elem)) == 1, \ GMOCK_PP_STRINGIZE( \ _elem) " cannot be recognized as a valid specification modifier."); // Modifiers implementation. 
#define GMOCK_INTERNAL_DETECT_CONST(_i, _, _elem) \ GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_CONST_I_, _elem) #define GMOCK_INTERNAL_DETECT_CONST_I_const , #define GMOCK_INTERNAL_DETECT_OVERRIDE(_i, _, _elem) \ GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_OVERRIDE_I_, _elem) #define GMOCK_INTERNAL_DETECT_OVERRIDE_I_override , #define GMOCK_INTERNAL_DETECT_FINAL(_i, _, _elem) \ GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_FINAL_I_, _elem) #define GMOCK_INTERNAL_DETECT_FINAL_I_final , // TODO(iserna): Maybe noexcept should accept an argument here as well. #define GMOCK_INTERNAL_DETECT_NOEXCEPT(_i, _, _elem) \ GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_NOEXCEPT_I_, _elem) #define GMOCK_INTERNAL_DETECT_NOEXCEPT_I_noexcept , #define GMOCK_INTERNAL_GET_CALLTYPE_IMPL(_i, _, _elem) \ GMOCK_PP_IF(GMOCK_INTERNAL_IS_CALLTYPE(_elem), \ GMOCK_INTERNAL_GET_VALUE_CALLTYPE, GMOCK_PP_EMPTY) \ (_elem) // TODO(iserna): GMOCK_INTERNAL_IS_CALLTYPE and // GMOCK_INTERNAL_GET_VALUE_CALLTYPE needed more expansions to work on windows // maybe they can be simplified somehow. 
#define GMOCK_INTERNAL_IS_CALLTYPE(_arg) \ GMOCK_INTERNAL_IS_CALLTYPE_I( \ GMOCK_PP_CAT(GMOCK_INTERNAL_IS_CALLTYPE_HELPER_, _arg)) #define GMOCK_INTERNAL_IS_CALLTYPE_I(_arg) GMOCK_PP_IS_ENCLOSED_PARENS(_arg) #define GMOCK_INTERNAL_GET_VALUE_CALLTYPE(_arg) \ GMOCK_INTERNAL_GET_VALUE_CALLTYPE_I( \ GMOCK_PP_CAT(GMOCK_INTERNAL_IS_CALLTYPE_HELPER_, _arg)) #define GMOCK_INTERNAL_GET_VALUE_CALLTYPE_I(_arg) \ GMOCK_PP_CAT(GMOCK_PP_IDENTITY, _arg) #define GMOCK_INTERNAL_IS_CALLTYPE_HELPER_Calltype #define GMOCK_INTERNAL_SIGNATURE(_Ret, _Args) \ GMOCK_PP_IF(GMOCK_PP_IS_BEGIN_PARENS(_Ret), GMOCK_PP_REMOVE_PARENS, \ GMOCK_PP_IDENTITY) \ (_Ret)(GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_GET_TYPE, _, _Args)) #define GMOCK_INTERNAL_GET_TYPE(_i, _, _elem) \ GMOCK_PP_COMMA_IF(_i) \ GMOCK_PP_IF(GMOCK_PP_IS_BEGIN_PARENS(_elem), GMOCK_PP_REMOVE_PARENS, \ GMOCK_PP_IDENTITY) \ (_elem) #define GMOCK_INTERNAL_PARAMETER(_i, _Signature, _) \ GMOCK_PP_COMMA_IF(_i) \ GMOCK_INTERNAL_ARG_O(typename, GMOCK_PP_INC(_i), \ GMOCK_PP_REMOVE_PARENS(_Signature)) \ gmock_a##_i #define GMOCK_INTERNAL_FORWARD_ARG(_i, _Signature, _) \ GMOCK_PP_COMMA_IF(_i) \ ::std::forward( \ gmock_a##_i) #define GMOCK_INTERNAL_MATCHER_PARAMETER(_i, _Signature, _) \ GMOCK_PP_COMMA_IF(_i) \ GMOCK_INTERNAL_MATCHER_O(typename, GMOCK_PP_INC(_i), \ GMOCK_PP_REMOVE_PARENS(_Signature)) \ gmock_a##_i #define GMOCK_INTERNAL_MATCHER_ARGUMENT(_i, _1, _2) \ GMOCK_PP_COMMA_IF(_i) \ gmock_a##_i #define GMOCK_INTERNAL_A_MATCHER_ARGUMENT(_i, _Signature, _) \ GMOCK_PP_COMMA_IF(_i) \ ::testing::A() #define GMOCK_INTERNAL_ARG_O(_tn, _i, ...) GMOCK_ARG_(_tn, _i, __VA_ARGS__) #define GMOCK_INTERNAL_MATCHER_O(_tn, _i, ...) 
\ GMOCK_MATCHER_(_tn, _i, __VA_ARGS__) #endif // THIRD_PARTY_GOOGLETEST_GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_FUNCTION_MOCKER_H_ LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/include/gmock/gmock-generated-actions.h000066400000000000000000002466511456444476200317560ustar00rootroot00000000000000// This file was GENERATED by command: // pump.py gmock-generated-actions.h.pump // DO NOT EDIT BY HAND!!! // Copyright 2007, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// Google Mock - a framework for writing C++ mock classes. // // This file implements some commonly used variadic actions. // GOOGLETEST_CM0002 DO NOT DELETE #ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ #define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ #include #include #include "gmock/gmock-actions.h" #include "gmock/internal/gmock-port.h" namespace testing { namespace internal { // A macro from the ACTION* family (defined later in this file) // defines an action that can be used in a mock function. Typically, // these actions only care about a subset of the arguments of the mock // function. For example, if such an action only uses the second // argument, it can be used in any mock function that takes >= 2 // arguments where the type of the second argument is compatible. // // Therefore, the action implementation must be prepared to take more // arguments than it needs. The ExcessiveArg type is used to // represent those excessive arguments. In order to keep the compiler // error messages tractable, we define it in the testing namespace // instead of testing::internal. However, this is an INTERNAL TYPE // and subject to change without notice, so a user MUST NOT USE THIS // TYPE DIRECTLY. struct ExcessiveArg {}; // A helper class needed for implementing the ACTION* macros. 
template class ActionHelper { public: static Result Perform(Impl* impl, const ::std::tuple<>& args) { return impl->template gmock_PerformImpl<>(args, ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg()); } template static Result Perform(Impl* impl, const ::std::tuple& args) { return impl->template gmock_PerformImpl(args, std::get<0>(args), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg()); } template static Result Perform(Impl* impl, const ::std::tuple& args) { return impl->template gmock_PerformImpl(args, std::get<0>(args), std::get<1>(args), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg()); } template static Result Perform(Impl* impl, const ::std::tuple& args) { return impl->template gmock_PerformImpl(args, std::get<0>(args), std::get<1>(args), std::get<2>(args), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg()); } template static Result Perform(Impl* impl, const ::std::tuple& args) { return impl->template gmock_PerformImpl(args, std::get<0>(args), std::get<1>(args), std::get<2>(args), std::get<3>(args), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg()); } template static Result Perform(Impl* impl, const ::std::tuple& args) { return impl->template gmock_PerformImpl(args, std::get<0>(args), std::get<1>(args), std::get<2>(args), std::get<3>(args), std::get<4>(args), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg()); } template static Result Perform(Impl* impl, const ::std::tuple& args) { return impl->template gmock_PerformImpl(args, std::get<0>(args), std::get<1>(args), std::get<2>(args), std::get<3>(args), std::get<4>(args), std::get<5>(args), 
ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg()); } template static Result Perform(Impl* impl, const ::std::tuple& args) { return impl->template gmock_PerformImpl(args, std::get<0>(args), std::get<1>(args), std::get<2>(args), std::get<3>(args), std::get<4>(args), std::get<5>(args), std::get<6>(args), ExcessiveArg(), ExcessiveArg(), ExcessiveArg()); } template static Result Perform(Impl* impl, const ::std::tuple& args) { return impl->template gmock_PerformImpl(args, std::get<0>(args), std::get<1>(args), std::get<2>(args), std::get<3>(args), std::get<4>(args), std::get<5>(args), std::get<6>(args), std::get<7>(args), ExcessiveArg(), ExcessiveArg()); } template static Result Perform(Impl* impl, const ::std::tuple& args) { return impl->template gmock_PerformImpl(args, std::get<0>(args), std::get<1>(args), std::get<2>(args), std::get<3>(args), std::get<4>(args), std::get<5>(args), std::get<6>(args), std::get<7>(args), std::get<8>(args), ExcessiveArg()); } template static Result Perform(Impl* impl, const ::std::tuple& args) { return impl->template gmock_PerformImpl(args, std::get<0>(args), std::get<1>(args), std::get<2>(args), std::get<3>(args), std::get<4>(args), std::get<5>(args), std::get<6>(args), std::get<7>(args), std::get<8>(args), std::get<9>(args)); } }; } // namespace internal } // namespace testing // The ACTION* family of macros can be used in a namespace scope to // define custom actions easily. The syntax: // // ACTION(name) { statements; } // // will define an action with the given name that executes the // statements. The value returned by the statements will be used as // the return value of the action. Inside the statements, you can // refer to the K-th (0-based) argument of the mock function by // 'argK', and refer to its type by 'argK_type'. 
For example: // // ACTION(IncrementArg1) { // arg1_type temp = arg1; // return ++(*temp); // } // // allows you to write // // ...WillOnce(IncrementArg1()); // // You can also refer to the entire argument tuple and its type by // 'args' and 'args_type', and refer to the mock function type and its // return type by 'function_type' and 'return_type'. // // Note that you don't need to specify the types of the mock function // arguments. However rest assured that your code is still type-safe: // you'll get a compiler error if *arg1 doesn't support the ++ // operator, or if the type of ++(*arg1) isn't compatible with the // mock function's return type, for example. // // Sometimes you'll want to parameterize the action. For that you can use // another macro: // // ACTION_P(name, param_name) { statements; } // // For example: // // ACTION_P(Add, n) { return arg0 + n; } // // will allow you to write: // // ...WillOnce(Add(5)); // // Note that you don't need to provide the type of the parameter // either. If you need to reference the type of a parameter named // 'foo', you can write 'foo_type'. For example, in the body of // ACTION_P(Add, n) above, you can write 'n_type' to refer to the type // of 'n'. // // We also provide ACTION_P2, ACTION_P3, ..., up to ACTION_P10 to support // multi-parameter actions. // // For the purpose of typing, you can view // // ACTION_Pk(Foo, p1, ..., pk) { ... } // // as shorthand for // // template // FooActionPk Foo(p1_type p1, ..., pk_type pk) { ... } // // In particular, you can provide the template type arguments // explicitly when invoking Foo(), as in Foo(5, false); // although usually you can rely on the compiler to infer the types // for you automatically. You can assign the result of expression // Foo(p1, ..., pk) to a variable of type FooActionPk. This can be useful when composing actions. // // You can also overload actions with different numbers of parameters: // // ACTION_P(Plus, a) { ... } // ACTION_P2(Plus, a, b) { ... 
} // // While it's tempting to always use the ACTION* macros when defining // a new action, you should also consider implementing ActionInterface // or using MakePolymorphicAction() instead, especially if you need to // use the action a lot. While these approaches require more work, // they give you more control on the types of the mock function // arguments and the action parameters, which in general leads to // better compiler error messages that pay off in the long run. They // also allow overloading actions based on parameter types (as opposed // to just based on the number of parameters). // // CAVEAT: // // ACTION*() can only be used in a namespace scope as templates cannot be // declared inside of a local class. // Users can, however, define any local functors (e.g. a lambda) that // can be used as actions. // // MORE INFORMATION: // // To learn more about using these macros, please search for 'ACTION' on // https://github.com/google/googletest/blob/master/googlemock/docs/cook_book.md // An internal macro needed for implementing ACTION*(). #define GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_\ const args_type& args GTEST_ATTRIBUTE_UNUSED_, \ const arg0_type& arg0 GTEST_ATTRIBUTE_UNUSED_, \ const arg1_type& arg1 GTEST_ATTRIBUTE_UNUSED_, \ const arg2_type& arg2 GTEST_ATTRIBUTE_UNUSED_, \ const arg3_type& arg3 GTEST_ATTRIBUTE_UNUSED_, \ const arg4_type& arg4 GTEST_ATTRIBUTE_UNUSED_, \ const arg5_type& arg5 GTEST_ATTRIBUTE_UNUSED_, \ const arg6_type& arg6 GTEST_ATTRIBUTE_UNUSED_, \ const arg7_type& arg7 GTEST_ATTRIBUTE_UNUSED_, \ const arg8_type& arg8 GTEST_ATTRIBUTE_UNUSED_, \ const arg9_type& arg9 GTEST_ATTRIBUTE_UNUSED_ // Sometimes you want to give an action explicit template parameters // that cannot be inferred from its value parameters. ACTION() and // ACTION_P*() don't support that. ACTION_TEMPLATE() remedies that // and can be viewed as an extension to ACTION() and ACTION_P*(). 
// // The syntax: // // ACTION_TEMPLATE(ActionName, // HAS_m_TEMPLATE_PARAMS(kind1, name1, ..., kind_m, name_m), // AND_n_VALUE_PARAMS(p1, ..., p_n)) { statements; } // // defines an action template that takes m explicit template // parameters and n value parameters. name_i is the name of the i-th // template parameter, and kind_i specifies whether it's a typename, // an integral constant, or a template. p_i is the name of the i-th // value parameter. // // Example: // // // DuplicateArg(output) converts the k-th argument of the mock // // function to type T and copies it to *output. // ACTION_TEMPLATE(DuplicateArg, // HAS_2_TEMPLATE_PARAMS(int, k, typename, T), // AND_1_VALUE_PARAMS(output)) { // *output = T(::std::get(args)); // } // ... // int n; // EXPECT_CALL(mock, Foo(_, _)) // .WillOnce(DuplicateArg<1, unsigned char>(&n)); // // To create an instance of an action template, write: // // ActionName(v1, ..., v_n) // // where the ts are the template arguments and the vs are the value // arguments. The value argument types are inferred by the compiler. // If you want to explicitly specify the value argument types, you can // provide additional template arguments: // // ActionName(v1, ..., v_n) // // where u_i is the desired type of v_i. // // ACTION_TEMPLATE and ACTION/ACTION_P* can be overloaded on the // number of value parameters, but not on the number of template // parameters. Without the restriction, the meaning of the following // is unclear: // // OverloadedAction(x); // // Are we using a single-template-parameter action where 'bool' refers // to the type of x, or are we using a two-template-parameter action // where the compiler is asked to infer the type of x? // // Implementation notes: // // GMOCK_INTERNAL_*_HAS_m_TEMPLATE_PARAMS and // GMOCK_INTERNAL_*_AND_n_VALUE_PARAMS are internal macros for // implementing ACTION_TEMPLATE. The main trick we use is to create // new macro invocations when expanding a macro. 
For example, we have // // #define ACTION_TEMPLATE(name, template_params, value_params) // ... GMOCK_INTERNAL_DECL_##template_params ... // // which causes ACTION_TEMPLATE(..., HAS_1_TEMPLATE_PARAMS(typename, T), ...) // to expand to // // ... GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS(typename, T) ... // // Since GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS is a macro, the // preprocessor will continue to expand it to // // ... typename T ... // // This technique conforms to the C++ standard and is portable. It // allows us to implement action templates using O(N) code, where N is // the maximum number of template/value parameters supported. Without // using it, we'd have to devote O(N^2) amount of code to implement all // combinations of m and n. // Declares the template parameters. #define GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS(kind0, name0) kind0 name0 #define GMOCK_INTERNAL_DECL_HAS_2_TEMPLATE_PARAMS(kind0, name0, kind1, \ name1) kind0 name0, kind1 name1 #define GMOCK_INTERNAL_DECL_HAS_3_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ kind2, name2) kind0 name0, kind1 name1, kind2 name2 #define GMOCK_INTERNAL_DECL_HAS_4_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ kind2, name2, kind3, name3) kind0 name0, kind1 name1, kind2 name2, \ kind3 name3 #define GMOCK_INTERNAL_DECL_HAS_5_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ kind2, name2, kind3, name3, kind4, name4) kind0 name0, kind1 name1, \ kind2 name2, kind3 name3, kind4 name4 #define GMOCK_INTERNAL_DECL_HAS_6_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ kind2, name2, kind3, name3, kind4, name4, kind5, name5) kind0 name0, \ kind1 name1, kind2 name2, kind3 name3, kind4 name4, kind5 name5 #define GMOCK_INTERNAL_DECL_HAS_7_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \ name6) kind0 name0, kind1 name1, kind2 name2, kind3 name3, kind4 name4, \ kind5 name5, kind6 name6 #define GMOCK_INTERNAL_DECL_HAS_8_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ kind2, 
name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \ kind7, name7) kind0 name0, kind1 name1, kind2 name2, kind3 name3, \ kind4 name4, kind5 name5, kind6 name6, kind7 name7 #define GMOCK_INTERNAL_DECL_HAS_9_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \ kind7, name7, kind8, name8) kind0 name0, kind1 name1, kind2 name2, \ kind3 name3, kind4 name4, kind5 name5, kind6 name6, kind7 name7, \ kind8 name8 #define GMOCK_INTERNAL_DECL_HAS_10_TEMPLATE_PARAMS(kind0, name0, kind1, \ name1, kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \ name6, kind7, name7, kind8, name8, kind9, name9) kind0 name0, \ kind1 name1, kind2 name2, kind3 name3, kind4 name4, kind5 name5, \ kind6 name6, kind7 name7, kind8 name8, kind9 name9 // Lists the template parameters. #define GMOCK_INTERNAL_LIST_HAS_1_TEMPLATE_PARAMS(kind0, name0) name0 #define GMOCK_INTERNAL_LIST_HAS_2_TEMPLATE_PARAMS(kind0, name0, kind1, \ name1) name0, name1 #define GMOCK_INTERNAL_LIST_HAS_3_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ kind2, name2) name0, name1, name2 #define GMOCK_INTERNAL_LIST_HAS_4_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ kind2, name2, kind3, name3) name0, name1, name2, name3 #define GMOCK_INTERNAL_LIST_HAS_5_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ kind2, name2, kind3, name3, kind4, name4) name0, name1, name2, name3, \ name4 #define GMOCK_INTERNAL_LIST_HAS_6_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ kind2, name2, kind3, name3, kind4, name4, kind5, name5) name0, name1, \ name2, name3, name4, name5 #define GMOCK_INTERNAL_LIST_HAS_7_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \ name6) name0, name1, name2, name3, name4, name5, name6 #define GMOCK_INTERNAL_LIST_HAS_8_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \ kind7, name7) name0, name1, name2, name3, name4, 
name5, name6, name7 #define GMOCK_INTERNAL_LIST_HAS_9_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \ kind7, name7, kind8, name8) name0, name1, name2, name3, name4, name5, \ name6, name7, name8 #define GMOCK_INTERNAL_LIST_HAS_10_TEMPLATE_PARAMS(kind0, name0, kind1, \ name1, kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \ name6, kind7, name7, kind8, name8, kind9, name9) name0, name1, name2, \ name3, name4, name5, name6, name7, name8, name9 // Declares the types of value parameters. #define GMOCK_INTERNAL_DECL_TYPE_AND_0_VALUE_PARAMS() #define GMOCK_INTERNAL_DECL_TYPE_AND_1_VALUE_PARAMS(p0) , typename p0##_type #define GMOCK_INTERNAL_DECL_TYPE_AND_2_VALUE_PARAMS(p0, p1) , \ typename p0##_type, typename p1##_type #define GMOCK_INTERNAL_DECL_TYPE_AND_3_VALUE_PARAMS(p0, p1, p2) , \ typename p0##_type, typename p1##_type, typename p2##_type #define GMOCK_INTERNAL_DECL_TYPE_AND_4_VALUE_PARAMS(p0, p1, p2, p3) , \ typename p0##_type, typename p1##_type, typename p2##_type, \ typename p3##_type #define GMOCK_INTERNAL_DECL_TYPE_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) , \ typename p0##_type, typename p1##_type, typename p2##_type, \ typename p3##_type, typename p4##_type #define GMOCK_INTERNAL_DECL_TYPE_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) , \ typename p0##_type, typename p1##_type, typename p2##_type, \ typename p3##_type, typename p4##_type, typename p5##_type #define GMOCK_INTERNAL_DECL_TYPE_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ p6) , typename p0##_type, typename p1##_type, typename p2##_type, \ typename p3##_type, typename p4##_type, typename p5##_type, \ typename p6##_type #define GMOCK_INTERNAL_DECL_TYPE_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ p6, p7) , typename p0##_type, typename p1##_type, typename p2##_type, \ typename p3##_type, typename p4##_type, typename p5##_type, \ typename p6##_type, typename p7##_type #define GMOCK_INTERNAL_DECL_TYPE_AND_9_VALUE_PARAMS(p0, 
p1, p2, p3, p4, p5, \ p6, p7, p8) , typename p0##_type, typename p1##_type, typename p2##_type, \ typename p3##_type, typename p4##_type, typename p5##_type, \ typename p6##_type, typename p7##_type, typename p8##_type #define GMOCK_INTERNAL_DECL_TYPE_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ p6, p7, p8, p9) , typename p0##_type, typename p1##_type, \ typename p2##_type, typename p3##_type, typename p4##_type, \ typename p5##_type, typename p6##_type, typename p7##_type, \ typename p8##_type, typename p9##_type // Initializes the value parameters. #define GMOCK_INTERNAL_INIT_AND_0_VALUE_PARAMS()\ () #define GMOCK_INTERNAL_INIT_AND_1_VALUE_PARAMS(p0)\ (p0##_type gmock_p0) : p0(::std::move(gmock_p0)) #define GMOCK_INTERNAL_INIT_AND_2_VALUE_PARAMS(p0, p1)\ (p0##_type gmock_p0, p1##_type gmock_p1) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)) #define GMOCK_INTERNAL_INIT_AND_3_VALUE_PARAMS(p0, p1, p2)\ (p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)) #define GMOCK_INTERNAL_INIT_AND_4_VALUE_PARAMS(p0, p1, p2, p3)\ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ p3(::std::move(gmock_p3)) #define GMOCK_INTERNAL_INIT_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4)\ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)) #define GMOCK_INTERNAL_INIT_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5)\ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, \ p5##_type gmock_p5) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ 
p5(::std::move(gmock_p5)) #define GMOCK_INTERNAL_INIT_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6)\ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ p6##_type gmock_p6) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)) #define GMOCK_INTERNAL_INIT_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7)\ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ p6##_type gmock_p6, p7##_type gmock_p7) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \ p7(::std::move(gmock_p7)) #define GMOCK_INTERNAL_INIT_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ p7, p8)\ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ p6##_type gmock_p6, p7##_type gmock_p7, \ p8##_type gmock_p8) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \ p7(::std::move(gmock_p7)), p8(::std::move(gmock_p8)) #define GMOCK_INTERNAL_INIT_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ p7, p8, p9)\ (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8, \ p9##_type gmock_p9) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \ p7(::std::move(gmock_p7)), p8(::std::move(gmock_p8)), \ 
p9(::std::move(gmock_p9)) // Declares the fields for storing the value parameters. #define GMOCK_INTERNAL_DEFN_AND_0_VALUE_PARAMS() #define GMOCK_INTERNAL_DEFN_AND_1_VALUE_PARAMS(p0) p0##_type p0; #define GMOCK_INTERNAL_DEFN_AND_2_VALUE_PARAMS(p0, p1) p0##_type p0; \ p1##_type p1; #define GMOCK_INTERNAL_DEFN_AND_3_VALUE_PARAMS(p0, p1, p2) p0##_type p0; \ p1##_type p1; p2##_type p2; #define GMOCK_INTERNAL_DEFN_AND_4_VALUE_PARAMS(p0, p1, p2, p3) p0##_type p0; \ p1##_type p1; p2##_type p2; p3##_type p3; #define GMOCK_INTERNAL_DEFN_AND_5_VALUE_PARAMS(p0, p1, p2, p3, \ p4) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; #define GMOCK_INTERNAL_DEFN_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, \ p5) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; \ p5##_type p5; #define GMOCK_INTERNAL_DEFN_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ p6) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; \ p5##_type p5; p6##_type p6; #define GMOCK_INTERNAL_DEFN_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ p7) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; \ p5##_type p5; p6##_type p6; p7##_type p7; #define GMOCK_INTERNAL_DEFN_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ p7, p8) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; \ p4##_type p4; p5##_type p5; p6##_type p6; p7##_type p7; p8##_type p8; #define GMOCK_INTERNAL_DEFN_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ p7, p8, p9) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; \ p4##_type p4; p5##_type p5; p6##_type p6; p7##_type p7; p8##_type p8; \ p9##_type p9; // Lists the value parameters. 
#define GMOCK_INTERNAL_LIST_AND_0_VALUE_PARAMS() #define GMOCK_INTERNAL_LIST_AND_1_VALUE_PARAMS(p0) p0 #define GMOCK_INTERNAL_LIST_AND_2_VALUE_PARAMS(p0, p1) p0, p1 #define GMOCK_INTERNAL_LIST_AND_3_VALUE_PARAMS(p0, p1, p2) p0, p1, p2 #define GMOCK_INTERNAL_LIST_AND_4_VALUE_PARAMS(p0, p1, p2, p3) p0, p1, p2, p3 #define GMOCK_INTERNAL_LIST_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) p0, p1, \ p2, p3, p4 #define GMOCK_INTERNAL_LIST_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) p0, \ p1, p2, p3, p4, p5 #define GMOCK_INTERNAL_LIST_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ p6) p0, p1, p2, p3, p4, p5, p6 #define GMOCK_INTERNAL_LIST_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ p7) p0, p1, p2, p3, p4, p5, p6, p7 #define GMOCK_INTERNAL_LIST_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ p7, p8) p0, p1, p2, p3, p4, p5, p6, p7, p8 #define GMOCK_INTERNAL_LIST_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ p7, p8, p9) p0, p1, p2, p3, p4, p5, p6, p7, p8, p9 // Lists the value parameter types. #define GMOCK_INTERNAL_LIST_TYPE_AND_0_VALUE_PARAMS() #define GMOCK_INTERNAL_LIST_TYPE_AND_1_VALUE_PARAMS(p0) , p0##_type #define GMOCK_INTERNAL_LIST_TYPE_AND_2_VALUE_PARAMS(p0, p1) , p0##_type, \ p1##_type #define GMOCK_INTERNAL_LIST_TYPE_AND_3_VALUE_PARAMS(p0, p1, p2) , p0##_type, \ p1##_type, p2##_type #define GMOCK_INTERNAL_LIST_TYPE_AND_4_VALUE_PARAMS(p0, p1, p2, p3) , \ p0##_type, p1##_type, p2##_type, p3##_type #define GMOCK_INTERNAL_LIST_TYPE_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) , \ p0##_type, p1##_type, p2##_type, p3##_type, p4##_type #define GMOCK_INTERNAL_LIST_TYPE_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) , \ p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, p5##_type #define GMOCK_INTERNAL_LIST_TYPE_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ p6) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, p5##_type, \ p6##_type #define GMOCK_INTERNAL_LIST_TYPE_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ p6, p7) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, 
\ p5##_type, p6##_type, p7##_type #define GMOCK_INTERNAL_LIST_TYPE_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ p6, p7, p8) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, \ p5##_type, p6##_type, p7##_type, p8##_type #define GMOCK_INTERNAL_LIST_TYPE_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ p6, p7, p8, p9) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, \ p5##_type, p6##_type, p7##_type, p8##_type, p9##_type // Declares the value parameters. #define GMOCK_INTERNAL_DECL_AND_0_VALUE_PARAMS() #define GMOCK_INTERNAL_DECL_AND_1_VALUE_PARAMS(p0) p0##_type p0 #define GMOCK_INTERNAL_DECL_AND_2_VALUE_PARAMS(p0, p1) p0##_type p0, \ p1##_type p1 #define GMOCK_INTERNAL_DECL_AND_3_VALUE_PARAMS(p0, p1, p2) p0##_type p0, \ p1##_type p1, p2##_type p2 #define GMOCK_INTERNAL_DECL_AND_4_VALUE_PARAMS(p0, p1, p2, p3) p0##_type p0, \ p1##_type p1, p2##_type p2, p3##_type p3 #define GMOCK_INTERNAL_DECL_AND_5_VALUE_PARAMS(p0, p1, p2, p3, \ p4) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4 #define GMOCK_INTERNAL_DECL_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, \ p5) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \ p5##_type p5 #define GMOCK_INTERNAL_DECL_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ p6) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \ p5##_type p5, p6##_type p6 #define GMOCK_INTERNAL_DECL_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ p7) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \ p5##_type p5, p6##_type p6, p7##_type p7 #define GMOCK_INTERNAL_DECL_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ p7, p8) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8 #define GMOCK_INTERNAL_DECL_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ p7, p8, p9) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8, \ p9##_type p9 
// The suffix of the class template implementing the action template. #define GMOCK_INTERNAL_COUNT_AND_0_VALUE_PARAMS() #define GMOCK_INTERNAL_COUNT_AND_1_VALUE_PARAMS(p0) P #define GMOCK_INTERNAL_COUNT_AND_2_VALUE_PARAMS(p0, p1) P2 #define GMOCK_INTERNAL_COUNT_AND_3_VALUE_PARAMS(p0, p1, p2) P3 #define GMOCK_INTERNAL_COUNT_AND_4_VALUE_PARAMS(p0, p1, p2, p3) P4 #define GMOCK_INTERNAL_COUNT_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) P5 #define GMOCK_INTERNAL_COUNT_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) P6 #define GMOCK_INTERNAL_COUNT_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6) P7 #define GMOCK_INTERNAL_COUNT_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ p7) P8 #define GMOCK_INTERNAL_COUNT_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ p7, p8) P9 #define GMOCK_INTERNAL_COUNT_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ p7, p8, p9) P10 // The name of the class template implementing the action template. #define GMOCK_ACTION_CLASS_(name, value_params)\ GTEST_CONCAT_TOKEN_(name##Action, GMOCK_INTERNAL_COUNT_##value_params) #define ACTION_TEMPLATE(name, template_params, value_params)\ template \ class GMOCK_ACTION_CLASS_(name, value_params) {\ public:\ explicit GMOCK_ACTION_CLASS_(name, value_params)\ GMOCK_INTERNAL_INIT_##value_params {}\ template \ class gmock_Impl : public ::testing::ActionInterface {\ public:\ typedef F function_type;\ typedef typename ::testing::internal::Function::Result return_type;\ typedef typename ::testing::internal::Function::ArgumentTuple\ args_type;\ explicit gmock_Impl GMOCK_INTERNAL_INIT_##value_params {}\ virtual return_type Perform(const args_type& args) {\ return ::testing::internal::ActionHelper::\ Perform(this, args);\ }\ template \ return_type gmock_PerformImpl(const args_type& args, \ const arg0_type& arg0, const arg1_type& arg1, \ const arg2_type& arg2, const arg3_type& arg3, \ const arg4_type& arg4, const arg5_type& arg5, \ const arg6_type& arg6, const arg7_type& arg7, \ const arg8_type& arg8, const arg9_type& arg9) 
const;\ GMOCK_INTERNAL_DEFN_##value_params\ private:\ GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ };\ template operator ::testing::Action() const {\ return ::testing::Action(\ new gmock_Impl(GMOCK_INTERNAL_LIST_##value_params));\ }\ GMOCK_INTERNAL_DEFN_##value_params\ private:\ GTEST_DISALLOW_ASSIGN_(GMOCK_ACTION_CLASS_(name, value_params));\ };\ template \ inline GMOCK_ACTION_CLASS_(name, value_params)<\ GMOCK_INTERNAL_LIST_##template_params\ GMOCK_INTERNAL_LIST_TYPE_##value_params> name(\ GMOCK_INTERNAL_DECL_##value_params) {\ return GMOCK_ACTION_CLASS_(name, value_params)<\ GMOCK_INTERNAL_LIST_##template_params\ GMOCK_INTERNAL_LIST_TYPE_##value_params>(\ GMOCK_INTERNAL_LIST_##value_params);\ }\ template \ template \ template \ typename ::testing::internal::Function::Result\ GMOCK_ACTION_CLASS_(name, value_params)<\ GMOCK_INTERNAL_LIST_##template_params\ GMOCK_INTERNAL_LIST_TYPE_##value_params>::gmock_Impl::\ gmock_PerformImpl(\ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const #define ACTION(name)\ class name##Action {\ public:\ name##Action() {}\ template \ class gmock_Impl : public ::testing::ActionInterface {\ public:\ typedef F function_type;\ typedef typename ::testing::internal::Function::Result return_type;\ typedef typename ::testing::internal::Function::ArgumentTuple\ args_type;\ gmock_Impl() {}\ virtual return_type Perform(const args_type& args) {\ return ::testing::internal::ActionHelper::\ Perform(this, args);\ }\ template \ return_type gmock_PerformImpl(const args_type& args, \ const arg0_type& arg0, const arg1_type& arg1, \ const arg2_type& arg2, const arg3_type& arg3, \ const arg4_type& arg4, const arg5_type& arg5, \ const arg6_type& arg6, const arg7_type& arg7, \ const arg8_type& arg8, const arg9_type& arg9) const;\ private:\ GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ };\ template operator ::testing::Action() const {\ return ::testing::Action(new gmock_Impl());\ }\ private:\ GTEST_DISALLOW_ASSIGN_(name##Action);\ };\ inline name##Action name() {\ return 
name##Action();\ }\ template \ template \ typename ::testing::internal::Function::Result\ name##Action::gmock_Impl::gmock_PerformImpl(\ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const #define ACTION_P(name, p0)\ template \ class name##ActionP {\ public:\ explicit name##ActionP(p0##_type gmock_p0) : \ p0(::std::forward(gmock_p0)) {}\ template \ class gmock_Impl : public ::testing::ActionInterface {\ public:\ typedef F function_type;\ typedef typename ::testing::internal::Function::Result return_type;\ typedef typename ::testing::internal::Function::ArgumentTuple\ args_type;\ explicit gmock_Impl(p0##_type gmock_p0) : \ p0(::std::forward(gmock_p0)) {}\ virtual return_type Perform(const args_type& args) {\ return ::testing::internal::ActionHelper::\ Perform(this, args);\ }\ template \ return_type gmock_PerformImpl(const args_type& args, \ const arg0_type& arg0, const arg1_type& arg1, \ const arg2_type& arg2, const arg3_type& arg3, \ const arg4_type& arg4, const arg5_type& arg5, \ const arg6_type& arg6, const arg7_type& arg7, \ const arg8_type& arg8, const arg9_type& arg9) const;\ p0##_type p0;\ private:\ GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ };\ template operator ::testing::Action() const {\ return ::testing::Action(new gmock_Impl(p0));\ }\ p0##_type p0;\ private:\ GTEST_DISALLOW_ASSIGN_(name##ActionP);\ };\ template \ inline name##ActionP name(p0##_type p0) {\ return name##ActionP(p0);\ }\ template \ template \ template \ typename ::testing::internal::Function::Result\ name##ActionP::gmock_Impl::gmock_PerformImpl(\ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const #define ACTION_P2(name, p0, p1)\ template \ class name##ActionP2 {\ public:\ name##ActionP2(p0##_type gmock_p0, \ p1##_type gmock_p1) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)) {}\ template \ class gmock_Impl : public ::testing::ActionInterface {\ public:\ typedef F function_type;\ typedef typename ::testing::internal::Function::Result return_type;\ typedef typename 
::testing::internal::Function::ArgumentTuple\ args_type;\ gmock_Impl(p0##_type gmock_p0, \ p1##_type gmock_p1) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)) {}\ virtual return_type Perform(const args_type& args) {\ return ::testing::internal::ActionHelper::\ Perform(this, args);\ }\ template \ return_type gmock_PerformImpl(const args_type& args, \ const arg0_type& arg0, const arg1_type& arg1, \ const arg2_type& arg2, const arg3_type& arg3, \ const arg4_type& arg4, const arg5_type& arg5, \ const arg6_type& arg6, const arg7_type& arg7, \ const arg8_type& arg8, const arg9_type& arg9) const;\ p0##_type p0;\ p1##_type p1;\ private:\ GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ };\ template operator ::testing::Action() const {\ return ::testing::Action(new gmock_Impl(p0, p1));\ }\ p0##_type p0;\ p1##_type p1;\ private:\ GTEST_DISALLOW_ASSIGN_(name##ActionP2);\ };\ template \ inline name##ActionP2 name(p0##_type p0, \ p1##_type p1) {\ return name##ActionP2(p0, p1);\ }\ template \ template \ template \ typename ::testing::internal::Function::Result\ name##ActionP2::gmock_Impl::gmock_PerformImpl(\ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const #define ACTION_P3(name, p0, p1, p2)\ template \ class name##ActionP3 {\ public:\ name##ActionP3(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)) {}\ template \ class gmock_Impl : public ::testing::ActionInterface {\ public:\ typedef F function_type;\ typedef typename ::testing::internal::Function::Result return_type;\ typedef typename ::testing::internal::Function::ArgumentTuple\ args_type;\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)) {}\ virtual return_type Perform(const args_type& args) {\ return ::testing::internal::ActionHelper::\ Perform(this, args);\ }\ template \ return_type 
gmock_PerformImpl(const args_type& args, \ const arg0_type& arg0, const arg1_type& arg1, \ const arg2_type& arg2, const arg3_type& arg3, \ const arg4_type& arg4, const arg5_type& arg5, \ const arg6_type& arg6, const arg7_type& arg7, \ const arg8_type& arg8, const arg9_type& arg9) const;\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ private:\ GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ };\ template operator ::testing::Action() const {\ return ::testing::Action(new gmock_Impl(p0, p1, p2));\ }\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ private:\ GTEST_DISALLOW_ASSIGN_(name##ActionP3);\ };\ template \ inline name##ActionP3 name(p0##_type p0, \ p1##_type p1, p2##_type p2) {\ return name##ActionP3(p0, p1, p2);\ }\ template \ template \ template \ typename ::testing::internal::Function::Result\ name##ActionP3::gmock_Impl::gmock_PerformImpl(\ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const #define ACTION_P4(name, p0, p1, p2, p3)\ template \ class name##ActionP4 {\ public:\ name##ActionP4(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2, \ p3##_type gmock_p3) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)), \ p3(::std::forward(gmock_p3)) {}\ template \ class gmock_Impl : public ::testing::ActionInterface {\ public:\ typedef F function_type;\ typedef typename ::testing::internal::Function::Result return_type;\ typedef typename ::testing::internal::Function::ArgumentTuple\ args_type;\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)), \ p3(::std::forward(gmock_p3)) {}\ virtual return_type Perform(const args_type& args) {\ return ::testing::internal::ActionHelper::\ Perform(this, args);\ }\ template \ return_type gmock_PerformImpl(const args_type& args, \ const arg0_type& arg0, const arg1_type& arg1, \ const arg2_type& arg2, const arg3_type& arg3, \ const arg4_type& arg4, const 
arg5_type& arg5, \ const arg6_type& arg6, const arg7_type& arg7, \ const arg8_type& arg8, const arg9_type& arg9) const;\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ p3##_type p3;\ private:\ GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ };\ template operator ::testing::Action() const {\ return ::testing::Action(new gmock_Impl(p0, p1, p2, p3));\ }\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ p3##_type p3;\ private:\ GTEST_DISALLOW_ASSIGN_(name##ActionP4);\ };\ template \ inline name##ActionP4 name(p0##_type p0, p1##_type p1, p2##_type p2, \ p3##_type p3) {\ return name##ActionP4(p0, p1, \ p2, p3);\ }\ template \ template \ template \ typename ::testing::internal::Function::Result\ name##ActionP4::gmock_Impl::gmock_PerformImpl(\ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const #define ACTION_P5(name, p0, p1, p2, p3, p4)\ template \ class name##ActionP5 {\ public:\ name##ActionP5(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2, p3##_type gmock_p3, \ p4##_type gmock_p4) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)), \ p3(::std::forward(gmock_p3)), \ p4(::std::forward(gmock_p4)) {}\ template \ class gmock_Impl : public ::testing::ActionInterface {\ public:\ typedef F function_type;\ typedef typename ::testing::internal::Function::Result return_type;\ typedef typename ::testing::internal::Function::ArgumentTuple\ args_type;\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, \ p4##_type gmock_p4) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)), \ p3(::std::forward(gmock_p3)), \ p4(::std::forward(gmock_p4)) {}\ virtual return_type Perform(const args_type& args) {\ return ::testing::internal::ActionHelper::\ Perform(this, args);\ }\ template \ return_type gmock_PerformImpl(const args_type& args, \ const arg0_type& arg0, const arg1_type& arg1, \ const arg2_type& arg2, const arg3_type& arg3, \ const arg4_type& arg4, const arg5_type& 
arg5, \ const arg6_type& arg6, const arg7_type& arg7, \ const arg8_type& arg8, const arg9_type& arg9) const;\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ p3##_type p3;\ p4##_type p4;\ private:\ GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ };\ template operator ::testing::Action() const {\ return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4));\ }\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ p3##_type p3;\ p4##_type p4;\ private:\ GTEST_DISALLOW_ASSIGN_(name##ActionP5);\ };\ template \ inline name##ActionP5 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ p4##_type p4) {\ return name##ActionP5(p0, p1, p2, p3, p4);\ }\ template \ template \ template \ typename ::testing::internal::Function::Result\ name##ActionP5::gmock_Impl::gmock_PerformImpl(\ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const #define ACTION_P6(name, p0, p1, p2, p3, p4, p5)\ template \ class name##ActionP6 {\ public:\ name##ActionP6(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ p5##_type gmock_p5) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)), \ p3(::std::forward(gmock_p3)), \ p4(::std::forward(gmock_p4)), \ p5(::std::forward(gmock_p5)) {}\ template \ class gmock_Impl : public ::testing::ActionInterface {\ public:\ typedef F function_type;\ typedef typename ::testing::internal::Function::Result return_type;\ typedef typename ::testing::internal::Function::ArgumentTuple\ args_type;\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, \ p5##_type gmock_p5) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)), \ p3(::std::forward(gmock_p3)), \ p4(::std::forward(gmock_p4)), \ p5(::std::forward(gmock_p5)) {}\ virtual return_type Perform(const args_type& args) {\ return ::testing::internal::ActionHelper::\ Perform(this, args);\ }\ template \ return_type gmock_PerformImpl(const args_type& 
args, \ const arg0_type& arg0, const arg1_type& arg1, \ const arg2_type& arg2, const arg3_type& arg3, \ const arg4_type& arg4, const arg5_type& arg5, \ const arg6_type& arg6, const arg7_type& arg7, \ const arg8_type& arg8, const arg9_type& arg9) const;\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ p3##_type p3;\ p4##_type p4;\ p5##_type p5;\ private:\ GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ };\ template operator ::testing::Action() const {\ return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4, p5));\ }\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ p3##_type p3;\ p4##_type p4;\ p5##_type p5;\ private:\ GTEST_DISALLOW_ASSIGN_(name##ActionP6);\ };\ template \ inline name##ActionP6 name(p0##_type p0, p1##_type p1, p2##_type p2, \ p3##_type p3, p4##_type p4, p5##_type p5) {\ return name##ActionP6(p0, p1, p2, p3, p4, p5);\ }\ template \ template \ template \ typename ::testing::internal::Function::Result\ name##ActionP6::gmock_Impl::gmock_PerformImpl(\ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const #define ACTION_P7(name, p0, p1, p2, p3, p4, p5, p6)\ template \ class name##ActionP7 {\ public:\ name##ActionP7(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ p5##_type gmock_p5, \ p6##_type gmock_p6) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)), \ p3(::std::forward(gmock_p3)), \ p4(::std::forward(gmock_p4)), \ p5(::std::forward(gmock_p5)), \ p6(::std::forward(gmock_p6)) {}\ template \ class gmock_Impl : public ::testing::ActionInterface {\ public:\ typedef F function_type;\ typedef typename ::testing::internal::Function::Result return_type;\ typedef typename ::testing::internal::Function::ArgumentTuple\ args_type;\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ p6##_type gmock_p6) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)), \ 
p3(::std::forward(gmock_p3)), \ p4(::std::forward(gmock_p4)), \ p5(::std::forward(gmock_p5)), \ p6(::std::forward(gmock_p6)) {}\ virtual return_type Perform(const args_type& args) {\ return ::testing::internal::ActionHelper::\ Perform(this, args);\ }\ template \ return_type gmock_PerformImpl(const args_type& args, \ const arg0_type& arg0, const arg1_type& arg1, \ const arg2_type& arg2, const arg3_type& arg3, \ const arg4_type& arg4, const arg5_type& arg5, \ const arg6_type& arg6, const arg7_type& arg7, \ const arg8_type& arg8, const arg9_type& arg9) const;\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ p3##_type p3;\ p4##_type p4;\ p5##_type p5;\ p6##_type p6;\ private:\ GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ };\ template operator ::testing::Action() const {\ return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4, p5, \ p6));\ }\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ p3##_type p3;\ p4##_type p4;\ p5##_type p5;\ p6##_type p6;\ private:\ GTEST_DISALLOW_ASSIGN_(name##ActionP7);\ };\ template \ inline name##ActionP7 name(p0##_type p0, p1##_type p1, \ p2##_type p2, p3##_type p3, p4##_type p4, p5##_type p5, \ p6##_type p6) {\ return name##ActionP7(p0, p1, p2, p3, p4, p5, p6);\ }\ template \ template \ template \ typename ::testing::internal::Function::Result\ name##ActionP7::gmock_Impl::gmock_PerformImpl(\ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const #define ACTION_P8(name, p0, p1, p2, p3, p4, p5, p6, p7)\ template \ class name##ActionP8 {\ public:\ name##ActionP8(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ p5##_type gmock_p5, p6##_type gmock_p6, \ p7##_type gmock_p7) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)), \ p3(::std::forward(gmock_p3)), \ p4(::std::forward(gmock_p4)), \ p5(::std::forward(gmock_p5)), \ p6(::std::forward(gmock_p6)), \ p7(::std::forward(gmock_p7)) {}\ template \ class gmock_Impl : public ::testing::ActionInterface {\ public:\ 
typedef F function_type;\ typedef typename ::testing::internal::Function::Result return_type;\ typedef typename ::testing::internal::Function::ArgumentTuple\ args_type;\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ p6##_type gmock_p6, \ p7##_type gmock_p7) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)), \ p3(::std::forward(gmock_p3)), \ p4(::std::forward(gmock_p4)), \ p5(::std::forward(gmock_p5)), \ p6(::std::forward(gmock_p6)), \ p7(::std::forward(gmock_p7)) {}\ virtual return_type Perform(const args_type& args) {\ return ::testing::internal::ActionHelper::\ Perform(this, args);\ }\ template \ return_type gmock_PerformImpl(const args_type& args, \ const arg0_type& arg0, const arg1_type& arg1, \ const arg2_type& arg2, const arg3_type& arg3, \ const arg4_type& arg4, const arg5_type& arg5, \ const arg6_type& arg6, const arg7_type& arg7, \ const arg8_type& arg8, const arg9_type& arg9) const;\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ p3##_type p3;\ p4##_type p4;\ p5##_type p5;\ p6##_type p6;\ p7##_type p7;\ private:\ GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ };\ template operator ::testing::Action() const {\ return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4, p5, \ p6, p7));\ }\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ p3##_type p3;\ p4##_type p4;\ p5##_type p5;\ p6##_type p6;\ p7##_type p7;\ private:\ GTEST_DISALLOW_ASSIGN_(name##ActionP8);\ };\ template \ inline name##ActionP8 name(p0##_type p0, \ p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, p5##_type p5, \ p6##_type p6, p7##_type p7) {\ return name##ActionP8(p0, p1, p2, p3, p4, p5, \ p6, p7);\ }\ template \ template \ template \ typename ::testing::internal::Function::Result\ name##ActionP8::gmock_Impl::gmock_PerformImpl(\ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const #define ACTION_P9(name, p0, p1, p2, p3, p4, p5, p6, p7, p8)\ template \ class 
name##ActionP9 {\ public:\ name##ActionP9(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ p5##_type gmock_p5, p6##_type gmock_p6, p7##_type gmock_p7, \ p8##_type gmock_p8) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)), \ p3(::std::forward(gmock_p3)), \ p4(::std::forward(gmock_p4)), \ p5(::std::forward(gmock_p5)), \ p6(::std::forward(gmock_p6)), \ p7(::std::forward(gmock_p7)), \ p8(::std::forward(gmock_p8)) {}\ template \ class gmock_Impl : public ::testing::ActionInterface {\ public:\ typedef F function_type;\ typedef typename ::testing::internal::Function::Result return_type;\ typedef typename ::testing::internal::Function::ArgumentTuple\ args_type;\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ p6##_type gmock_p6, p7##_type gmock_p7, \ p8##_type gmock_p8) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)), \ p3(::std::forward(gmock_p3)), \ p4(::std::forward(gmock_p4)), \ p5(::std::forward(gmock_p5)), \ p6(::std::forward(gmock_p6)), \ p7(::std::forward(gmock_p7)), \ p8(::std::forward(gmock_p8)) {}\ virtual return_type Perform(const args_type& args) {\ return ::testing::internal::ActionHelper::\ Perform(this, args);\ }\ template \ return_type gmock_PerformImpl(const args_type& args, \ const arg0_type& arg0, const arg1_type& arg1, \ const arg2_type& arg2, const arg3_type& arg3, \ const arg4_type& arg4, const arg5_type& arg5, \ const arg6_type& arg6, const arg7_type& arg7, \ const arg8_type& arg8, const arg9_type& arg9) const;\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ p3##_type p3;\ p4##_type p4;\ p5##_type p5;\ p6##_type p6;\ p7##_type p7;\ p8##_type p8;\ private:\ GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ };\ template operator ::testing::Action() const {\ return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4, p5, \ p6, p7, 
p8));\ }\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ p3##_type p3;\ p4##_type p4;\ p5##_type p5;\ p6##_type p6;\ p7##_type p7;\ p8##_type p8;\ private:\ GTEST_DISALLOW_ASSIGN_(name##ActionP9);\ };\ template \ inline name##ActionP9 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, \ p8##_type p8) {\ return name##ActionP9(p0, p1, p2, \ p3, p4, p5, p6, p7, p8);\ }\ template \ template \ template \ typename ::testing::internal::Function::Result\ name##ActionP9::gmock_Impl::gmock_PerformImpl(\ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const #define ACTION_P10(name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9)\ template \ class name##ActionP10 {\ public:\ name##ActionP10(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ p5##_type gmock_p5, p6##_type gmock_p6, p7##_type gmock_p7, \ p8##_type gmock_p8, \ p9##_type gmock_p9) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)), \ p3(::std::forward(gmock_p3)), \ p4(::std::forward(gmock_p4)), \ p5(::std::forward(gmock_p5)), \ p6(::std::forward(gmock_p6)), \ p7(::std::forward(gmock_p7)), \ p8(::std::forward(gmock_p8)), \ p9(::std::forward(gmock_p9)) {}\ template \ class gmock_Impl : public ::testing::ActionInterface {\ public:\ typedef F function_type;\ typedef typename ::testing::internal::Function::Result return_type;\ typedef typename ::testing::internal::Function::ArgumentTuple\ args_type;\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8, \ p9##_type gmock_p9) : p0(::std::forward(gmock_p0)), \ p1(::std::forward(gmock_p1)), \ p2(::std::forward(gmock_p2)), \ p3(::std::forward(gmock_p3)), \ p4(::std::forward(gmock_p4)), \ p5(::std::forward(gmock_p5)), \ p6(::std::forward(gmock_p6)), \ p7(::std::forward(gmock_p7)), \ 
p8(::std::forward(gmock_p8)), \ p9(::std::forward(gmock_p9)) {}\ virtual return_type Perform(const args_type& args) {\ return ::testing::internal::ActionHelper::\ Perform(this, args);\ }\ template \ return_type gmock_PerformImpl(const args_type& args, \ const arg0_type& arg0, const arg1_type& arg1, \ const arg2_type& arg2, const arg3_type& arg3, \ const arg4_type& arg4, const arg5_type& arg5, \ const arg6_type& arg6, const arg7_type& arg7, \ const arg8_type& arg8, const arg9_type& arg9) const;\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ p3##_type p3;\ p4##_type p4;\ p5##_type p5;\ p6##_type p6;\ p7##_type p7;\ p8##_type p8;\ p9##_type p9;\ private:\ GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ };\ template operator ::testing::Action() const {\ return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4, p5, \ p6, p7, p8, p9));\ }\ p0##_type p0;\ p1##_type p1;\ p2##_type p2;\ p3##_type p3;\ p4##_type p4;\ p5##_type p5;\ p6##_type p6;\ p7##_type p7;\ p8##_type p8;\ p9##_type p9;\ private:\ GTEST_DISALLOW_ASSIGN_(name##ActionP10);\ };\ template \ inline name##ActionP10 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8, \ p9##_type p9) {\ return name##ActionP10(p0, \ p1, p2, p3, p4, p5, p6, p7, p8, p9);\ }\ template \ template \ template \ typename ::testing::internal::Function::Result\ name##ActionP10::gmock_Impl::gmock_PerformImpl(\ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const namespace testing { // The ACTION*() macros trigger warning C4100 (unreferenced formal // parameter) in MSVC with -W4. Unfortunately they cannot be fixed in // the macro definition, as the warnings are generated when the macro // is expanded and macro expansion cannot contain #pragma. Therefore // we suppress them here. #ifdef _MSC_VER # pragma warning(push) # pragma warning(disable:4100) #endif // Various overloads for InvokeArgument(). 
// // The InvokeArgument(a1, a2, ..., a_k) action invokes the N-th // (0-based) argument, which must be a k-ary callable, of the mock // function, with arguments a1, a2, ..., a_k. // // Notes: // // 1. The arguments are passed by value by default. If you need to // pass an argument by reference, wrap it inside ByRef(). For // example, // // InvokeArgument<1>(5, string("Hello"), ByRef(foo)) // // passes 5 and string("Hello") by value, and passes foo by // reference. // // 2. If the callable takes an argument by reference but ByRef() is // not used, it will receive the reference to a copy of the value, // instead of the original value. For example, when the 0-th // argument of the mock function takes a const string&, the action // // InvokeArgument<0>(string("Hello")) // // makes a copy of the temporary string("Hello") object and passes a // reference of the copy, instead of the original temporary object, // to the callable. This makes it easy for a user to define an // InvokeArgument action from temporary values and have it performed // later. namespace internal { namespace invoke_argument { // Appears in InvokeArgumentAdl's argument list to help avoid // accidental calls to user functions of the same name. struct AdlTag {}; // InvokeArgumentAdl - a helper for InvokeArgument. // The basic overloads are provided here for generic functors. // Overloads for other custom-callables are provided in the // internal/custom/callback-actions.h header. 
template R InvokeArgumentAdl(AdlTag, F f) { return f(); } template R InvokeArgumentAdl(AdlTag, F f, A1 a1) { return f(a1); } template R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2) { return f(a1, a2); } template R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3) { return f(a1, a2, a3); } template R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4) { return f(a1, a2, a3, a4); } template R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { return f(a1, a2, a3, a4, a5); } template R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6) { return f(a1, a2, a3, a4, a5, a6); } template R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, A7 a7) { return f(a1, a2, a3, a4, a5, a6, a7); } template R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, A7 a7, A8 a8) { return f(a1, a2, a3, a4, a5, a6, a7, a8); } template R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, A7 a7, A8 a8, A9 a9) { return f(a1, a2, a3, a4, a5, a6, a7, a8, a9); } template R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, A7 a7, A8 a8, A9 a9, A10 a10) { return f(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10); } } // namespace invoke_argument } // namespace internal ACTION_TEMPLATE(InvokeArgument, HAS_1_TEMPLATE_PARAMS(int, k), AND_0_VALUE_PARAMS()) { using internal::invoke_argument::InvokeArgumentAdl; return InvokeArgumentAdl( internal::invoke_argument::AdlTag(), ::std::get(args)); } ACTION_TEMPLATE(InvokeArgument, HAS_1_TEMPLATE_PARAMS(int, k), AND_1_VALUE_PARAMS(p0)) { using internal::invoke_argument::InvokeArgumentAdl; return InvokeArgumentAdl( internal::invoke_argument::AdlTag(), ::std::get(args), p0); } ACTION_TEMPLATE(InvokeArgument, HAS_1_TEMPLATE_PARAMS(int, k), AND_2_VALUE_PARAMS(p0, p1)) { using internal::invoke_argument::InvokeArgumentAdl; return InvokeArgumentAdl( internal::invoke_argument::AdlTag(), ::std::get(args), p0, p1); } 
ACTION_TEMPLATE(InvokeArgument, HAS_1_TEMPLATE_PARAMS(int, k), AND_3_VALUE_PARAMS(p0, p1, p2)) { using internal::invoke_argument::InvokeArgumentAdl; return InvokeArgumentAdl( internal::invoke_argument::AdlTag(), ::std::get(args), p0, p1, p2); } ACTION_TEMPLATE(InvokeArgument, HAS_1_TEMPLATE_PARAMS(int, k), AND_4_VALUE_PARAMS(p0, p1, p2, p3)) { using internal::invoke_argument::InvokeArgumentAdl; return InvokeArgumentAdl( internal::invoke_argument::AdlTag(), ::std::get(args), p0, p1, p2, p3); } ACTION_TEMPLATE(InvokeArgument, HAS_1_TEMPLATE_PARAMS(int, k), AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4)) { using internal::invoke_argument::InvokeArgumentAdl; return InvokeArgumentAdl( internal::invoke_argument::AdlTag(), ::std::get(args), p0, p1, p2, p3, p4); } ACTION_TEMPLATE(InvokeArgument, HAS_1_TEMPLATE_PARAMS(int, k), AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5)) { using internal::invoke_argument::InvokeArgumentAdl; return InvokeArgumentAdl( internal::invoke_argument::AdlTag(), ::std::get(args), p0, p1, p2, p3, p4, p5); } ACTION_TEMPLATE(InvokeArgument, HAS_1_TEMPLATE_PARAMS(int, k), AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6)) { using internal::invoke_argument::InvokeArgumentAdl; return InvokeArgumentAdl( internal::invoke_argument::AdlTag(), ::std::get(args), p0, p1, p2, p3, p4, p5, p6); } ACTION_TEMPLATE(InvokeArgument, HAS_1_TEMPLATE_PARAMS(int, k), AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7)) { using internal::invoke_argument::InvokeArgumentAdl; return InvokeArgumentAdl( internal::invoke_argument::AdlTag(), ::std::get(args), p0, p1, p2, p3, p4, p5, p6, p7); } ACTION_TEMPLATE(InvokeArgument, HAS_1_TEMPLATE_PARAMS(int, k), AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8)) { using internal::invoke_argument::InvokeArgumentAdl; return InvokeArgumentAdl( internal::invoke_argument::AdlTag(), ::std::get(args), p0, p1, p2, p3, p4, p5, p6, p7, p8); } ACTION_TEMPLATE(InvokeArgument, HAS_1_TEMPLATE_PARAMS(int, k), AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, 
p8, p9)) { using internal::invoke_argument::InvokeArgumentAdl; return InvokeArgumentAdl( internal::invoke_argument::AdlTag(), ::std::get(args), p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); } // Various overloads for ReturnNew(). // // The ReturnNew(a1, a2, ..., a_k) action returns a pointer to a new // instance of type T, constructed on the heap with constructor arguments // a1, a2, ..., and a_k. The caller assumes ownership of the returned value. ACTION_TEMPLATE(ReturnNew, HAS_1_TEMPLATE_PARAMS(typename, T), AND_0_VALUE_PARAMS()) { return new T(); } ACTION_TEMPLATE(ReturnNew, HAS_1_TEMPLATE_PARAMS(typename, T), AND_1_VALUE_PARAMS(p0)) { return new T(p0); } ACTION_TEMPLATE(ReturnNew, HAS_1_TEMPLATE_PARAMS(typename, T), AND_2_VALUE_PARAMS(p0, p1)) { return new T(p0, p1); } ACTION_TEMPLATE(ReturnNew, HAS_1_TEMPLATE_PARAMS(typename, T), AND_3_VALUE_PARAMS(p0, p1, p2)) { return new T(p0, p1, p2); } ACTION_TEMPLATE(ReturnNew, HAS_1_TEMPLATE_PARAMS(typename, T), AND_4_VALUE_PARAMS(p0, p1, p2, p3)) { return new T(p0, p1, p2, p3); } ACTION_TEMPLATE(ReturnNew, HAS_1_TEMPLATE_PARAMS(typename, T), AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4)) { return new T(p0, p1, p2, p3, p4); } ACTION_TEMPLATE(ReturnNew, HAS_1_TEMPLATE_PARAMS(typename, T), AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5)) { return new T(p0, p1, p2, p3, p4, p5); } ACTION_TEMPLATE(ReturnNew, HAS_1_TEMPLATE_PARAMS(typename, T), AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6)) { return new T(p0, p1, p2, p3, p4, p5, p6); } ACTION_TEMPLATE(ReturnNew, HAS_1_TEMPLATE_PARAMS(typename, T), AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7)) { return new T(p0, p1, p2, p3, p4, p5, p6, p7); } ACTION_TEMPLATE(ReturnNew, HAS_1_TEMPLATE_PARAMS(typename, T), AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8)) { return new T(p0, p1, p2, p3, p4, p5, p6, p7, p8); } ACTION_TEMPLATE(ReturnNew, HAS_1_TEMPLATE_PARAMS(typename, T), AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9)) { return new T(p0, p1, p2, p3, p4, p5, p6, p7, p8, 
p9); } #ifdef _MSC_VER # pragma warning(pop) #endif } // namespace testing // Include any custom callback actions added by the local installation. // We must include this header at the end to make sure it can use the // declarations from this file. #include "gmock/internal/custom/gmock-generated-actions.h" #endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/include/gmock/gmock-generated-actions.h.pump000066400000000000000000000516241456444476200327300ustar00rootroot00000000000000$$ -*- mode: c++; -*- $$ This is a Pump source file. Please use Pump to convert it to $$ gmock-generated-actions.h. $$ $var n = 10 $$ The maximum arity we support. $$}} This meta comment fixes auto-indentation in editors. // Copyright 2007, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Google Mock - a framework for writing C++ mock classes. // // This file implements some commonly used variadic actions. // GOOGLETEST_CM0002 DO NOT DELETE #ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ #define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ #include #include #include "gmock/gmock-actions.h" #include "gmock/internal/gmock-port.h" namespace testing { namespace internal { // A macro from the ACTION* family (defined later in this file) // defines an action that can be used in a mock function. Typically, // these actions only care about a subset of the arguments of the mock // function. For example, if such an action only uses the second // argument, it can be used in any mock function that takes >= 2 // arguments where the type of the second argument is compatible. // // Therefore, the action implementation must be prepared to take more // arguments than it needs. The ExcessiveArg type is used to // represent those excessive arguments. In order to keep the compiler // error messages tractable, we define it in the testing namespace // instead of testing::internal. However, this is an INTERNAL TYPE // and subject to change without notice, so a user MUST NOT USE THIS // TYPE DIRECTLY. struct ExcessiveArg {}; // A helper class needed for implementing the ACTION* macros. 
template class ActionHelper { public: $range i 0..n $for i [[ $var template = [[$if i==0 [[]] $else [[ $range j 0..i-1 template <$for j, [[typename A$j]]> ]]]] $range j 0..i-1 $var As = [[$for j, [[A$j]]]] $var as = [[$for j, [[std::get<$j>(args)]]]] $range k 1..n-i $var eas = [[$for k, [[ExcessiveArg()]]]] $var arg_list = [[$if (i==0) | (i==n) [[$as$eas]] $else [[$as, $eas]]]] $template static Result Perform(Impl* impl, const ::std::tuple<$As>& args) { return impl->template gmock_PerformImpl<$As>(args, $arg_list); } ]] }; } // namespace internal } // namespace testing // The ACTION* family of macros can be used in a namespace scope to // define custom actions easily. The syntax: // // ACTION(name) { statements; } // // will define an action with the given name that executes the // statements. The value returned by the statements will be used as // the return value of the action. Inside the statements, you can // refer to the K-th (0-based) argument of the mock function by // 'argK', and refer to its type by 'argK_type'. For example: // // ACTION(IncrementArg1) { // arg1_type temp = arg1; // return ++(*temp); // } // // allows you to write // // ...WillOnce(IncrementArg1()); // // You can also refer to the entire argument tuple and its type by // 'args' and 'args_type', and refer to the mock function type and its // return type by 'function_type' and 'return_type'. // // Note that you don't need to specify the types of the mock function // arguments. However rest assured that your code is still type-safe: // you'll get a compiler error if *arg1 doesn't support the ++ // operator, or if the type of ++(*arg1) isn't compatible with the // mock function's return type, for example. // // Sometimes you'll want to parameterize the action. 
For that you can use // another macro: // // ACTION_P(name, param_name) { statements; } // // For example: // // ACTION_P(Add, n) { return arg0 + n; } // // will allow you to write: // // ...WillOnce(Add(5)); // // Note that you don't need to provide the type of the parameter // either. If you need to reference the type of a parameter named // 'foo', you can write 'foo_type'. For example, in the body of // ACTION_P(Add, n) above, you can write 'n_type' to refer to the type // of 'n'. // // We also provide ACTION_P2, ACTION_P3, ..., up to ACTION_P$n to support // multi-parameter actions. // // For the purpose of typing, you can view // // ACTION_Pk(Foo, p1, ..., pk) { ... } // // as shorthand for // // template // FooActionPk Foo(p1_type p1, ..., pk_type pk) { ... } // // In particular, you can provide the template type arguments // explicitly when invoking Foo(), as in Foo(5, false); // although usually you can rely on the compiler to infer the types // for you automatically. You can assign the result of expression // Foo(p1, ..., pk) to a variable of type FooActionPk. This can be useful when composing actions. // // You can also overload actions with different numbers of parameters: // // ACTION_P(Plus, a) { ... } // ACTION_P2(Plus, a, b) { ... } // // While it's tempting to always use the ACTION* macros when defining // a new action, you should also consider implementing ActionInterface // or using MakePolymorphicAction() instead, especially if you need to // use the action a lot. While these approaches require more work, // they give you more control on the types of the mock function // arguments and the action parameters, which in general leads to // better compiler error messages that pay off in the long run. They // also allow overloading actions based on parameter types (as opposed // to just based on the number of parameters). // // CAVEAT: // // ACTION*() can only be used in a namespace scope as templates cannot be // declared inside of a local class. 
// Users can, however, define any local functors (e.g. a lambda) that // can be used as actions. // // MORE INFORMATION: // // To learn more about using these macros, please search for 'ACTION' on // https://github.com/google/googletest/blob/master/googlemock/docs/cook_book.md $range i 0..n $range k 0..n-1 // An internal macro needed for implementing ACTION*(). #define GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_\ const args_type& args GTEST_ATTRIBUTE_UNUSED_ $for k [[, \ const arg$k[[]]_type& arg$k GTEST_ATTRIBUTE_UNUSED_]] // Sometimes you want to give an action explicit template parameters // that cannot be inferred from its value parameters. ACTION() and // ACTION_P*() don't support that. ACTION_TEMPLATE() remedies that // and can be viewed as an extension to ACTION() and ACTION_P*(). // // The syntax: // // ACTION_TEMPLATE(ActionName, // HAS_m_TEMPLATE_PARAMS(kind1, name1, ..., kind_m, name_m), // AND_n_VALUE_PARAMS(p1, ..., p_n)) { statements; } // // defines an action template that takes m explicit template // parameters and n value parameters. name_i is the name of the i-th // template parameter, and kind_i specifies whether it's a typename, // an integral constant, or a template. p_i is the name of the i-th // value parameter. // // Example: // // // DuplicateArg(output) converts the k-th argument of the mock // // function to type T and copies it to *output. // ACTION_TEMPLATE(DuplicateArg, // HAS_2_TEMPLATE_PARAMS(int, k, typename, T), // AND_1_VALUE_PARAMS(output)) { // *output = T(::std::get(args)); // } // ... // int n; // EXPECT_CALL(mock, Foo(_, _)) // .WillOnce(DuplicateArg<1, unsigned char>(&n)); // // To create an instance of an action template, write: // // ActionName(v1, ..., v_n) // // where the ts are the template arguments and the vs are the value // arguments. The value argument types are inferred by the compiler. 
// If you want to explicitly specify the value argument types, you can // provide additional template arguments: // // ActionName(v1, ..., v_n) // // where u_i is the desired type of v_i. // // ACTION_TEMPLATE and ACTION/ACTION_P* can be overloaded on the // number of value parameters, but not on the number of template // parameters. Without the restriction, the meaning of the following // is unclear: // // OverloadedAction(x); // // Are we using a single-template-parameter action where 'bool' refers // to the type of x, or are we using a two-template-parameter action // where the compiler is asked to infer the type of x? // // Implementation notes: // // GMOCK_INTERNAL_*_HAS_m_TEMPLATE_PARAMS and // GMOCK_INTERNAL_*_AND_n_VALUE_PARAMS are internal macros for // implementing ACTION_TEMPLATE. The main trick we use is to create // new macro invocations when expanding a macro. For example, we have // // #define ACTION_TEMPLATE(name, template_params, value_params) // ... GMOCK_INTERNAL_DECL_##template_params ... // // which causes ACTION_TEMPLATE(..., HAS_1_TEMPLATE_PARAMS(typename, T), ...) // to expand to // // ... GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS(typename, T) ... // // Since GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS is a macro, the // preprocessor will continue to expand it to // // ... typename T ... // // This technique conforms to the C++ standard and is portable. It // allows us to implement action templates using O(N) code, where N is // the maximum number of template/value parameters supported. Without // using it, we'd have to devote O(N^2) amount of code to implement all // combinations of m and n. // Declares the template parameters. $range j 1..n $for j [[ $range m 0..j-1 #define GMOCK_INTERNAL_DECL_HAS_$j[[]] _TEMPLATE_PARAMS($for m, [[kind$m, name$m]]) $for m, [[kind$m name$m]] ]] // Lists the template parameters. 
$for j [[ $range m 0..j-1 #define GMOCK_INTERNAL_LIST_HAS_$j[[]] _TEMPLATE_PARAMS($for m, [[kind$m, name$m]]) $for m, [[name$m]] ]] // Declares the types of value parameters. $for i [[ $range j 0..i-1 #define GMOCK_INTERNAL_DECL_TYPE_AND_$i[[]] _VALUE_PARAMS($for j, [[p$j]]) $for j [[, typename p$j##_type]] ]] // Initializes the value parameters. $for i [[ $range j 0..i-1 #define GMOCK_INTERNAL_INIT_AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]])\ ($for j, [[p$j##_type gmock_p$j]])$if i>0 [[ : ]]$for j, [[p$j(::std::move(gmock_p$j))]] ]] // Declares the fields for storing the value parameters. $for i [[ $range j 0..i-1 #define GMOCK_INTERNAL_DEFN_AND_$i[[]] _VALUE_PARAMS($for j, [[p$j]]) $for j [[p$j##_type p$j; ]] ]] // Lists the value parameters. $for i [[ $range j 0..i-1 #define GMOCK_INTERNAL_LIST_AND_$i[[]] _VALUE_PARAMS($for j, [[p$j]]) $for j, [[p$j]] ]] // Lists the value parameter types. $for i [[ $range j 0..i-1 #define GMOCK_INTERNAL_LIST_TYPE_AND_$i[[]] _VALUE_PARAMS($for j, [[p$j]]) $for j [[, p$j##_type]] ]] // Declares the value parameters. $for i [[ $range j 0..i-1 #define GMOCK_INTERNAL_DECL_AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]]) [[]] $for j, [[p$j##_type p$j]] ]] // The suffix of the class template implementing the action template. $for i [[ $range j 0..i-1 #define GMOCK_INTERNAL_COUNT_AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]]) [[]] $if i==1 [[P]] $elif i>=2 [[P$i]] ]] // The name of the class template implementing the action template. 
#define GMOCK_ACTION_CLASS_(name, value_params)\ GTEST_CONCAT_TOKEN_(name##Action, GMOCK_INTERNAL_COUNT_##value_params) $range k 0..n-1 #define ACTION_TEMPLATE(name, template_params, value_params)\ template \ class GMOCK_ACTION_CLASS_(name, value_params) {\ public:\ explicit GMOCK_ACTION_CLASS_(name, value_params)\ GMOCK_INTERNAL_INIT_##value_params {}\ template \ class gmock_Impl : public ::testing::ActionInterface {\ public:\ typedef F function_type;\ typedef typename ::testing::internal::Function::Result return_type;\ typedef typename ::testing::internal::Function::ArgumentTuple\ args_type;\ explicit gmock_Impl GMOCK_INTERNAL_INIT_##value_params {}\ virtual return_type Perform(const args_type& args) {\ return ::testing::internal::ActionHelper::\ Perform(this, args);\ }\ template <$for k, [[typename arg$k[[]]_type]]>\ return_type gmock_PerformImpl(const args_type& args[[]] $for k [[, const arg$k[[]]_type& arg$k]]) const;\ GMOCK_INTERNAL_DEFN_##value_params\ private:\ GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ };\ template operator ::testing::Action() const {\ return ::testing::Action(\ new gmock_Impl(GMOCK_INTERNAL_LIST_##value_params));\ }\ GMOCK_INTERNAL_DEFN_##value_params\ private:\ GTEST_DISALLOW_ASSIGN_(GMOCK_ACTION_CLASS_(name, value_params));\ };\ template \ inline GMOCK_ACTION_CLASS_(name, value_params)<\ GMOCK_INTERNAL_LIST_##template_params\ GMOCK_INTERNAL_LIST_TYPE_##value_params> name(\ GMOCK_INTERNAL_DECL_##value_params) {\ return GMOCK_ACTION_CLASS_(name, value_params)<\ GMOCK_INTERNAL_LIST_##template_params\ GMOCK_INTERNAL_LIST_TYPE_##value_params>(\ GMOCK_INTERNAL_LIST_##value_params);\ }\ template \ template \ template \ typename ::testing::internal::Function::Result\ GMOCK_ACTION_CLASS_(name, value_params)<\ GMOCK_INTERNAL_LIST_##template_params\ GMOCK_INTERNAL_LIST_TYPE_##value_params>::gmock_Impl::\ gmock_PerformImpl(\ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const $for i [[ $var template = [[$if i==0 [[]] $else [[ $range j 0..i-1 template <$for 
j, [[typename p$j##_type]]>\ ]]]] $var class_name = [[name##Action[[$if i==0 [[]] $elif i==1 [[P]] $else [[P$i]]]]]] $range j 0..i-1 $var ctor_param_list = [[$for j, [[p$j##_type gmock_p$j]]]] $var param_types_and_names = [[$for j, [[p$j##_type p$j]]]] $var inits = [[$if i==0 [[]] $else [[ : $for j, [[p$j(::std::forward(gmock_p$j))]]]]]] $var param_field_decls = [[$for j [[ p$j##_type p$j;\ ]]]] $var param_field_decls2 = [[$for j [[ p$j##_type p$j;\ ]]]] $var params = [[$for j, [[p$j]]]] $var param_types = [[$if i==0 [[]] $else [[<$for j, [[p$j##_type]]>]]]] $var typename_arg_types = [[$for k, [[typename arg$k[[]]_type]]]] $var arg_types_and_names = [[$for k, [[const arg$k[[]]_type& arg$k]]]] $var macro_name = [[$if i==0 [[ACTION]] $elif i==1 [[ACTION_P]] $else [[ACTION_P$i]]]] #define $macro_name(name$for j [[, p$j]])\$template class $class_name {\ public:\ [[$if i==1 [[explicit ]]]]$class_name($ctor_param_list)$inits {}\ template \ class gmock_Impl : public ::testing::ActionInterface {\ public:\ typedef F function_type;\ typedef typename ::testing::internal::Function::Result return_type;\ typedef typename ::testing::internal::Function::ArgumentTuple\ args_type;\ [[$if i==1 [[explicit ]]]]gmock_Impl($ctor_param_list)$inits {}\ virtual return_type Perform(const args_type& args) {\ return ::testing::internal::ActionHelper::\ Perform(this, args);\ }\ template <$typename_arg_types>\ return_type gmock_PerformImpl(const args_type& args, [[]] $arg_types_and_names) const;\$param_field_decls private:\ GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ };\ template operator ::testing::Action() const {\ return ::testing::Action(new gmock_Impl($params));\ }\$param_field_decls2 private:\ GTEST_DISALLOW_ASSIGN_($class_name);\ };\$template inline $class_name$param_types name($param_types_and_names) {\ return $class_name$param_types($params);\ }\$template template \ template <$typename_arg_types>\ typename ::testing::internal::Function::Result\ 
$class_name$param_types::gmock_Impl::gmock_PerformImpl(\ GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const ]] $$ } // This meta comment fixes auto-indentation in Emacs. It won't $$ // show up in the generated code. namespace testing { // The ACTION*() macros trigger warning C4100 (unreferenced formal // parameter) in MSVC with -W4. Unfortunately they cannot be fixed in // the macro definition, as the warnings are generated when the macro // is expanded and macro expansion cannot contain #pragma. Therefore // we suppress them here. #ifdef _MSC_VER # pragma warning(push) # pragma warning(disable:4100) #endif // Various overloads for InvokeArgument(). // // The InvokeArgument(a1, a2, ..., a_k) action invokes the N-th // (0-based) argument, which must be a k-ary callable, of the mock // function, with arguments a1, a2, ..., a_k. // // Notes: // // 1. The arguments are passed by value by default. If you need to // pass an argument by reference, wrap it inside ByRef(). For // example, // // InvokeArgument<1>(5, string("Hello"), ByRef(foo)) // // passes 5 and string("Hello") by value, and passes foo by // reference. // // 2. If the callable takes an argument by reference but ByRef() is // not used, it will receive the reference to a copy of the value, // instead of the original value. For example, when the 0-th // argument of the mock function takes a const string&, the action // // InvokeArgument<0>(string("Hello")) // // makes a copy of the temporary string("Hello") object and passes a // reference of the copy, instead of the original temporary object, // to the callable. This makes it easy for a user to define an // InvokeArgument action from temporary values and have it performed // later. namespace internal { namespace invoke_argument { // Appears in InvokeArgumentAdl's argument list to help avoid // accidental calls to user functions of the same name. struct AdlTag {}; // InvokeArgumentAdl - a helper for InvokeArgument. 
// The basic overloads are provided here for generic functors. // Overloads for other custom-callables are provided in the // internal/custom/callback-actions.h header. $range i 0..n $for i [[ $range j 1..i template R InvokeArgumentAdl(AdlTag, F f[[$for j [[, A$j a$j]]]]) { return f([[$for j, [[a$j]]]]); } ]] } // namespace invoke_argument } // namespace internal $range i 0..n $for i [[ $range j 0..i-1 ACTION_TEMPLATE(InvokeArgument, HAS_1_TEMPLATE_PARAMS(int, k), AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]])) { using internal::invoke_argument::InvokeArgumentAdl; return InvokeArgumentAdl( internal::invoke_argument::AdlTag(), ::std::get(args)$for j [[, p$j]]); } ]] // Various overloads for ReturnNew(). // // The ReturnNew(a1, a2, ..., a_k) action returns a pointer to a new // instance of type T, constructed on the heap with constructor arguments // a1, a2, ..., and a_k. The caller assumes ownership of the returned value. $range i 0..n $for i [[ $range j 0..i-1 $var ps = [[$for j, [[p$j]]]] ACTION_TEMPLATE(ReturnNew, HAS_1_TEMPLATE_PARAMS(typename, T), AND_$i[[]]_VALUE_PARAMS($ps)) { return new T($ps); } ]] #ifdef _MSC_VER # pragma warning(pop) #endif } // namespace testing // Include any custom callback actions added by the local installation. // We must include this header at the end to make sure it can use the // declarations from this file. #include "gmock/internal/custom/gmock-generated-actions.h" #endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/include/gmock/gmock-generated-function-mockers.h000066400000000000000000001142211456444476200335670ustar00rootroot00000000000000// This file was GENERATED by command: // pump.py gmock-generated-function-mockers.h.pump // DO NOT EDIT BY HAND!!! // Copyright 2007, Google Inc. // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Google Mock - a framework for writing C++ mock classes. // // This file implements function mockers of various arities. 
// GOOGLETEST_CM0002 DO NOT DELETE #ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ #define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ #include #include #include "gmock/gmock-spec-builders.h" #include "gmock/internal/gmock-internal-utils.h" namespace testing { namespace internal { // Removes the given pointer; this is a helper for the expectation setter method // for parameterless matchers. // // We want to make sure that the user cannot set a parameterless expectation on // overloaded methods, including methods which are overloaded on const. Example: // // class MockClass { // MOCK_METHOD0(GetName, string&()); // MOCK_CONST_METHOD0(GetName, const string&()); // }; // // TEST() { // // This should be an error, as it's not clear which overload is expected. // EXPECT_CALL(mock, GetName).WillOnce(ReturnRef(value)); // } // // Here are the generated expectation-setter methods: // // class MockClass { // // Overload 1 // MockSpec gmock_GetName() { ... } // // Overload 2. Declared const so that the compiler will generate an // // error when trying to resolve between this and overload 4 in // // 'gmock_GetName(WithoutMatchers(), nullptr)'. // MockSpec gmock_GetName( // const WithoutMatchers&, const Function*) const { // // Removes const from this, calls overload 1 // return AdjustConstness_(this)->gmock_GetName(); // } // // // Overload 3 // const string& gmock_GetName() const { ... } // // Overload 4 // MockSpec gmock_GetName( // const WithoutMatchers&, const Function*) const { // // Does not remove const, calls overload 3 // return AdjustConstness_const(this)->gmock_GetName(); // } // } // template const MockType* AdjustConstness_const(const MockType* mock) { return mock; } // Removes const from and returns the given pointer; this is a helper for the // expectation setter method for parameterless matchers. 
template MockType* AdjustConstness_(const MockType* mock) { return const_cast(mock); } } // namespace internal // The style guide prohibits "using" statements in a namespace scope // inside a header file. However, the FunctionMocker class template // is meant to be defined in the ::testing namespace. The following // line is just a trick for working around a bug in MSVC 8.0, which // cannot handle it if we define FunctionMocker in ::testing. using internal::FunctionMocker; // GMOCK_RESULT_(tn, F) expands to the result type of function type F. // We define this as a variadic macro in case F contains unprotected // commas (the same reason that we use variadic macros in other places // in this file). // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_RESULT_(tn, ...) \ tn ::testing::internal::Function<__VA_ARGS__>::Result // The type of argument N of the given function type. // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_ARG_(tn, N, ...) \ tn ::testing::internal::Function<__VA_ARGS__>::template Arg::type // The matcher type for argument N of the given function type. // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_MATCHER_(tn, N, ...) \ const ::testing::Matcher& // The variable for mocking the given method. // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_MOCKER_(arity, constness, Method) \ GTEST_CONCAT_TOKEN_(gmock##constness##arity##_##Method##_, __LINE__) // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_METHOD0_(tn, constness, ct, Method, ...) 
\ static_assert(0 == \ ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ "MOCK_METHOD must match argument count.");\ GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ ) constness { \ GMOCK_MOCKER_(0, constness, Method).SetOwnerAndName(this, #Method); \ return GMOCK_MOCKER_(0, constness, Method).Invoke(); \ } \ ::testing::MockSpec<__VA_ARGS__> \ gmock_##Method() constness { \ GMOCK_MOCKER_(0, constness, Method).RegisterOwner(this); \ return GMOCK_MOCKER_(0, constness, Method).With(); \ } \ ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ const ::testing::internal::WithoutMatchers&, \ constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ return ::testing::internal::AdjustConstness_##constness(this)-> \ gmock_##Method(); \ } \ mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(0, constness, \ Method) // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_METHOD1_(tn, constness, ct, Method, ...) \ static_assert(1 == \ ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ "MOCK_METHOD must match argument count.");\ GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1) constness { \ GMOCK_MOCKER_(1, constness, Method).SetOwnerAndName(this, #Method); \ return GMOCK_MOCKER_(1, constness, \ Method).Invoke(::std::forward(gmock_a1)); \ } \ ::testing::MockSpec<__VA_ARGS__> \ gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1) constness { \ GMOCK_MOCKER_(1, constness, Method).RegisterOwner(this); \ return GMOCK_MOCKER_(1, constness, Method).With(gmock_a1); \ } \ ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ const ::testing::internal::WithoutMatchers&, \ constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ return ::testing::internal::AdjustConstness_##constness(this)-> \ gmock_##Method(::testing::A()); \ } \ mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(1, constness, \ Method) // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! 
#define GMOCK_METHOD2_(tn, constness, ct, Method, ...) \ static_assert(2 == \ ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ "MOCK_METHOD must match argument count.");\ GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ __VA_ARGS__) gmock_a2) constness { \ GMOCK_MOCKER_(2, constness, Method).SetOwnerAndName(this, #Method); \ return GMOCK_MOCKER_(2, constness, \ Method).Invoke(::std::forward(gmock_a1), \ ::std::forward(gmock_a2)); \ } \ ::testing::MockSpec<__VA_ARGS__> \ gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2) constness { \ GMOCK_MOCKER_(2, constness, Method).RegisterOwner(this); \ return GMOCK_MOCKER_(2, constness, Method).With(gmock_a1, gmock_a2); \ } \ ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ const ::testing::internal::WithoutMatchers&, \ constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ return ::testing::internal::AdjustConstness_##constness(this)-> \ gmock_##Method(::testing::A(), \ ::testing::A()); \ } \ mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(2, constness, \ Method) // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_METHOD3_(tn, constness, ct, Method, ...) 
\ static_assert(3 == \ ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ "MOCK_METHOD must match argument count.");\ GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, \ __VA_ARGS__) gmock_a3) constness { \ GMOCK_MOCKER_(3, constness, Method).SetOwnerAndName(this, #Method); \ return GMOCK_MOCKER_(3, constness, \ Method).Invoke(::std::forward(gmock_a1), \ ::std::forward(gmock_a2), \ ::std::forward(gmock_a3)); \ } \ ::testing::MockSpec<__VA_ARGS__> \ gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3) constness { \ GMOCK_MOCKER_(3, constness, Method).RegisterOwner(this); \ return GMOCK_MOCKER_(3, constness, Method).With(gmock_a1, gmock_a2, \ gmock_a3); \ } \ ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ const ::testing::internal::WithoutMatchers&, \ constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ return ::testing::internal::AdjustConstness_##constness(this)-> \ gmock_##Method(::testing::A(), \ ::testing::A(), \ ::testing::A()); \ } \ mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(3, constness, \ Method) // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_METHOD4_(tn, constness, ct, Method, ...) 
\ static_assert(4 == \ ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ "MOCK_METHOD must match argument count.");\ GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4) constness { \ GMOCK_MOCKER_(4, constness, Method).SetOwnerAndName(this, #Method); \ return GMOCK_MOCKER_(4, constness, \ Method).Invoke(::std::forward(gmock_a1), \ ::std::forward(gmock_a2), \ ::std::forward(gmock_a3), \ ::std::forward(gmock_a4)); \ } \ ::testing::MockSpec<__VA_ARGS__> \ gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4) constness { \ GMOCK_MOCKER_(4, constness, Method).RegisterOwner(this); \ return GMOCK_MOCKER_(4, constness, Method).With(gmock_a1, gmock_a2, \ gmock_a3, gmock_a4); \ } \ ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ const ::testing::internal::WithoutMatchers&, \ constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ return ::testing::internal::AdjustConstness_##constness(this)-> \ gmock_##Method(::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A()); \ } \ mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(4, constness, \ Method) // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_METHOD5_(tn, constness, ct, Method, ...) 
\ static_assert(5 == \ ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ "MOCK_METHOD must match argument count.");\ GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, GMOCK_ARG_(tn, 5, \ __VA_ARGS__) gmock_a5) constness { \ GMOCK_MOCKER_(5, constness, Method).SetOwnerAndName(this, #Method); \ return GMOCK_MOCKER_(5, constness, \ Method).Invoke(::std::forward(gmock_a1), \ ::std::forward(gmock_a2), \ ::std::forward(gmock_a3), \ ::std::forward(gmock_a4), \ ::std::forward(gmock_a5)); \ } \ ::testing::MockSpec<__VA_ARGS__> \ gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5) constness { \ GMOCK_MOCKER_(5, constness, Method).RegisterOwner(this); \ return GMOCK_MOCKER_(5, constness, Method).With(gmock_a1, gmock_a2, \ gmock_a3, gmock_a4, gmock_a5); \ } \ ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ const ::testing::internal::WithoutMatchers&, \ constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ return ::testing::internal::AdjustConstness_##constness(this)-> \ gmock_##Method(::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A()); \ } \ mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(5, constness, \ Method) // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_METHOD6_(tn, constness, ct, Method, ...) 
\ static_assert(6 == \ ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ "MOCK_METHOD must match argument count.");\ GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, GMOCK_ARG_(tn, 5, \ __VA_ARGS__) gmock_a5, GMOCK_ARG_(tn, 6, \ __VA_ARGS__) gmock_a6) constness { \ GMOCK_MOCKER_(6, constness, Method).SetOwnerAndName(this, #Method); \ return GMOCK_MOCKER_(6, constness, \ Method).Invoke(::std::forward(gmock_a1), \ ::std::forward(gmock_a2), \ ::std::forward(gmock_a3), \ ::std::forward(gmock_a4), \ ::std::forward(gmock_a5), \ ::std::forward(gmock_a6)); \ } \ ::testing::MockSpec<__VA_ARGS__> \ gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5, \ GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6) constness { \ GMOCK_MOCKER_(6, constness, Method).RegisterOwner(this); \ return GMOCK_MOCKER_(6, constness, Method).With(gmock_a1, gmock_a2, \ gmock_a3, gmock_a4, gmock_a5, gmock_a6); \ } \ ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ const ::testing::internal::WithoutMatchers&, \ constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ return ::testing::internal::AdjustConstness_##constness(this)-> \ gmock_##Method(::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A()); \ } \ mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(6, constness, \ Method) // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_METHOD7_(tn, constness, ct, Method, ...) 
\ static_assert(7 == \ ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ "MOCK_METHOD must match argument count.");\ GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, GMOCK_ARG_(tn, 5, \ __VA_ARGS__) gmock_a5, GMOCK_ARG_(tn, 6, __VA_ARGS__) gmock_a6, \ GMOCK_ARG_(tn, 7, __VA_ARGS__) gmock_a7) constness { \ GMOCK_MOCKER_(7, constness, Method).SetOwnerAndName(this, #Method); \ return GMOCK_MOCKER_(7, constness, \ Method).Invoke(::std::forward(gmock_a1), \ ::std::forward(gmock_a2), \ ::std::forward(gmock_a3), \ ::std::forward(gmock_a4), \ ::std::forward(gmock_a5), \ ::std::forward(gmock_a6), \ ::std::forward(gmock_a7)); \ } \ ::testing::MockSpec<__VA_ARGS__> \ gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5, \ GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6, \ GMOCK_MATCHER_(tn, 7, __VA_ARGS__) gmock_a7) constness { \ GMOCK_MOCKER_(7, constness, Method).RegisterOwner(this); \ return GMOCK_MOCKER_(7, constness, Method).With(gmock_a1, gmock_a2, \ gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7); \ } \ ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ const ::testing::internal::WithoutMatchers&, \ constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ return ::testing::internal::AdjustConstness_##constness(this)-> \ gmock_##Method(::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A()); \ } \ mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(7, constness, \ Method) // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_METHOD8_(tn, constness, ct, Method, ...) 
\ static_assert(8 == \ ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ "MOCK_METHOD must match argument count.");\ GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, GMOCK_ARG_(tn, 5, \ __VA_ARGS__) gmock_a5, GMOCK_ARG_(tn, 6, __VA_ARGS__) gmock_a6, \ GMOCK_ARG_(tn, 7, __VA_ARGS__) gmock_a7, GMOCK_ARG_(tn, 8, \ __VA_ARGS__) gmock_a8) constness { \ GMOCK_MOCKER_(8, constness, Method).SetOwnerAndName(this, #Method); \ return GMOCK_MOCKER_(8, constness, \ Method).Invoke(::std::forward(gmock_a1), \ ::std::forward(gmock_a2), \ ::std::forward(gmock_a3), \ ::std::forward(gmock_a4), \ ::std::forward(gmock_a5), \ ::std::forward(gmock_a6), \ ::std::forward(gmock_a7), \ ::std::forward(gmock_a8)); \ } \ ::testing::MockSpec<__VA_ARGS__> \ gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5, \ GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6, \ GMOCK_MATCHER_(tn, 7, __VA_ARGS__) gmock_a7, \ GMOCK_MATCHER_(tn, 8, __VA_ARGS__) gmock_a8) constness { \ GMOCK_MOCKER_(8, constness, Method).RegisterOwner(this); \ return GMOCK_MOCKER_(8, constness, Method).With(gmock_a1, gmock_a2, \ gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7, gmock_a8); \ } \ ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ const ::testing::internal::WithoutMatchers&, \ constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ return ::testing::internal::AdjustConstness_##constness(this)-> \ gmock_##Method(::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A()); \ } \ mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(8, constness, \ Method) // INTERNAL 
IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_METHOD9_(tn, constness, ct, Method, ...) \ static_assert(9 == \ ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ "MOCK_METHOD must match argument count.");\ GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, GMOCK_ARG_(tn, 5, \ __VA_ARGS__) gmock_a5, GMOCK_ARG_(tn, 6, __VA_ARGS__) gmock_a6, \ GMOCK_ARG_(tn, 7, __VA_ARGS__) gmock_a7, GMOCK_ARG_(tn, 8, \ __VA_ARGS__) gmock_a8, GMOCK_ARG_(tn, 9, \ __VA_ARGS__) gmock_a9) constness { \ GMOCK_MOCKER_(9, constness, Method).SetOwnerAndName(this, #Method); \ return GMOCK_MOCKER_(9, constness, \ Method).Invoke(::std::forward(gmock_a1), \ ::std::forward(gmock_a2), \ ::std::forward(gmock_a3), \ ::std::forward(gmock_a4), \ ::std::forward(gmock_a5), \ ::std::forward(gmock_a6), \ ::std::forward(gmock_a7), \ ::std::forward(gmock_a8), \ ::std::forward(gmock_a9)); \ } \ ::testing::MockSpec<__VA_ARGS__> \ gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5, \ GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6, \ GMOCK_MATCHER_(tn, 7, __VA_ARGS__) gmock_a7, \ GMOCK_MATCHER_(tn, 8, __VA_ARGS__) gmock_a8, \ GMOCK_MATCHER_(tn, 9, __VA_ARGS__) gmock_a9) constness { \ GMOCK_MOCKER_(9, constness, Method).RegisterOwner(this); \ return GMOCK_MOCKER_(9, constness, Method).With(gmock_a1, gmock_a2, \ gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7, gmock_a8, \ gmock_a9); \ } \ ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ const ::testing::internal::WithoutMatchers&, \ constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ return ::testing::internal::AdjustConstness_##constness(this)-> \ gmock_##Method(::testing::A(), \ 
::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A()); \ } \ mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(9, constness, \ Method) // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_METHOD10_(tn, constness, ct, Method, ...) \ static_assert(10 == \ ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ "MOCK_METHOD must match argument count.");\ GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, GMOCK_ARG_(tn, 5, \ __VA_ARGS__) gmock_a5, GMOCK_ARG_(tn, 6, __VA_ARGS__) gmock_a6, \ GMOCK_ARG_(tn, 7, __VA_ARGS__) gmock_a7, GMOCK_ARG_(tn, 8, \ __VA_ARGS__) gmock_a8, GMOCK_ARG_(tn, 9, __VA_ARGS__) gmock_a9, \ GMOCK_ARG_(tn, 10, __VA_ARGS__) gmock_a10) constness { \ GMOCK_MOCKER_(10, constness, Method).SetOwnerAndName(this, #Method); \ return GMOCK_MOCKER_(10, constness, \ Method).Invoke(::std::forward(gmock_a1), \ ::std::forward(gmock_a2), \ ::std::forward(gmock_a3), \ ::std::forward(gmock_a4), \ ::std::forward(gmock_a5), \ ::std::forward(gmock_a6), \ ::std::forward(gmock_a7), \ ::std::forward(gmock_a8), \ ::std::forward(gmock_a9), \ ::std::forward(gmock_a10)); \ } \ ::testing::MockSpec<__VA_ARGS__> \ gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5, \ GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6, \ GMOCK_MATCHER_(tn, 7, __VA_ARGS__) gmock_a7, \ GMOCK_MATCHER_(tn, 8, __VA_ARGS__) gmock_a8, \ GMOCK_MATCHER_(tn, 9, __VA_ARGS__) gmock_a9, \ GMOCK_MATCHER_(tn, 10, \ __VA_ARGS__) gmock_a10) constness { \ GMOCK_MOCKER_(10, constness, Method).RegisterOwner(this); \ return GMOCK_MOCKER_(10, constness, 
Method).With(gmock_a1, gmock_a2, \ gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7, gmock_a8, gmock_a9, \ gmock_a10); \ } \ ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ const ::testing::internal::WithoutMatchers&, \ constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ return ::testing::internal::AdjustConstness_##constness(this)-> \ gmock_##Method(::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A(), \ ::testing::A()); \ } \ mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(10, constness, \ Method) #define MOCK_METHOD0(m, ...) GMOCK_METHOD0_(, , , m, __VA_ARGS__) #define MOCK_METHOD1(m, ...) GMOCK_METHOD1_(, , , m, __VA_ARGS__) #define MOCK_METHOD2(m, ...) GMOCK_METHOD2_(, , , m, __VA_ARGS__) #define MOCK_METHOD3(m, ...) GMOCK_METHOD3_(, , , m, __VA_ARGS__) #define MOCK_METHOD4(m, ...) GMOCK_METHOD4_(, , , m, __VA_ARGS__) #define MOCK_METHOD5(m, ...) GMOCK_METHOD5_(, , , m, __VA_ARGS__) #define MOCK_METHOD6(m, ...) GMOCK_METHOD6_(, , , m, __VA_ARGS__) #define MOCK_METHOD7(m, ...) GMOCK_METHOD7_(, , , m, __VA_ARGS__) #define MOCK_METHOD8(m, ...) GMOCK_METHOD8_(, , , m, __VA_ARGS__) #define MOCK_METHOD9(m, ...) GMOCK_METHOD9_(, , , m, __VA_ARGS__) #define MOCK_METHOD10(m, ...) GMOCK_METHOD10_(, , , m, __VA_ARGS__) #define MOCK_CONST_METHOD0(m, ...) GMOCK_METHOD0_(, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD1(m, ...) GMOCK_METHOD1_(, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD2(m, ...) GMOCK_METHOD2_(, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD3(m, ...) GMOCK_METHOD3_(, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD4(m, ...) GMOCK_METHOD4_(, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD5(m, ...) GMOCK_METHOD5_(, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD6(m, ...) GMOCK_METHOD6_(, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD7(m, ...) 
GMOCK_METHOD7_(, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD8(m, ...) GMOCK_METHOD8_(, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD9(m, ...) GMOCK_METHOD9_(, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD10(m, ...) GMOCK_METHOD10_(, const, , m, __VA_ARGS__) #define MOCK_METHOD0_T(m, ...) GMOCK_METHOD0_(typename, , , m, __VA_ARGS__) #define MOCK_METHOD1_T(m, ...) GMOCK_METHOD1_(typename, , , m, __VA_ARGS__) #define MOCK_METHOD2_T(m, ...) GMOCK_METHOD2_(typename, , , m, __VA_ARGS__) #define MOCK_METHOD3_T(m, ...) GMOCK_METHOD3_(typename, , , m, __VA_ARGS__) #define MOCK_METHOD4_T(m, ...) GMOCK_METHOD4_(typename, , , m, __VA_ARGS__) #define MOCK_METHOD5_T(m, ...) GMOCK_METHOD5_(typename, , , m, __VA_ARGS__) #define MOCK_METHOD6_T(m, ...) GMOCK_METHOD6_(typename, , , m, __VA_ARGS__) #define MOCK_METHOD7_T(m, ...) GMOCK_METHOD7_(typename, , , m, __VA_ARGS__) #define MOCK_METHOD8_T(m, ...) GMOCK_METHOD8_(typename, , , m, __VA_ARGS__) #define MOCK_METHOD9_T(m, ...) GMOCK_METHOD9_(typename, , , m, __VA_ARGS__) #define MOCK_METHOD10_T(m, ...) GMOCK_METHOD10_(typename, , , m, __VA_ARGS__) #define MOCK_CONST_METHOD0_T(m, ...) \ GMOCK_METHOD0_(typename, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD1_T(m, ...) \ GMOCK_METHOD1_(typename, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD2_T(m, ...) \ GMOCK_METHOD2_(typename, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD3_T(m, ...) \ GMOCK_METHOD3_(typename, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD4_T(m, ...) \ GMOCK_METHOD4_(typename, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD5_T(m, ...) \ GMOCK_METHOD5_(typename, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD6_T(m, ...) \ GMOCK_METHOD6_(typename, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD7_T(m, ...) \ GMOCK_METHOD7_(typename, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD8_T(m, ...) \ GMOCK_METHOD8_(typename, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD9_T(m, ...) 
\ GMOCK_METHOD9_(typename, const, , m, __VA_ARGS__) #define MOCK_CONST_METHOD10_T(m, ...) \ GMOCK_METHOD10_(typename, const, , m, __VA_ARGS__) #define MOCK_METHOD0_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD0_(, , ct, m, __VA_ARGS__) #define MOCK_METHOD1_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD1_(, , ct, m, __VA_ARGS__) #define MOCK_METHOD2_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD2_(, , ct, m, __VA_ARGS__) #define MOCK_METHOD3_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD3_(, , ct, m, __VA_ARGS__) #define MOCK_METHOD4_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD4_(, , ct, m, __VA_ARGS__) #define MOCK_METHOD5_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD5_(, , ct, m, __VA_ARGS__) #define MOCK_METHOD6_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD6_(, , ct, m, __VA_ARGS__) #define MOCK_METHOD7_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD7_(, , ct, m, __VA_ARGS__) #define MOCK_METHOD8_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD8_(, , ct, m, __VA_ARGS__) #define MOCK_METHOD9_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD9_(, , ct, m, __VA_ARGS__) #define MOCK_METHOD10_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD10_(, , ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD0_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD0_(, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD1_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD1_(, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD2_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD2_(, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD3_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD3_(, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD4_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD4_(, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD5_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD5_(, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD6_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD6_(, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD7_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD7_(, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD8_WITH_CALLTYPE(ct, m, ...) 
\ GMOCK_METHOD8_(, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD9_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD9_(, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD10_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD10_(, const, ct, m, __VA_ARGS__) #define MOCK_METHOD0_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD0_(typename, , ct, m, __VA_ARGS__) #define MOCK_METHOD1_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD1_(typename, , ct, m, __VA_ARGS__) #define MOCK_METHOD2_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD2_(typename, , ct, m, __VA_ARGS__) #define MOCK_METHOD3_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD3_(typename, , ct, m, __VA_ARGS__) #define MOCK_METHOD4_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD4_(typename, , ct, m, __VA_ARGS__) #define MOCK_METHOD5_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD5_(typename, , ct, m, __VA_ARGS__) #define MOCK_METHOD6_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD6_(typename, , ct, m, __VA_ARGS__) #define MOCK_METHOD7_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD7_(typename, , ct, m, __VA_ARGS__) #define MOCK_METHOD8_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD8_(typename, , ct, m, __VA_ARGS__) #define MOCK_METHOD9_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD9_(typename, , ct, m, __VA_ARGS__) #define MOCK_METHOD10_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD10_(typename, , ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD0_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD0_(typename, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD1_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD1_(typename, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD2_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD2_(typename, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD3_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD3_(typename, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD4_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD4_(typename, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD5_T_WITH_CALLTYPE(ct, m, ...) 
\ GMOCK_METHOD5_(typename, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD6_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD6_(typename, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD7_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD7_(typename, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD8_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD8_(typename, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD9_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD9_(typename, const, ct, m, __VA_ARGS__) #define MOCK_CONST_METHOD10_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD10_(typename, const, ct, m, __VA_ARGS__) } // namespace testing #endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ gmock-generated-function-mockers.h.pump000066400000000000000000000176151456444476200345010ustar00rootroot00000000000000LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/include/gmock$$ -*- mode: c++; -*- $$ This is a Pump source file. Please use Pump to convert $$ it to gmock-generated-function-mockers.h. $$ $var n = 10 $$ The maximum arity we support. // Copyright 2007, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Google Mock - a framework for writing C++ mock classes. // // This file implements function mockers of various arities. // GOOGLETEST_CM0002 DO NOT DELETE #ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ #define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ #include #include #include "gmock/gmock-spec-builders.h" #include "gmock/internal/gmock-internal-utils.h" namespace testing { namespace internal { $range i 0..n // Removes the given pointer; this is a helper for the expectation setter method // for parameterless matchers. // // We want to make sure that the user cannot set a parameterless expectation on // overloaded methods, including methods which are overloaded on const. Example: // // class MockClass { // MOCK_METHOD0(GetName, string&()); // MOCK_CONST_METHOD0(GetName, const string&()); // }; // // TEST() { // // This should be an error, as it's not clear which overload is expected. // EXPECT_CALL(mock, GetName).WillOnce(ReturnRef(value)); // } // // Here are the generated expectation-setter methods: // // class MockClass { // // Overload 1 // MockSpec gmock_GetName() { ... } // // Overload 2. 
Declared const so that the compiler will generate an // // error when trying to resolve between this and overload 4 in // // 'gmock_GetName(WithoutMatchers(), nullptr)'. // MockSpec gmock_GetName( // const WithoutMatchers&, const Function*) const { // // Removes const from this, calls overload 1 // return AdjustConstness_(this)->gmock_GetName(); // } // // // Overload 3 // const string& gmock_GetName() const { ... } // // Overload 4 // MockSpec gmock_GetName( // const WithoutMatchers&, const Function*) const { // // Does not remove const, calls overload 3 // return AdjustConstness_const(this)->gmock_GetName(); // } // } // template const MockType* AdjustConstness_const(const MockType* mock) { return mock; } // Removes const from and returns the given pointer; this is a helper for the // expectation setter method for parameterless matchers. template MockType* AdjustConstness_(const MockType* mock) { return const_cast(mock); } } // namespace internal // The style guide prohibits "using" statements in a namespace scope // inside a header file. However, the FunctionMocker class template // is meant to be defined in the ::testing namespace. The following // line is just a trick for working around a bug in MSVC 8.0, which // cannot handle it if we define FunctionMocker in ::testing. using internal::FunctionMocker; // GMOCK_RESULT_(tn, F) expands to the result type of function type F. // We define this as a variadic macro in case F contains unprotected // commas (the same reason that we use variadic macros in other places // in this file). // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_RESULT_(tn, ...) \ tn ::testing::internal::Function<__VA_ARGS__>::Result // The type of argument N of the given function type. // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_ARG_(tn, N, ...) \ tn ::testing::internal::Function<__VA_ARGS__>::template Arg::type // The matcher type for argument N of the given function type. 
// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_MATCHER_(tn, N, ...) \ const ::testing::Matcher& // The variable for mocking the given method. // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_MOCKER_(arity, constness, Method) \ GTEST_CONCAT_TOKEN_(gmock##constness##arity##_##Method##_, __LINE__) $for i [[ $range j 1..i $var arg_as = [[$for j, [[GMOCK_ARG_(tn, $j, __VA_ARGS__) gmock_a$j]]]] $var as = [[$for j, \ [[::std::forward(gmock_a$j)]]]] $var matcher_arg_as = [[$for j, \ [[GMOCK_MATCHER_(tn, $j, __VA_ARGS__) gmock_a$j]]]] $var matcher_as = [[$for j, [[gmock_a$j]]]] $var anything_matchers = [[$for j, \ [[::testing::A()]]]] // INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! #define GMOCK_METHOD$i[[]]_(tn, constness, ct, Method, ...) \ static_assert($i == ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, "MOCK_METHOD must match argument count.");\ GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ $arg_as) constness { \ GMOCK_MOCKER_($i, constness, Method).SetOwnerAndName(this, #Method); \ return GMOCK_MOCKER_($i, constness, Method).Invoke($as); \ } \ ::testing::MockSpec<__VA_ARGS__> \ gmock_##Method($matcher_arg_as) constness { \ GMOCK_MOCKER_($i, constness, Method).RegisterOwner(this); \ return GMOCK_MOCKER_($i, constness, Method).With($matcher_as); \ } \ ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ const ::testing::internal::WithoutMatchers&, \ constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ return ::testing::internal::AdjustConstness_##constness(this)-> \ gmock_##Method($anything_matchers); \ } \ mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_($i, constness, Method) ]] $for i [[ #define MOCK_METHOD$i(m, ...) GMOCK_METHOD$i[[]]_(, , , m, __VA_ARGS__) ]] $for i [[ #define MOCK_CONST_METHOD$i(m, ...) GMOCK_METHOD$i[[]]_(, const, , m, __VA_ARGS__) ]] $for i [[ #define MOCK_METHOD$i[[]]_T(m, ...) 
GMOCK_METHOD$i[[]]_(typename, , , m, __VA_ARGS__) ]] $for i [[ #define MOCK_CONST_METHOD$i[[]]_T(m, ...) \ GMOCK_METHOD$i[[]]_(typename, const, , m, __VA_ARGS__) ]] $for i [[ #define MOCK_METHOD$i[[]]_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD$i[[]]_(, , ct, m, __VA_ARGS__) ]] $for i [[ #define MOCK_CONST_METHOD$i[[]]_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD$i[[]]_(, const, ct, m, __VA_ARGS__) ]] $for i [[ #define MOCK_METHOD$i[[]]_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD$i[[]]_(typename, , ct, m, __VA_ARGS__) ]] $for i [[ #define MOCK_CONST_METHOD$i[[]]_T_WITH_CALLTYPE(ct, m, ...) \ GMOCK_METHOD$i[[]]_(typename, const, ct, m, __VA_ARGS__) ]] } // namespace testing #endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/include/gmock/gmock-generated-matchers.h000066400000000000000000001310141456444476200321060ustar00rootroot00000000000000// This file was GENERATED by command: // pump.py gmock-generated-matchers.h.pump // DO NOT EDIT BY HAND!!! // Copyright 2008, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Google Mock - a framework for writing C++ mock classes. // // This file implements some commonly used variadic matchers. // GOOGLETEST_CM0002 DO NOT DELETE #ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ #define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ #include #include #include #include #include #include "gmock/gmock-matchers.h" // The MATCHER* family of macros can be used in a namespace scope to // define custom matchers easily. // // Basic Usage // =========== // // The syntax // // MATCHER(name, description_string) { statements; } // // defines a matcher with the given name that executes the statements, // which must return a bool to indicate if the match succeeds. Inside // the statements, you can refer to the value being matched by 'arg', // and refer to its type by 'arg_type'. // // The description string documents what the matcher does, and is used // to generate the failure message when the match fails. Since a // MATCHER() is usually defined in a header file shared by multiple // C++ source files, we require the description to be a C-string // literal to avoid possible side effects. 
It can be empty, in which // case we'll use the sequence of words in the matcher name as the // description. // // For example: // // MATCHER(IsEven, "") { return (arg % 2) == 0; } // // allows you to write // // // Expects mock_foo.Bar(n) to be called where n is even. // EXPECT_CALL(mock_foo, Bar(IsEven())); // // or, // // // Verifies that the value of some_expression is even. // EXPECT_THAT(some_expression, IsEven()); // // If the above assertion fails, it will print something like: // // Value of: some_expression // Expected: is even // Actual: 7 // // where the description "is even" is automatically calculated from the // matcher name IsEven. // // Argument Type // ============= // // Note that the type of the value being matched (arg_type) is // determined by the context in which you use the matcher and is // supplied to you by the compiler, so you don't need to worry about // declaring it (nor can you). This allows the matcher to be // polymorphic. For example, IsEven() can be used to match any type // where the value of "(arg % 2) == 0" can be implicitly converted to // a bool. In the "Bar(IsEven())" example above, if method Bar() // takes an int, 'arg_type' will be int; if it takes an unsigned long, // 'arg_type' will be unsigned long; and so on. // // Parameterizing Matchers // ======================= // // Sometimes you'll want to parameterize the matcher. For that you // can use another macro: // // MATCHER_P(name, param_name, description_string) { statements; } // // For example: // // MATCHER_P(HasAbsoluteValue, value, "") { return abs(arg) == value; } // // will allow you to write: // // EXPECT_THAT(Blah("a"), HasAbsoluteValue(n)); // // which may lead to this message (assuming n is 10): // // Value of: Blah("a") // Expected: has absolute value 10 // Actual: -9 // // Note that both the matcher description and its parameter are // printed, making the message human-friendly. 
// // In the matcher definition body, you can write 'foo_type' to // reference the type of a parameter named 'foo'. For example, in the // body of MATCHER_P(HasAbsoluteValue, value) above, you can write // 'value_type' to refer to the type of 'value'. // // We also provide MATCHER_P2, MATCHER_P3, ..., up to MATCHER_P10 to // support multi-parameter matchers. // // Describing Parameterized Matchers // ================================= // // The last argument to MATCHER*() is a string-typed expression. The // expression can reference all of the matcher's parameters and a // special bool-typed variable named 'negation'. When 'negation' is // false, the expression should evaluate to the matcher's description; // otherwise it should evaluate to the description of the negation of // the matcher. For example, // // using testing::PrintToString; // // MATCHER_P2(InClosedRange, low, hi, // std::string(negation ? "is not" : "is") + " in range [" + // PrintToString(low) + ", " + PrintToString(hi) + "]") { // return low <= arg && arg <= hi; // } // ... // EXPECT_THAT(3, InClosedRange(4, 6)); // EXPECT_THAT(3, Not(InClosedRange(2, 4))); // // would generate two failures that contain the text: // // Expected: is in range [4, 6] // ... // Expected: is not in range [2, 4] // // If you specify "" as the description, the failure message will // contain the sequence of words in the matcher name followed by the // parameter values printed as a tuple. For example, // // MATCHER_P2(InClosedRange, low, hi, "") { ... } // ... // EXPECT_THAT(3, InClosedRange(4, 6)); // EXPECT_THAT(3, Not(InClosedRange(2, 4))); // // would generate two failures that contain the text: // // Expected: in closed range (4, 6) // ... // Expected: not (in closed range (2, 4)) // // Types of Matcher Parameters // =========================== // // For the purpose of typing, you can view // // MATCHER_Pk(Foo, p1, ..., pk, description_string) { ... 
} // // as shorthand for // // template // FooMatcherPk // Foo(p1_type p1, ..., pk_type pk) { ... } // // When you write Foo(v1, ..., vk), the compiler infers the types of // the parameters v1, ..., and vk for you. If you are not happy with // the result of the type inference, you can specify the types by // explicitly instantiating the template, as in Foo(5, // false). As said earlier, you don't get to (or need to) specify // 'arg_type' as that's determined by the context in which the matcher // is used. You can assign the result of expression Foo(p1, ..., pk) // to a variable of type FooMatcherPk. This // can be useful when composing matchers. // // While you can instantiate a matcher template with reference types, // passing the parameters by pointer usually makes your code more // readable. If, however, you still want to pass a parameter by // reference, be aware that in the failure message generated by the // matcher you will see the value of the referenced object but not its // address. // // Explaining Match Results // ======================== // // Sometimes the matcher description alone isn't enough to explain why // the match has failed or succeeded. For example, when expecting a // long string, it can be very helpful to also print the diff between // the expected string and the actual one. To achieve that, you can // optionally stream additional information to a special variable // named result_listener, whose type is a pointer to class // MatchResultListener: // // MATCHER_P(EqualsLongString, str, "") { // if (arg == str) return true; // // *result_listener << "the difference: " /// << DiffStrings(str, arg); // return false; // } // // Overloading Matchers // ==================== // // You can overload matchers with different numbers of parameters: // // MATCHER_P(Blah, a, description_string1) { ... } // MATCHER_P2(Blah, a, b, description_string2) { ... 
} // // Caveats // ======= // // When defining a new matcher, you should also consider implementing // MatcherInterface or using MakePolymorphicMatcher(). These // approaches require more work than the MATCHER* macros, but also // give you more control on the types of the value being matched and // the matcher parameters, which may leads to better compiler error // messages when the matcher is used wrong. They also allow // overloading matchers based on parameter types (as opposed to just // based on the number of parameters). // // MATCHER*() can only be used in a namespace scope as templates cannot be // declared inside of a local class. // // More Information // ================ // // To learn more about using these macros, please search for 'MATCHER' // on // https://github.com/google/googletest/blob/master/googlemock/docs/cook_book.md #define MATCHER(name, description)\ class name##Matcher {\ public:\ template \ class gmock_Impl : public ::testing::MatcherInterface<\ GTEST_REFERENCE_TO_CONST_(arg_type)> {\ public:\ gmock_Impl()\ {}\ virtual bool MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener) const;\ virtual void DescribeTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(false);\ }\ virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(true);\ }\ private:\ ::std::string FormatDescription(bool negation) const {\ ::std::string gmock_description = (description);\ if (!gmock_description.empty()) {\ return gmock_description;\ }\ return ::testing::internal::FormatMatcherDescription(\ negation, #name, \ ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ ::std::tuple<>()));\ }\ };\ template \ operator ::testing::Matcher() const {\ return ::testing::Matcher(\ new gmock_Impl());\ }\ name##Matcher() {\ }\ private:\ };\ inline name##Matcher name() {\ return name##Matcher();\ }\ template \ bool name##Matcher::gmock_Impl::MatchAndExplain(\ 
GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ const #define MATCHER_P(name, p0, description)\ template \ class name##MatcherP {\ public:\ template \ class gmock_Impl : public ::testing::MatcherInterface<\ GTEST_REFERENCE_TO_CONST_(arg_type)> {\ public:\ explicit gmock_Impl(p0##_type gmock_p0)\ : p0(::std::move(gmock_p0)) {}\ virtual bool MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener) const;\ virtual void DescribeTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(false);\ }\ virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(true);\ }\ p0##_type const p0;\ private:\ ::std::string FormatDescription(bool negation) const {\ ::std::string gmock_description = (description);\ if (!gmock_description.empty()) {\ return gmock_description;\ }\ return ::testing::internal::FormatMatcherDescription(\ negation, #name, \ ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ ::std::tuple(p0)));\ }\ };\ template \ operator ::testing::Matcher() const {\ return ::testing::Matcher(\ new gmock_Impl(p0));\ }\ explicit name##MatcherP(p0##_type gmock_p0) : p0(::std::move(gmock_p0)) {\ }\ p0##_type const p0;\ private:\ };\ template \ inline name##MatcherP name(p0##_type p0) {\ return name##MatcherP(p0);\ }\ template \ template \ bool name##MatcherP::gmock_Impl::MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ const #define MATCHER_P2(name, p0, p1, description)\ template \ class name##MatcherP2 {\ public:\ template \ class gmock_Impl : public ::testing::MatcherInterface<\ GTEST_REFERENCE_TO_CONST_(arg_type)> {\ public:\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1)\ : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)) {}\ virtual bool MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ 
::testing::MatchResultListener* result_listener) const;\ virtual void DescribeTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(false);\ }\ virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(true);\ }\ p0##_type const p0;\ p1##_type const p1;\ private:\ ::std::string FormatDescription(bool negation) const {\ ::std::string gmock_description = (description);\ if (!gmock_description.empty()) {\ return gmock_description;\ }\ return ::testing::internal::FormatMatcherDescription(\ negation, #name, \ ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ ::std::tuple(p0, p1)));\ }\ };\ template \ operator ::testing::Matcher() const {\ return ::testing::Matcher(\ new gmock_Impl(p0, p1));\ }\ name##MatcherP2(p0##_type gmock_p0, \ p1##_type gmock_p1) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)) {\ }\ p0##_type const p0;\ p1##_type const p1;\ private:\ };\ template \ inline name##MatcherP2 name(p0##_type p0, \ p1##_type p1) {\ return name##MatcherP2(p0, p1);\ }\ template \ template \ bool name##MatcherP2::gmock_Impl::MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ const #define MATCHER_P3(name, p0, p1, p2, description)\ template \ class name##MatcherP3 {\ public:\ template \ class gmock_Impl : public ::testing::MatcherInterface<\ GTEST_REFERENCE_TO_CONST_(arg_type)> {\ public:\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2)\ : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ p2(::std::move(gmock_p2)) {}\ virtual bool MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener) const;\ virtual void DescribeTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(false);\ }\ virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(true);\ }\ p0##_type const p0;\ p1##_type const 
p1;\ p2##_type const p2;\ private:\ ::std::string FormatDescription(bool negation) const {\ ::std::string gmock_description = (description);\ if (!gmock_description.empty()) {\ return gmock_description;\ }\ return ::testing::internal::FormatMatcherDescription(\ negation, #name, \ ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ ::std::tuple(p0, p1, p2)));\ }\ };\ template \ operator ::testing::Matcher() const {\ return ::testing::Matcher(\ new gmock_Impl(p0, p1, p2));\ }\ name##MatcherP3(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)) {\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ private:\ };\ template \ inline name##MatcherP3 name(p0##_type p0, \ p1##_type p1, p2##_type p2) {\ return name##MatcherP3(p0, p1, p2);\ }\ template \ template \ bool name##MatcherP3::gmock_Impl::MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ const #define MATCHER_P4(name, p0, p1, p2, p3, description)\ template \ class name##MatcherP4 {\ public:\ template \ class gmock_Impl : public ::testing::MatcherInterface<\ GTEST_REFERENCE_TO_CONST_(arg_type)> {\ public:\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3)\ : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ p2(::std::move(gmock_p2)), p3(::std::move(gmock_p3)) {}\ virtual bool MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener) const;\ virtual void DescribeTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(false);\ }\ virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(true);\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ p3##_type const p3;\ private:\ ::std::string FormatDescription(bool negation) const {\ ::std::string 
gmock_description = (description);\ if (!gmock_description.empty()) {\ return gmock_description;\ }\ return ::testing::internal::FormatMatcherDescription(\ negation, #name, \ ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ ::std::tuple(p0, \ p1, p2, p3)));\ }\ };\ template \ operator ::testing::Matcher() const {\ return ::testing::Matcher(\ new gmock_Impl(p0, p1, p2, p3));\ }\ name##MatcherP4(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2, p3##_type gmock_p3) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ p3(::std::move(gmock_p3)) {\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ p3##_type const p3;\ private:\ };\ template \ inline name##MatcherP4 name(p0##_type p0, p1##_type p1, p2##_type p2, \ p3##_type p3) {\ return name##MatcherP4(p0, \ p1, p2, p3);\ }\ template \ template \ bool name##MatcherP4::gmock_Impl::MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ const #define MATCHER_P5(name, p0, p1, p2, p3, p4, description)\ template \ class name##MatcherP5 {\ public:\ template \ class gmock_Impl : public ::testing::MatcherInterface<\ GTEST_REFERENCE_TO_CONST_(arg_type)> {\ public:\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4)\ : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ p2(::std::move(gmock_p2)), p3(::std::move(gmock_p3)), \ p4(::std::move(gmock_p4)) {}\ virtual bool MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener) const;\ virtual void DescribeTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(false);\ }\ virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(true);\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ p3##_type const p3;\ p4##_type const p4;\ private:\ ::std::string 
FormatDescription(bool negation) const {\ ::std::string gmock_description = (description);\ if (!gmock_description.empty()) {\ return gmock_description;\ }\ return ::testing::internal::FormatMatcherDescription(\ negation, #name, \ ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ ::std::tuple(p0, p1, p2, p3, p4)));\ }\ };\ template \ operator ::testing::Matcher() const {\ return ::testing::Matcher(\ new gmock_Impl(p0, p1, p2, p3, p4));\ }\ name##MatcherP5(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2, p3##_type gmock_p3, \ p4##_type gmock_p4) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)) {\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ p3##_type const p3;\ p4##_type const p4;\ private:\ };\ template \ inline name##MatcherP5 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ p4##_type p4) {\ return name##MatcherP5(p0, p1, p2, p3, p4);\ }\ template \ template \ bool name##MatcherP5::gmock_Impl::MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ const #define MATCHER_P6(name, p0, p1, p2, p3, p4, p5, description)\ template \ class name##MatcherP6 {\ public:\ template \ class gmock_Impl : public ::testing::MatcherInterface<\ GTEST_REFERENCE_TO_CONST_(arg_type)> {\ public:\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5)\ : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ p2(::std::move(gmock_p2)), p3(::std::move(gmock_p3)), \ p4(::std::move(gmock_p4)), p5(::std::move(gmock_p5)) {}\ virtual bool MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener) const;\ virtual void DescribeTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(false);\ }\ virtual void 
DescribeNegationTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(true);\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ p3##_type const p3;\ p4##_type const p4;\ p5##_type const p5;\ private:\ ::std::string FormatDescription(bool negation) const {\ ::std::string gmock_description = (description);\ if (!gmock_description.empty()) {\ return gmock_description;\ }\ return ::testing::internal::FormatMatcherDescription(\ negation, #name, \ ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ ::std::tuple(p0, p1, p2, p3, p4, p5)));\ }\ };\ template \ operator ::testing::Matcher() const {\ return ::testing::Matcher(\ new gmock_Impl(p0, p1, p2, p3, p4, p5));\ }\ name##MatcherP6(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ p5##_type gmock_p5) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ p5(::std::move(gmock_p5)) {\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ p3##_type const p3;\ p4##_type const p4;\ p5##_type const p5;\ private:\ };\ template \ inline name##MatcherP6 name(p0##_type p0, p1##_type p1, p2##_type p2, \ p3##_type p3, p4##_type p4, p5##_type p5) {\ return name##MatcherP6(p0, p1, p2, p3, p4, p5);\ }\ template \ template \ bool name##MatcherP6::gmock_Impl::MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ const #define MATCHER_P7(name, p0, p1, p2, p3, p4, p5, p6, description)\ template \ class name##MatcherP7 {\ public:\ template \ class gmock_Impl : public ::testing::MatcherInterface<\ GTEST_REFERENCE_TO_CONST_(arg_type)> {\ public:\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ p6##_type gmock_p6)\ : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ 
p2(::std::move(gmock_p2)), p3(::std::move(gmock_p3)), \ p4(::std::move(gmock_p4)), p5(::std::move(gmock_p5)), \ p6(::std::move(gmock_p6)) {}\ virtual bool MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener) const;\ virtual void DescribeTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(false);\ }\ virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(true);\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ p3##_type const p3;\ p4##_type const p4;\ p5##_type const p5;\ p6##_type const p6;\ private:\ ::std::string FormatDescription(bool negation) const {\ ::std::string gmock_description = (description);\ if (!gmock_description.empty()) {\ return gmock_description;\ }\ return ::testing::internal::FormatMatcherDescription(\ negation, #name, \ ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ ::std::tuple(p0, p1, p2, p3, p4, p5, \ p6)));\ }\ };\ template \ operator ::testing::Matcher() const {\ return ::testing::Matcher(\ new gmock_Impl(p0, p1, p2, p3, p4, p5, p6));\ }\ name##MatcherP7(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ p5##_type gmock_p5, p6##_type gmock_p6) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)) {\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ p3##_type const p3;\ p4##_type const p4;\ p5##_type const p5;\ p6##_type const p6;\ private:\ };\ template \ inline name##MatcherP7 name(p0##_type p0, p1##_type p1, \ p2##_type p2, p3##_type p3, p4##_type p4, p5##_type p5, \ p6##_type p6) {\ return name##MatcherP7(p0, p1, p2, p3, p4, p5, p6);\ }\ template \ template \ bool name##MatcherP7::gmock_Impl::MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* 
result_listener GTEST_ATTRIBUTE_UNUSED_)\ const #define MATCHER_P8(name, p0, p1, p2, p3, p4, p5, p6, p7, description)\ template \ class name##MatcherP8 {\ public:\ template \ class gmock_Impl : public ::testing::MatcherInterface<\ GTEST_REFERENCE_TO_CONST_(arg_type)> {\ public:\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ p6##_type gmock_p6, p7##_type gmock_p7)\ : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ p2(::std::move(gmock_p2)), p3(::std::move(gmock_p3)), \ p4(::std::move(gmock_p4)), p5(::std::move(gmock_p5)), \ p6(::std::move(gmock_p6)), p7(::std::move(gmock_p7)) {}\ virtual bool MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener) const;\ virtual void DescribeTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(false);\ }\ virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(true);\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ p3##_type const p3;\ p4##_type const p4;\ p5##_type const p5;\ p6##_type const p6;\ p7##_type const p7;\ private:\ ::std::string FormatDescription(bool negation) const {\ ::std::string gmock_description = (description);\ if (!gmock_description.empty()) {\ return gmock_description;\ }\ return ::testing::internal::FormatMatcherDescription(\ negation, #name, \ ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ ::std::tuple(p0, p1, p2, \ p3, p4, p5, p6, p7)));\ }\ };\ template \ operator ::testing::Matcher() const {\ return ::testing::Matcher(\ new gmock_Impl(p0, p1, p2, p3, p4, p5, p6, p7));\ }\ name##MatcherP8(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ p5##_type gmock_p5, p6##_type gmock_p6, \ p7##_type gmock_p7) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ p3(::std::move(gmock_p3)), 
p4(::std::move(gmock_p4)), \ p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \ p7(::std::move(gmock_p7)) {\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ p3##_type const p3;\ p4##_type const p4;\ p5##_type const p5;\ p6##_type const p6;\ p7##_type const p7;\ private:\ };\ template \ inline name##MatcherP8 name(p0##_type p0, \ p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, p5##_type p5, \ p6##_type p6, p7##_type p7) {\ return name##MatcherP8(p0, p1, p2, p3, p4, p5, \ p6, p7);\ }\ template \ template \ bool name##MatcherP8::gmock_Impl::MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ const #define MATCHER_P9(name, p0, p1, p2, p3, p4, p5, p6, p7, p8, description)\ template \ class name##MatcherP9 {\ public:\ template \ class gmock_Impl : public ::testing::MatcherInterface<\ GTEST_REFERENCE_TO_CONST_(arg_type)> {\ public:\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8)\ : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ p2(::std::move(gmock_p2)), p3(::std::move(gmock_p3)), \ p4(::std::move(gmock_p4)), p5(::std::move(gmock_p5)), \ p6(::std::move(gmock_p6)), p7(::std::move(gmock_p7)), \ p8(::std::move(gmock_p8)) {}\ virtual bool MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener) const;\ virtual void DescribeTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(false);\ }\ virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(true);\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ p3##_type const p3;\ p4##_type const p4;\ p5##_type const p5;\ p6##_type const p6;\ p7##_type const p7;\ p8##_type const p8;\ private:\ ::std::string FormatDescription(bool negation) const {\ 
::std::string gmock_description = (description);\ if (!gmock_description.empty()) {\ return gmock_description;\ }\ return ::testing::internal::FormatMatcherDescription(\ negation, #name, \ ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ ::std::tuple(p0, p1, p2, p3, p4, p5, p6, p7, p8)));\ }\ };\ template \ operator ::testing::Matcher() const {\ return ::testing::Matcher(\ new gmock_Impl(p0, p1, p2, p3, p4, p5, p6, p7, p8));\ }\ name##MatcherP9(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ p5##_type gmock_p5, p6##_type gmock_p6, p7##_type gmock_p7, \ p8##_type gmock_p8) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \ p7(::std::move(gmock_p7)), p8(::std::move(gmock_p8)) {\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ p3##_type const p3;\ p4##_type const p4;\ p5##_type const p5;\ p6##_type const p6;\ p7##_type const p7;\ p8##_type const p8;\ private:\ };\ template \ inline name##MatcherP9 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, \ p8##_type p8) {\ return name##MatcherP9(p0, p1, p2, \ p3, p4, p5, p6, p7, p8);\ }\ template \ template \ bool name##MatcherP9::gmock_Impl::MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ const #define MATCHER_P10(name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, description)\ template \ class name##MatcherP10 {\ public:\ template \ class gmock_Impl : public ::testing::MatcherInterface<\ GTEST_REFERENCE_TO_CONST_(arg_type)> {\ public:\ gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8, \ p9##_type 
gmock_p9)\ : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ p2(::std::move(gmock_p2)), p3(::std::move(gmock_p3)), \ p4(::std::move(gmock_p4)), p5(::std::move(gmock_p5)), \ p6(::std::move(gmock_p6)), p7(::std::move(gmock_p7)), \ p8(::std::move(gmock_p8)), p9(::std::move(gmock_p9)) {}\ virtual bool MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener) const;\ virtual void DescribeTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(false);\ }\ virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(true);\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ p3##_type const p3;\ p4##_type const p4;\ p5##_type const p5;\ p6##_type const p6;\ p7##_type const p7;\ p8##_type const p8;\ p9##_type const p9;\ private:\ ::std::string FormatDescription(bool negation) const {\ ::std::string gmock_description = (description);\ if (!gmock_description.empty()) {\ return gmock_description;\ }\ return ::testing::internal::FormatMatcherDescription(\ negation, #name, \ ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ ::std::tuple(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9)));\ }\ };\ template \ operator ::testing::Matcher() const {\ return ::testing::Matcher(\ new gmock_Impl(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9));\ }\ name##MatcherP10(p0##_type gmock_p0, p1##_type gmock_p1, \ p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ p5##_type gmock_p5, p6##_type gmock_p6, p7##_type gmock_p7, \ p8##_type gmock_p8, p9##_type gmock_p9) : p0(::std::move(gmock_p0)), \ p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \ p7(::std::move(gmock_p7)), p8(::std::move(gmock_p8)), \ p9(::std::move(gmock_p9)) {\ }\ p0##_type const p0;\ p1##_type const p1;\ p2##_type const p2;\ p3##_type const p3;\ p4##_type const p4;\ p5##_type const 
p5;\ p6##_type const p6;\ p7##_type const p7;\ p8##_type const p8;\ p9##_type const p9;\ private:\ };\ template \ inline name##MatcherP10 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8, \ p9##_type p9) {\ return name##MatcherP10(p0, \ p1, p2, p3, p4, p5, p6, p7, p8, p9);\ }\ template \ template \ bool name##MatcherP10::gmock_Impl::MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ const #endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/include/gmock/gmock-generated-matchers.h.pump000066400000000000000000000310471456444476200330730ustar00rootroot00000000000000$$ -*- mode: c++; -*- $$ This is a Pump source file. Please use Pump to convert $$ it to gmock-generated-matchers.h. $$ $var n = 10 $$ The maximum arity we support. $$ }} This line fixes auto-indentation of the following code in Emacs. // Copyright 2008, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Google Mock - a framework for writing C++ mock classes. // // This file implements some commonly used variadic matchers. // GOOGLETEST_CM0002 DO NOT DELETE #ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ #define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ #include #include #include #include #include #include "gmock/gmock-matchers.h" // The MATCHER* family of macros can be used in a namespace scope to // define custom matchers easily. // // Basic Usage // =========== // // The syntax // // MATCHER(name, description_string) { statements; } // // defines a matcher with the given name that executes the statements, // which must return a bool to indicate if the match succeeds. Inside // the statements, you can refer to the value being matched by 'arg', // and refer to its type by 'arg_type'. // // The description string documents what the matcher does, and is used // to generate the failure message when the match fails. Since a // MATCHER() is usually defined in a header file shared by multiple // C++ source files, we require the description to be a C-string // literal to avoid possible side effects. 
It can be empty, in which // case we'll use the sequence of words in the matcher name as the // description. // // For example: // // MATCHER(IsEven, "") { return (arg % 2) == 0; } // // allows you to write // // // Expects mock_foo.Bar(n) to be called where n is even. // EXPECT_CALL(mock_foo, Bar(IsEven())); // // or, // // // Verifies that the value of some_expression is even. // EXPECT_THAT(some_expression, IsEven()); // // If the above assertion fails, it will print something like: // // Value of: some_expression // Expected: is even // Actual: 7 // // where the description "is even" is automatically calculated from the // matcher name IsEven. // // Argument Type // ============= // // Note that the type of the value being matched (arg_type) is // determined by the context in which you use the matcher and is // supplied to you by the compiler, so you don't need to worry about // declaring it (nor can you). This allows the matcher to be // polymorphic. For example, IsEven() can be used to match any type // where the value of "(arg % 2) == 0" can be implicitly converted to // a bool. In the "Bar(IsEven())" example above, if method Bar() // takes an int, 'arg_type' will be int; if it takes an unsigned long, // 'arg_type' will be unsigned long; and so on. // // Parameterizing Matchers // ======================= // // Sometimes you'll want to parameterize the matcher. For that you // can use another macro: // // MATCHER_P(name, param_name, description_string) { statements; } // // For example: // // MATCHER_P(HasAbsoluteValue, value, "") { return abs(arg) == value; } // // will allow you to write: // // EXPECT_THAT(Blah("a"), HasAbsoluteValue(n)); // // which may lead to this message (assuming n is 10): // // Value of: Blah("a") // Expected: has absolute value 10 // Actual: -9 // // Note that both the matcher description and its parameter are // printed, making the message human-friendly. 
// // In the matcher definition body, you can write 'foo_type' to // reference the type of a parameter named 'foo'. For example, in the // body of MATCHER_P(HasAbsoluteValue, value) above, you can write // 'value_type' to refer to the type of 'value'. // // We also provide MATCHER_P2, MATCHER_P3, ..., up to MATCHER_P$n to // support multi-parameter matchers. // // Describing Parameterized Matchers // ================================= // // The last argument to MATCHER*() is a string-typed expression. The // expression can reference all of the matcher's parameters and a // special bool-typed variable named 'negation'. When 'negation' is // false, the expression should evaluate to the matcher's description; // otherwise it should evaluate to the description of the negation of // the matcher. For example, // // using testing::PrintToString; // // MATCHER_P2(InClosedRange, low, hi, // std::string(negation ? "is not" : "is") + " in range [" + // PrintToString(low) + ", " + PrintToString(hi) + "]") { // return low <= arg && arg <= hi; // } // ... // EXPECT_THAT(3, InClosedRange(4, 6)); // EXPECT_THAT(3, Not(InClosedRange(2, 4))); // // would generate two failures that contain the text: // // Expected: is in range [4, 6] // ... // Expected: is not in range [2, 4] // // If you specify "" as the description, the failure message will // contain the sequence of words in the matcher name followed by the // parameter values printed as a tuple. For example, // // MATCHER_P2(InClosedRange, low, hi, "") { ... } // ... // EXPECT_THAT(3, InClosedRange(4, 6)); // EXPECT_THAT(3, Not(InClosedRange(2, 4))); // // would generate two failures that contain the text: // // Expected: in closed range (4, 6) // ... // Expected: not (in closed range (2, 4)) // // Types of Matcher Parameters // =========================== // // For the purpose of typing, you can view // // MATCHER_Pk(Foo, p1, ..., pk, description_string) { ... 
} // // as shorthand for // // template // FooMatcherPk // Foo(p1_type p1, ..., pk_type pk) { ... } // // When you write Foo(v1, ..., vk), the compiler infers the types of // the parameters v1, ..., and vk for you. If you are not happy with // the result of the type inference, you can specify the types by // explicitly instantiating the template, as in Foo(5, // false). As said earlier, you don't get to (or need to) specify // 'arg_type' as that's determined by the context in which the matcher // is used. You can assign the result of expression Foo(p1, ..., pk) // to a variable of type FooMatcherPk. This // can be useful when composing matchers. // // While you can instantiate a matcher template with reference types, // passing the parameters by pointer usually makes your code more // readable. If, however, you still want to pass a parameter by // reference, be aware that in the failure message generated by the // matcher you will see the value of the referenced object but not its // address. // // Explaining Match Results // ======================== // // Sometimes the matcher description alone isn't enough to explain why // the match has failed or succeeded. For example, when expecting a // long string, it can be very helpful to also print the diff between // the expected string and the actual one. To achieve that, you can // optionally stream additional information to a special variable // named result_listener, whose type is a pointer to class // MatchResultListener: // // MATCHER_P(EqualsLongString, str, "") { // if (arg == str) return true; // // *result_listener << "the difference: " /// << DiffStrings(str, arg); // return false; // } // // Overloading Matchers // ==================== // // You can overload matchers with different numbers of parameters: // // MATCHER_P(Blah, a, description_string1) { ... } // MATCHER_P2(Blah, a, b, description_string2) { ... 
} // // Caveats // ======= // // When defining a new matcher, you should also consider implementing // MatcherInterface or using MakePolymorphicMatcher(). These // approaches require more work than the MATCHER* macros, but also // give you more control on the types of the value being matched and // the matcher parameters, which may leads to better compiler error // messages when the matcher is used wrong. They also allow // overloading matchers based on parameter types (as opposed to just // based on the number of parameters). // // MATCHER*() can only be used in a namespace scope as templates cannot be // declared inside of a local class. // // More Information // ================ // // To learn more about using these macros, please search for 'MATCHER' // on // https://github.com/google/googletest/blob/master/googlemock/docs/cook_book.md $range i 0..n $for i [[ $var macro_name = [[$if i==0 [[MATCHER]] $elif i==1 [[MATCHER_P]] $else [[MATCHER_P$i]]]] $var class_name = [[name##Matcher[[$if i==0 [[]] $elif i==1 [[P]] $else [[P$i]]]]]] $range j 0..i-1 $var template = [[$if i==0 [[]] $else [[ template <$for j, [[typename p$j##_type]]>\ ]]]] $var ctor_param_list = [[$for j, [[p$j##_type gmock_p$j]]]] $var impl_ctor_param_list = [[$for j, [[p$j##_type gmock_p$j]]]] $var impl_inits = [[$if i==0 [[]] $else [[ : $for j, [[p$j(::std::move(gmock_p$j))]]]]]] $var inits = [[$if i==0 [[]] $else [[ : $for j, [[p$j(::std::move(gmock_p$j))]]]]]] $var params = [[$for j, [[p$j]]]] $var param_types = [[$if i==0 [[]] $else [[<$for j, [[p$j##_type]]>]]]] $var param_types_and_names = [[$for j, [[p$j##_type p$j]]]] $var param_field_decls = [[$for j [[ p$j##_type const p$j;\ ]]]] $var param_field_decls2 = [[$for j [[ p$j##_type const p$j;\ ]]]] #define $macro_name(name$for j [[, p$j]], description)\$template class $class_name {\ public:\ template \ class gmock_Impl : public ::testing::MatcherInterface<\ GTEST_REFERENCE_TO_CONST_(arg_type)> {\ public:\ [[$if i==1 [[explicit 
]]]]gmock_Impl($impl_ctor_param_list)\ $impl_inits {}\ virtual bool MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener) const;\ virtual void DescribeTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(false);\ }\ virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ *gmock_os << FormatDescription(true);\ }\$param_field_decls private:\ ::std::string FormatDescription(bool negation) const {\ ::std::string gmock_description = (description);\ if (!gmock_description.empty()) {\ return gmock_description;\ }\ return ::testing::internal::FormatMatcherDescription(\ negation, #name, \ ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ ::std::tuple<$for j, [[p$j##_type]]>($for j, [[p$j]])));\ }\ };\ template \ operator ::testing::Matcher() const {\ return ::testing::Matcher(\ new gmock_Impl($params));\ }\ [[$if i==1 [[explicit ]]]]$class_name($ctor_param_list)$inits {\ }\$param_field_decls2 private:\ };\$template inline $class_name$param_types name($param_types_and_names) {\ return $class_name$param_types($params);\ }\$template template \ bool $class_name$param_types::gmock_Impl::MatchAndExplain(\ GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ const ]] #endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ LucenePlusPlus-rel_3.0.9/src/test/gtest/googlemock/include/gmock/gmock-matchers.h000066400000000000000000005042351456444476200301630ustar00rootroot00000000000000// Copyright 2007, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. 
// * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Google Mock - a framework for writing C++ mock classes. // // This file implements some commonly used argument matchers. More // matchers can be defined by the user implementing the // MatcherInterface interface if necessary. // // See googletest/include/gtest/gtest-matchers.h for the definition of class // Matcher, class MatcherInterface, and others. // GOOGLETEST_CM0002 DO NOT DELETE #ifndef GMOCK_INCLUDE_GMOCK_GMOCK_MATCHERS_H_ #define GMOCK_INCLUDE_GMOCK_GMOCK_MATCHERS_H_ #include #include #include #include #include #include #include // NOLINT #include #include #include #include #include #include "gmock/internal/gmock-internal-utils.h" #include "gmock/internal/gmock-port.h" #include "gtest/gtest.h" // MSVC warning C5046 is new as of VS2017 version 15.8. 
#if defined(_MSC_VER) && _MSC_VER >= 1915 #define GMOCK_MAYBE_5046_ 5046 #else #define GMOCK_MAYBE_5046_ #endif GTEST_DISABLE_MSC_WARNINGS_PUSH_( 4251 GMOCK_MAYBE_5046_ /* class A needs to have dll-interface to be used by clients of class B */ /* Symbol involving type with internal linkage not defined */) namespace testing { // To implement a matcher Foo for type T, define: // 1. a class FooMatcherImpl that implements the // MatcherInterface interface, and // 2. a factory function that creates a Matcher object from a // FooMatcherImpl*. // // The two-level delegation design makes it possible to allow a user // to write "v" instead of "Eq(v)" where a Matcher is expected, which // is impossible if we pass matchers by pointers. It also eases // ownership management as Matcher objects can now be copied like // plain values. // A match result listener that stores the explanation in a string. class StringMatchResultListener : public MatchResultListener { public: StringMatchResultListener() : MatchResultListener(&ss_) {} // Returns the explanation accumulated so far. std::string str() const { return ss_.str(); } // Clears the explanation accumulated so far. void Clear() { ss_.str(""); } private: ::std::stringstream ss_; GTEST_DISALLOW_COPY_AND_ASSIGN_(StringMatchResultListener); }; // Anything inside the 'internal' namespace IS INTERNAL IMPLEMENTATION // and MUST NOT BE USED IN USER CODE!!! namespace internal { // The MatcherCastImpl class template is a helper for implementing // MatcherCast(). We need this helper in order to partially // specialize the implementation of MatcherCast() (C++ allows // class/struct templates to be partially specialized, but not // function templates.). // This general version is used when MatcherCast()'s argument is a // polymorphic matcher (i.e. something that can be converted to a // Matcher but is not one yet; for example, Eq(value)) or a value (for // example, "hello"). 
template class MatcherCastImpl { public: static Matcher Cast(const M& polymorphic_matcher_or_value) { // M can be a polymorphic matcher, in which case we want to use // its conversion operator to create Matcher. Or it can be a value // that should be passed to the Matcher's constructor. // // We can't call Matcher(polymorphic_matcher_or_value) when M is a // polymorphic matcher because it'll be ambiguous if T has an implicit // constructor from M (this usually happens when T has an implicit // constructor from any type). // // It won't work to unconditionally implict_cast // polymorphic_matcher_or_value to Matcher because it won't trigger // a user-defined conversion from M to T if one exists (assuming M is // a value). return CastImpl(polymorphic_matcher_or_value, std::is_convertible>{}, std::is_convertible{}); } private: template static Matcher CastImpl(const M& polymorphic_matcher_or_value, std::true_type /* convertible_to_matcher */, bool_constant) { // M is implicitly convertible to Matcher, which means that either // M is a polymorphic matcher or Matcher has an implicit constructor // from M. In both cases using the implicit conversion will produce a // matcher. // // Even if T has an implicit constructor from M, it won't be called because // creating Matcher would require a chain of two user-defined conversions // (first to create T from M and then to create Matcher from T). return polymorphic_matcher_or_value; } // M can't be implicitly converted to Matcher, so M isn't a polymorphic // matcher. It's a value of a type implicitly convertible to T. Use direct // initialization to create a matcher. static Matcher CastImpl(const M& value, std::false_type /* convertible_to_matcher */, std::true_type /* convertible_to_T */) { return Matcher(ImplicitCast_(value)); } // M can't be implicitly converted to either Matcher or T. Attempt to use // polymorphic matcher Eq(value) in this case. 
// // Note that we first attempt to perform an implicit cast on the value and // only fall back to the polymorphic Eq() matcher afterwards because the // latter calls bool operator==(const Lhs& lhs, const Rhs& rhs) in the end // which might be undefined even when Rhs is implicitly convertible to Lhs // (e.g. std::pair vs. std::pair). // // We don't define this method inline as we need the declaration of Eq(). static Matcher CastImpl(const M& value, std::false_type /* convertible_to_matcher */, std::false_type /* convertible_to_T */); }; // This more specialized version is used when MatcherCast()'s argument // is already a Matcher. This only compiles when type T can be // statically converted to type U. template class MatcherCastImpl > { public: static Matcher Cast(const Matcher& source_matcher) { return Matcher(new Impl(source_matcher)); } private: class Impl : public MatcherInterface { public: explicit Impl(const Matcher& source_matcher) : source_matcher_(source_matcher) {} // We delegate the matching logic to the source matcher. bool MatchAndExplain(T x, MatchResultListener* listener) const override { using FromType = typename std::remove_cv::type>::type>::type; using ToType = typename std::remove_cv::type>::type>::type; // Do not allow implicitly converting base*/& to derived*/&. static_assert( // Do not trigger if only one of them is a pointer. That implies a // regular conversion and not a down_cast. 
(std::is_pointer::type>::value != std::is_pointer::type>::value) || std::is_same::value || !std::is_base_of::value, "Can't implicitly convert from to "); return source_matcher_.MatchAndExplain(static_cast(x), listener); } void DescribeTo(::std::ostream* os) const override { source_matcher_.DescribeTo(os); } void DescribeNegationTo(::std::ostream* os) const override { source_matcher_.DescribeNegationTo(os); } private: const Matcher source_matcher_; GTEST_DISALLOW_ASSIGN_(Impl); }; }; // This even more specialized version is used for efficiently casting // a matcher to its own type. template class MatcherCastImpl > { public: static Matcher Cast(const Matcher& matcher) { return matcher; } }; } // namespace internal // In order to be safe and clear, casting between different matcher // types is done explicitly via MatcherCast(m), which takes a // matcher m and returns a Matcher. It compiles only when T can be // statically converted to the argument type of m. template inline Matcher MatcherCast(const M& matcher) { return internal::MatcherCastImpl::Cast(matcher); } // Implements SafeMatcherCast(). // // FIXME: The intermediate SafeMatcherCastImpl class was introduced as a // workaround for a compiler bug, and can now be removed. template class SafeMatcherCastImpl { public: // This overload handles polymorphic matchers and values only since // monomorphic matchers are handled by the next one. template static inline Matcher Cast(const M& polymorphic_matcher_or_value) { return internal::MatcherCastImpl::Cast(polymorphic_matcher_or_value); } // This overload handles monomorphic matchers. // // In general, if type T can be implicitly converted to type U, we can // safely convert a Matcher to a Matcher (i.e. Matcher is // contravariant): just keep a copy of the original Matcher, convert the // argument from type T to U, and then pass it to the underlying Matcher. 
// The only exception is when U is a reference and T is not, as the // underlying Matcher may be interested in the argument's address, which // is not preserved in the conversion from T to U. template static inline Matcher Cast(const Matcher& matcher) { // Enforce that T can be implicitly converted to U. GTEST_COMPILE_ASSERT_((std::is_convertible::value), "T must be implicitly convertible to U"); // Enforce that we are not converting a non-reference type T to a reference // type U. GTEST_COMPILE_ASSERT_( std::is_reference::value || !std::is_reference::value, cannot_convert_non_reference_arg_to_reference); // In case both T and U are arithmetic types, enforce that the // conversion is not lossy. typedef GTEST_REMOVE_REFERENCE_AND_CONST_(T) RawT; typedef GTEST_REMOVE_REFERENCE_AND_CONST_(U) RawU; const bool kTIsOther = GMOCK_KIND_OF_(RawT) == internal::kOther; const bool kUIsOther = GMOCK_KIND_OF_(RawU) == internal::kOther; GTEST_COMPILE_ASSERT_( kTIsOther || kUIsOther || (internal::LosslessArithmeticConvertible::value), conversion_of_arithmetic_types_must_be_lossless); return MatcherCast(matcher); } }; template inline Matcher SafeMatcherCast(const M& polymorphic_matcher) { return SafeMatcherCastImpl::Cast(polymorphic_matcher); } // A() returns a matcher that matches any value of type T. template Matcher A(); // Anything inside the 'internal' namespace IS INTERNAL IMPLEMENTATION // and MUST NOT BE USED IN USER CODE!!! namespace internal { // If the explanation is not empty, prints it to the ostream. inline void PrintIfNotEmpty(const std::string& explanation, ::std::ostream* os) { if (explanation != "" && os != nullptr) { *os << ", " << explanation; } } // Returns true if the given type name is easy to read by a human. // This is used to decide whether printing the type of a value might // be helpful. inline bool IsReadableTypeName(const std::string& type_name) { // We consider a type name readable if it's short or doesn't contain // a template or function type. 
return (type_name.length() <= 20 || type_name.find_first_of("<(") == std::string::npos); } // Matches the value against the given matcher, prints the value and explains // the match result to the listener. Returns the match result. // 'listener' must not be NULL. // Value cannot be passed by const reference, because some matchers take a // non-const argument. template bool MatchPrintAndExplain(Value& value, const Matcher& matcher, MatchResultListener* listener) { if (!listener->IsInterested()) { // If the listener is not interested, we do not need to construct the // inner explanation. return matcher.Matches(value); } StringMatchResultListener inner_listener; const bool match = matcher.MatchAndExplain(value, &inner_listener); UniversalPrint(value, listener->stream()); #if GTEST_HAS_RTTI const std::string& type_name = GetTypeName(); if (IsReadableTypeName(type_name)) *listener->stream() << " (of type " << type_name << ")"; #endif PrintIfNotEmpty(inner_listener.str(), listener->stream()); return match; } // An internal helper class for doing compile-time loop on a tuple's // fields. template class TuplePrefix { public: // TuplePrefix::Matches(matcher_tuple, value_tuple) returns true // if and only if the first N fields of matcher_tuple matches // the first N fields of value_tuple, respectively. template static bool Matches(const MatcherTuple& matcher_tuple, const ValueTuple& value_tuple) { return TuplePrefix::Matches(matcher_tuple, value_tuple) && std::get(matcher_tuple).Matches(std::get(value_tuple)); } // TuplePrefix::ExplainMatchFailuresTo(matchers, values, os) // describes failures in matching the first N fields of matchers // against the first N fields of values. If there is no failure, // nothing will be streamed to os. template static void ExplainMatchFailuresTo(const MatcherTuple& matchers, const ValueTuple& values, ::std::ostream* os) { // First, describes failures in the first N - 1 fields. 
TuplePrefix::ExplainMatchFailuresTo(matchers, values, os); // Then describes the failure (if any) in the (N - 1)-th (0-based) // field. typename std::tuple_element::type matcher = std::get(matchers); typedef typename std::tuple_element::type Value; const Value& value = std::get(values); StringMatchResultListener listener; if (!matcher.MatchAndExplain(value, &listener)) { *os << " Expected arg #" << N - 1 << ": "; std::get(matchers).DescribeTo(os); *os << "\n Actual: "; // We remove the reference in type Value to prevent the // universal printer from printing the address of value, which // isn't interesting to the user most of the time. The // matcher's MatchAndExplain() method handles the case when // the address is interesting. internal::UniversalPrint(value, os); PrintIfNotEmpty(listener.str(), os); *os << "\n"; } } }; // The base case. template <> class TuplePrefix<0> { public: template static bool Matches(const MatcherTuple& /* matcher_tuple */, const ValueTuple& /* value_tuple */) { return true; } template static void ExplainMatchFailuresTo(const MatcherTuple& /* matchers */, const ValueTuple& /* values */, ::std::ostream* /* os */) {} }; // TupleMatches(matcher_tuple, value_tuple) returns true if and only if // all matchers in matcher_tuple match the corresponding fields in // value_tuple. It is a compiler error if matcher_tuple and // value_tuple have different number of fields or incompatible field // types. template bool TupleMatches(const MatcherTuple& matcher_tuple, const ValueTuple& value_tuple) { // Makes sure that matcher_tuple and value_tuple have the same // number of fields. GTEST_COMPILE_ASSERT_(std::tuple_size::value == std::tuple_size::value, matcher_and_value_have_different_numbers_of_fields); return TuplePrefix::value>::Matches(matcher_tuple, value_tuple); } // Describes failures in matching matchers against values. If there // is no failure, nothing will be streamed to os. 
template void ExplainMatchFailureTupleTo(const MatcherTuple& matchers, const ValueTuple& values, ::std::ostream* os) { TuplePrefix::value>::ExplainMatchFailuresTo( matchers, values, os); } // TransformTupleValues and its helper. // // TransformTupleValuesHelper hides the internal machinery that // TransformTupleValues uses to implement a tuple traversal. template class TransformTupleValuesHelper { private: typedef ::std::tuple_size TupleSize; public: // For each member of tuple 't', taken in order, evaluates '*out++ = f(t)'. // Returns the final value of 'out' in case the caller needs it. static OutIter Run(Func f, const Tuple& t, OutIter out) { return IterateOverTuple()(f, t, out); } private: template struct IterateOverTuple { OutIter operator() (Func f, const Tup& t, OutIter out) const { *out++ = f(::std::get(t)); return IterateOverTuple()(f, t, out); } }; template struct IterateOverTuple { OutIter operator() (Func /* f */, const Tup& /* t */, OutIter out) const { return out; } }; }; // Successively invokes 'f(element)' on each element of the tuple 't', // appending each result to the 'out' iterator. Returns the final value // of 'out'. template OutIter TransformTupleValues(Func f, const Tuple& t, OutIter out) { return TransformTupleValuesHelper::Run(f, t, out); } // Implements A(). template class AnyMatcherImpl : public MatcherInterface { public: bool MatchAndExplain(const T& /* x */, MatchResultListener* /* listener */) const override { return true; } void DescribeTo(::std::ostream* os) const override { *os << "is anything"; } void DescribeNegationTo(::std::ostream* os) const override { // This is mostly for completeness' safe, as it's not very useful // to write Not(A()). However we cannot completely rule out // such a possibility, and it doesn't hurt to be prepared. *os << "never matches"; } }; // Implements _, a matcher that matches any value of any // type. 
This is a polymorphic matcher, so we need a template type // conversion operator to make it appearing as a Matcher for any // type T. class AnythingMatcher { public: template operator Matcher() const { return A(); } }; // Implements the polymorphic IsNull() matcher, which matches any raw or smart // pointer that is NULL. class IsNullMatcher { public: template bool MatchAndExplain(const Pointer& p, MatchResultListener* /* listener */) const { return p == nullptr; } void DescribeTo(::std::ostream* os) const { *os << "is NULL"; } void DescribeNegationTo(::std::ostream* os) const { *os << "isn't NULL"; } }; // Implements the polymorphic NotNull() matcher, which matches any raw or smart // pointer that is not NULL. class NotNullMatcher { public: template bool MatchAndExplain(const Pointer& p, MatchResultListener* /* listener */) const { return p != nullptr; } void DescribeTo(::std::ostream* os) const { *os << "isn't NULL"; } void DescribeNegationTo(::std::ostream* os) const { *os << "is NULL"; } }; // Ref(variable) matches any argument that is a reference to // 'variable'. This matcher is polymorphic as it can match any // super type of the type of 'variable'. // // The RefMatcher template class implements Ref(variable). It can // only be instantiated with a reference type. This prevents a user // from mistakenly using Ref(x) to match a non-reference function // argument. For example, the following will righteously cause a // compiler error: // // int n; // Matcher m1 = Ref(n); // This won't compile. // Matcher m2 = Ref(n); // This will compile. template class RefMatcher; template class RefMatcher { // Google Mock is a generic framework and thus needs to support // mocking any function types, including those that take non-const // reference arguments. Therefore the template parameter T (and // Super below) can be instantiated to either a const type or a // non-const type. 
public: // RefMatcher() takes a T& instead of const T&, as we want the // compiler to catch using Ref(const_value) as a matcher for a // non-const reference. explicit RefMatcher(T& x) : object_(x) {} // NOLINT template operator Matcher() const { // By passing object_ (type T&) to Impl(), which expects a Super&, // we make sure that Super is a super type of T. In particular, // this catches using Ref(const_value) as a matcher for a // non-const reference, as you cannot implicitly convert a const // reference to a non-const reference. return MakeMatcher(new Impl(object_)); } private: template class Impl : public MatcherInterface { public: explicit Impl(Super& x) : object_(x) {} // NOLINT // MatchAndExplain() takes a Super& (as opposed to const Super&) // in order to match the interface MatcherInterface. bool MatchAndExplain(Super& x, MatchResultListener* listener) const override { *listener << "which is located @" << static_cast(&x); return &x == &object_; } void DescribeTo(::std::ostream* os) const override { *os << "references the variable "; UniversalPrinter::Print(object_, os); } void DescribeNegationTo(::std::ostream* os) const override { *os << "does not reference the variable "; UniversalPrinter::Print(object_, os); } private: const Super& object_; GTEST_DISALLOW_ASSIGN_(Impl); }; T& object_; GTEST_DISALLOW_ASSIGN_(RefMatcher); }; // Polymorphic helper functions for narrow and wide string matchers. inline bool CaseInsensitiveCStringEquals(const char* lhs, const char* rhs) { return String::CaseInsensitiveCStringEquals(lhs, rhs); } inline bool CaseInsensitiveCStringEquals(const wchar_t* lhs, const wchar_t* rhs) { return String::CaseInsensitiveWideCStringEquals(lhs, rhs); } // String comparison for narrow or wide strings that can have embedded NUL // characters. template bool CaseInsensitiveStringEquals(const StringType& s1, const StringType& s2) { // Are the heads equal? 
if (!CaseInsensitiveCStringEquals(s1.c_str(), s2.c_str())) { return false; } // Skip the equal heads. const typename StringType::value_type nul = 0; const size_t i1 = s1.find(nul), i2 = s2.find(nul); // Are we at the end of either s1 or s2? if (i1 == StringType::npos || i2 == StringType::npos) { return i1 == i2; } // Are the tails equal? return CaseInsensitiveStringEquals(s1.substr(i1 + 1), s2.substr(i2 + 1)); } // String matchers. // Implements equality-based string matchers like StrEq, StrCaseNe, and etc. template class StrEqualityMatcher { public: StrEqualityMatcher(const StringType& str, bool expect_eq, bool case_sensitive) : string_(str), expect_eq_(expect_eq), case_sensitive_(case_sensitive) {} #if GTEST_HAS_ABSL bool MatchAndExplain(const absl::string_view& s, MatchResultListener* listener) const { // This should fail to compile if absl::string_view is used with wide // strings. const StringType& str = std::string(s); return MatchAndExplain(str, listener); } #endif // GTEST_HAS_ABSL // Accepts pointer types, particularly: // const char* // char* // const wchar_t* // wchar_t* template bool MatchAndExplain(CharType* s, MatchResultListener* listener) const { if (s == nullptr) { return !expect_eq_; } return MatchAndExplain(StringType(s), listener); } // Matches anything that can convert to StringType. // // This is a template, not just a plain function with const StringType&, // because absl::string_view has some interfering non-explicit constructors. template bool MatchAndExplain(const MatcheeStringType& s, MatchResultListener* /* listener */) const { const StringType& s2(s); const bool eq = case_sensitive_ ? s2 == string_ : CaseInsensitiveStringEquals(s2, string_); return expect_eq_ == eq; } void DescribeTo(::std::ostream* os) const { DescribeToHelper(expect_eq_, os); } void DescribeNegationTo(::std::ostream* os) const { DescribeToHelper(!expect_eq_, os); } private: void DescribeToHelper(bool expect_eq, ::std::ostream* os) const { *os << (expect_eq ? 
"is " : "isn't "); *os << "equal to "; if (!case_sensitive_) { *os << "(ignoring case) "; } UniversalPrint(string_, os); } const StringType string_; const bool expect_eq_; const bool case_sensitive_; GTEST_DISALLOW_ASSIGN_(StrEqualityMatcher); }; // Implements the polymorphic HasSubstr(substring) matcher, which // can be used as a Matcher as long as T can be converted to a // string. template class HasSubstrMatcher { public: explicit HasSubstrMatcher(const StringType& substring) : substring_(substring) {} #if GTEST_HAS_ABSL bool MatchAndExplain(const absl::string_view& s, MatchResultListener* listener) const { // This should fail to compile if absl::string_view is used with wide // strings. const StringType& str = std::string(s); return MatchAndExplain(str, listener); } #endif // GTEST_HAS_ABSL // Accepts pointer types, particularly: // const char* // char* // const wchar_t* // wchar_t* template bool MatchAndExplain(CharType* s, MatchResultListener* listener) const { return s != nullptr && MatchAndExplain(StringType(s), listener); } // Matches anything that can convert to StringType. // // This is a template, not just a plain function with const StringType&, // because absl::string_view has some interfering non-explicit constructors. template bool MatchAndExplain(const MatcheeStringType& s, MatchResultListener* /* listener */) const { const StringType& s2(s); return s2.find(substring_) != StringType::npos; } // Describes what this matcher matches. void DescribeTo(::std::ostream* os) const { *os << "has substring "; UniversalPrint(substring_, os); } void DescribeNegationTo(::std::ostream* os) const { *os << "has no substring "; UniversalPrint(substring_, os); } private: const StringType substring_; GTEST_DISALLOW_ASSIGN_(HasSubstrMatcher); }; // Implements the polymorphic StartsWith(substring) matcher, which // can be used as a Matcher as long as T can be converted to a // string. 
template class StartsWithMatcher { public: explicit StartsWithMatcher(const StringType& prefix) : prefix_(prefix) { } #if GTEST_HAS_ABSL bool MatchAndExplain(const absl::string_view& s, MatchResultListener* listener) const { // This should fail to compile if absl::string_view is used with wide // strings. const StringType& str = std::string(s); return MatchAndExplain(str, listener); } #endif // GTEST_HAS_ABSL // Accepts pointer types, particularly: // const char* // char* // const wchar_t* // wchar_t* template bool MatchAndExplain(CharType* s, MatchResultListener* listener) const { return s != nullptr && MatchAndExplain(StringType(s), listener); } // Matches anything that can convert to StringType. // // This is a template, not just a plain function with const StringType&, // because absl::string_view has some interfering non-explicit constructors. template bool MatchAndExplain(const MatcheeStringType& s, MatchResultListener* /* listener */) const { const StringType& s2(s); return s2.length() >= prefix_.length() && s2.substr(0, prefix_.length()) == prefix_; } void DescribeTo(::std::ostream* os) const { *os << "starts with "; UniversalPrint(prefix_, os); } void DescribeNegationTo(::std::ostream* os) const { *os << "doesn't start with "; UniversalPrint(prefix_, os); } private: const StringType prefix_; GTEST_DISALLOW_ASSIGN_(StartsWithMatcher); }; // Implements the polymorphic EndsWith(substring) matcher, which // can be used as a Matcher as long as T can be converted to a // string. template class EndsWithMatcher { public: explicit EndsWithMatcher(const StringType& suffix) : suffix_(suffix) {} #if GTEST_HAS_ABSL bool MatchAndExplain(const absl::string_view& s, MatchResultListener* listener) const { // This should fail to compile if absl::string_view is used with wide // strings. 
const StringType& str = std::string(s); return MatchAndExplain(str, listener); } #endif // GTEST_HAS_ABSL // Accepts pointer types, particularly: // const char* // char* // const wchar_t* // wchar_t* template bool MatchAndExplain(CharType* s, MatchResultListener* listener) const { return s != nullptr && MatchAndExplain(StringType(s), listener); } // Matches anything that can convert to StringType. // // This is a template, not just a plain function with const StringType&, // because absl::string_view has some interfering non-explicit constructors. template bool MatchAndExplain(const MatcheeStringType& s, MatchResultListener* /* listener */) const { const StringType& s2(s); return s2.length() >= suffix_.length() && s2.substr(s2.length() - suffix_.length()) == suffix_; } void DescribeTo(::std::ostream* os) const { *os << "ends with "; UniversalPrint(suffix_, os); } void DescribeNegationTo(::std::ostream* os) const { *os << "doesn't end with "; UniversalPrint(suffix_, os); } private: const StringType suffix_; GTEST_DISALLOW_ASSIGN_(EndsWithMatcher); }; // Implements a matcher that compares the two fields of a 2-tuple // using one of the ==, <=, <, etc, operators. The two fields being // compared don't have to have the same type. // // The matcher defined here is polymorphic (for example, Eq() can be // used to match a std::tuple, a std::tuple, // etc). Therefore we use a template type conversion operator in the // implementation. 
template class PairMatchBase { public: template operator Matcher<::std::tuple>() const { return Matcher<::std::tuple>(new Impl&>); } template operator Matcher&>() const { return MakeMatcher(new Impl&>); } private: static ::std::ostream& GetDesc(::std::ostream& os) { // NOLINT return os << D::Desc(); } template class Impl : public MatcherInterface { public: bool MatchAndExplain(Tuple args, MatchResultListener* /* listener */) const override { return Op()(::std::get<0>(args), ::std::get<1>(args)); } void DescribeTo(::std::ostream* os) const override { *os << "are " << GetDesc; } void DescribeNegationTo(::std::ostream* os) const override { *os << "aren't " << GetDesc; } }; }; class Eq2Matcher : public PairMatchBase { public: static const char* Desc() { return "an equal pair"; } }; class Ne2Matcher : public PairMatchBase { public: static const char* Desc() { return "an unequal pair"; } }; class Lt2Matcher : public PairMatchBase { public: static const char* Desc() { return "a pair where the first < the second"; } }; class Gt2Matcher : public PairMatchBase { public: static const char* Desc() { return "a pair where the first > the second"; } }; class Le2Matcher : public PairMatchBase { public: static const char* Desc() { return "a pair where the first <= the second"; } }; class Ge2Matcher : public PairMatchBase { public: static const char* Desc() { return "a pair where the first >= the second"; } }; // Implements the Not(...) matcher for a particular argument type T. // We do not nest it inside the NotMatcher class template, as that // will prevent different instantiations of NotMatcher from sharing // the same NotMatcherImpl class. 
template class NotMatcherImpl : public MatcherInterface { public: explicit NotMatcherImpl(const Matcher& matcher) : matcher_(matcher) {} bool MatchAndExplain(const T& x, MatchResultListener* listener) const override { return !matcher_.MatchAndExplain(x, listener); } void DescribeTo(::std::ostream* os) const override { matcher_.DescribeNegationTo(os); } void DescribeNegationTo(::std::ostream* os) const override { matcher_.DescribeTo(os); } private: const Matcher matcher_; GTEST_DISALLOW_ASSIGN_(NotMatcherImpl); }; // Implements the Not(m) matcher, which matches a value that doesn't // match matcher m. template class NotMatcher { public: explicit NotMatcher(InnerMatcher matcher) : matcher_(matcher) {} // This template type conversion operator allows Not(m) to be used // to match any type m can match. template operator Matcher() const { return Matcher(new NotMatcherImpl(SafeMatcherCast(matcher_))); } private: InnerMatcher matcher_; GTEST_DISALLOW_ASSIGN_(NotMatcher); }; // Implements the AllOf(m1, m2) matcher for a particular argument type // T. We do not nest it inside the BothOfMatcher class template, as // that will prevent different instantiations of BothOfMatcher from // sharing the same BothOfMatcherImpl class. template class AllOfMatcherImpl : public MatcherInterface { public: explicit AllOfMatcherImpl(std::vector > matchers) : matchers_(std::move(matchers)) {} void DescribeTo(::std::ostream* os) const override { *os << "("; for (size_t i = 0; i < matchers_.size(); ++i) { if (i != 0) *os << ") and ("; matchers_[i].DescribeTo(os); } *os << ")"; } void DescribeNegationTo(::std::ostream* os) const override { *os << "("; for (size_t i = 0; i < matchers_.size(); ++i) { if (i != 0) *os << ") or ("; matchers_[i].DescribeNegationTo(os); } *os << ")"; } bool MatchAndExplain(const T& x, MatchResultListener* listener) const override { // If either matcher1_ or matcher2_ doesn't match x, we only need // to explain why one of them fails. 
std::string all_match_result; for (size_t i = 0; i < matchers_.size(); ++i) { StringMatchResultListener slistener; if (matchers_[i].MatchAndExplain(x, &slistener)) { if (all_match_result.empty()) { all_match_result = slistener.str(); } else { std::string result = slistener.str(); if (!result.empty()) { all_match_result += ", and "; all_match_result += result; } } } else { *listener << slistener.str(); return false; } } // Otherwise we need to explain why *both* of them match. *listener << all_match_result; return true; } private: const std::vector > matchers_; GTEST_DISALLOW_ASSIGN_(AllOfMatcherImpl); }; // VariadicMatcher is used for the variadic implementation of // AllOf(m_1, m_2, ...) and AnyOf(m_1, m_2, ...). // CombiningMatcher is used to recursively combine the provided matchers // (of type Args...). template