pax_global_header00006660000000000000000000000064152057763540014530gustar00rootroot0000000000000052 comment=004c4eae26ca2daba9bc5d62d46cc6c4c2545b3b kylin-ai-data-management-service-1.2.0.0/000077500000000000000000000000001520577635400200625ustar00rootroot00000000000000kylin-ai-data-management-service-1.2.0.0/.clang-format000066400000000000000000000002131520577635400224310ustar00rootroot00000000000000--- Language: Cpp BasedOnStyle: Google ColumnLimit: 120 IndentWidth: 4 AccessModifierOffset: -4 TabWidth: 4 #DerivePointerAlignment: false kylin-ai-data-management-service-1.2.0.0/.gitignore000066400000000000000000000014661520577635400220610ustar00rootroot00000000000000# This file is used to ignore files which are generated # ---------------------------------------------------------------------------- *~ *.autosave *.a *.core *.moc *.o *.obj *.orig *.rej *.so *.so.* *_pch.h.cpp *_resource.rc *.qm .#* *.*# core !core/ tags .DS_Store .directory *.debug Makefile* *.prl *.app moc_*.cpp ui_*.h qrc_*.cpp Thumbs.db *.res *.rc /.qmake.cache /.qmake.stash # qtcreator generated files *.pro.user* CMakeLists.txt.user* # xemacs temporary files *.flc # Vim temporary files .*.swp # Visual Studio generated files *.ib_pdb_index *.idb *.ilk *.pdb *.sln *.suo *.vcproj *vcproj.*.*.user *.ncb *.sdf *.opensdf *.vcxproj *vcxproj.* # MinGW generated files *.Debug *.Release # Python byte code *.pyc # Binaries # -------- *.dll *.exe build .vscode .cache .reuse # clion .idea cmake-build-debug kylin-ai-data-management-service-1.2.0.0/CMakeLists.txt000066400000000000000000000061431520577635400226260ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.5) project(kyai-data-management-service LANGUAGES CXX C) set(CMAKE_CXX_STANDARD 17) file(GLOB_RECURSE SOURCE_LIST ${PROJECT_SOURCE_DIR}/src/aiIndex/*.cpp ${PROJECT_SOURCE_DIR}/src/controller/*.cpp ${PROJECT_SOURCE_DIR}/src/dao/*.cpp ${PROJECT_SOURCE_DIR}/src/service/*.cpp ${PROJECT_SOURCE_DIR}/src/main.cpp) find_package(PkgConfig REQUIRED) 
# ---------------------------------------------------------------------------
# External dependencies
# ---------------------------------------------------------------------------
# Every pkg-config package is requested as an IMPORTED_TARGET so that it can be
# linked as PkgConfig::<PREFIX>. The classic <PREFIX>_INCLUDE_DIRS and
# <PREFIX>_LIBRARIES variables are still populated by pkg_check_modules().
pkg_check_modules(BUSINESS_FRAMEWORK REQUIRED IMPORTED_TARGET kyai-business-framework)
pkg_check_modules(DOCUMENT_SERVICE REQUIRED IMPORTED_TARGET kylin-ai-document-service)
pkg_check_modules(GIO REQUIRED IMPORTED_TARGET gio-unix-2.0)
pkg_check_modules(NLP REQUIRED IMPORTED_TARGET kysdk-genai-nlp)

find_package(nlohmann_json REQUIRED)
find_package(SQLite3 REQUIRED)
find_package(Threads REQUIRED)

# ---------------------------------------------------------------------------
# Optional SQLite database recovery sources
# ---------------------------------------------------------------------------
# The sources under src/sqliteRecover are only compiled when the detected
# SQLite3 is at least 3.42.0; otherwise the list is left empty and a warning
# is printed at configure time.
if(SQLite3_VERSION VERSION_GREATER_EQUAL "3.42.0")
    message(STATUS "SQLite3 version ${SQLite3_VERSION} >= 3.42.0 - Enabling database recovery features")
    set(SQLITE_RECOVERY_SOURCES
        ${PROJECT_SOURCE_DIR}/src/sqliteRecover/dbdata.c
        ${PROJECT_SOURCE_DIR}/src/sqliteRecover/sqlite3recover.c
        ${PROJECT_SOURCE_DIR}/src/sqliteRecover/recoverdb.cpp)
else()
    message(WARNING "SQLite3 version ${SQLite3_VERSION} is too old (need >= 3.42.0) - Disabling database recovery features")
    set(SQLITE_RECOVERY_SOURCES "")
endif()

# ---------------------------------------------------------------------------
# Include paths
# ---------------------------------------------------------------------------
# NOTE(review): kept directory-scoped on purpose. Sub-directories added later
# in this file inherit these paths and may rely on them; confirm before
# converting to target_include_directories().
include_directories(
    ${PROJECT_SOURCE_DIR}/src
    ${PROJECT_SOURCE_DIR}/src/service
    ${PROJECT_SOURCE_DIR}/src/dao
    ${PROJECT_SOURCE_DIR}/src/aiIndex
    ${PROJECT_SOURCE_DIR}/src/sqliteRecover
    ${GIO_INCLUDE_DIRS}
    ${NLP_INCLUDE_DIRS})

# ---------------------------------------------------------------------------
# Service executable
# ---------------------------------------------------------------------------
add_executable(kyai-data-management-service
    ${SOURCE_LIST}
    ${SQLITE_RECOVERY_SOURCES})

# Tell the C/C++ sources that the recovery code was compiled in.
if(SQLite3_VERSION VERSION_GREATER_EQUAL "3.42.0")
    target_compile_definitions(kyai-data-management-service
        PRIVATE ENABLE_SQLITE_DB_RECOVERY)
endif()

# Link against imported targets only: they carry their own include
# directories and resolved library locations, unlike raw *_LIBRARIES
# variables or bare library names.
target_link_libraries(kyai-data-management-service
    PRIVATE
        PkgConfig::BUSINESS_FRAMEWORK
        PkgConfig::DOCUMENT_SERVICE
        nlohmann_json::nlohmann_json
        PkgConfig::NLP
        SQLite::SQLite3
        PkgConfig::GIO
        Threads::Threads)

# Original note: works around the Loongson architecture not supporting the
# fstream standard library. Concretely, GNU compilers older than version 9
# additionally get libstdc++fs on the link line.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9)
        target_link_libraries(kyai-data-management-service PRIVATE stdc++fs)
    endif()
endif()

# ---------------------------------------------------------------------------
# Installation
# ---------------------------------------------------------------------------
# GNUInstallDirs defines CMAKE_INSTALL_LIBDIR and CMAKE_INSTALL_BINDIR, which
# the install(TARGETS ...) call below consumes. Without this include they
# would be empty unless supplied on the command line.
include(GNUInstallDirs)

install(FILES conf/DataManagement.json
    DESTINATION /usr/share/kylin-ai/kyai-business-framework)
install(FILES conf/kyai-data-management-service.service
    DESTINATION /usr/lib/systemd/user)

# The argument list of this install(TARGETS ...) call continues on the next line.
install(TARGETS
kyai-data-management-service LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) if (DEFINED ENABLE_TEST) unset(ENABLE_TEST CACHE) endif (DEFINED ENABLE_TEST) option(ENABLE_TEST "Build Test" OFF) if (BUILD_TESTING OR ENABLE_TEST) # 单元测试覆盖率 本地获取覆盖率报告即可 获取前需要sudo apt install lcov # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage") # set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage") enable_testing() add_subdirectory(test) endif () kylin-ai-data-management-service-1.2.0.0/LICENSE000066400000000000000000001057571520577635400211060ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. 
To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. 
States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. 
An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. 
However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. 
Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 
b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: <program> Copyright (C) <year> <name of author> This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . kylin-ai-data-management-service-1.2.0.0/conf/000077500000000000000000000000001520577635400210075ustar00rootroot00000000000000kylin-ai-data-management-service-1.2.0.0/conf/DataManagement.json000077500000000000000000000003031520577635400245470ustar00rootroot00000000000000{ "service_name": "DataManagement", "log_level": 3, "chunk_size": 512, "nlp_model_name": "Qwen-2.5-3b_1.0", "extract_tag_nums":6, "idle_second": 60, "ai_index_task_type": [1,2,3] } kylin-ai-data-management-service-1.2.0.0/conf/kyai-data-management-service.service000066400000000000000000000003511520577635400300040ustar00rootroot00000000000000[Unit] Description=Kylin AI Data Management Business Service [Service] ExecStart=/usr/bin/kyai-data-management-service -s DataManagement Restart=always StandardOutput=journal StandardError=journal [Install] WantedBy=default.target 
kylin-ai-data-management-service-1.2.0.0/src/000077500000000000000000000000001520577635400206515ustar00rootroot00000000000000kylin-ai-data-management-service-1.2.0.0/src/aiIndex/000077500000000000000000000000001520577635400222325ustar00rootroot00000000000000kylin-ai-data-management-service-1.2.0.0/src/aiIndex/aiIndex.cpp000066400000000000000000000336541520577635400243320ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include "aiIndex.h" #include #include #include #include "cpptime.h" #include "embedding.h" #include "fileInfoService.h" #include "recoverdb.h" #include "summaryTask.h" #include "systemCallback.h" #include "tagTask.h" #include "textEmbeddingTask.h" namespace DataManagement { AIIndex& AIIndex::getInstance() { static AIIndex aiIndex; return aiIndex; } AIIndex::AIIndex() : idleSignalConnection(nullptr), idleSignalId(0), idleState(IDLE_STATE_BUSY), gSettings(nullptr), schema(nullptr) {} int AIIndex::exec() { if (checkAndRecoverDatabase()) { return -1; } syncData(); resetFailedTasksOnRestart(); if (listenAIIndexToggle()) { return -1; } if (listenSignal()) { return -1; } createWorkTask(); startWorkTask(); return 0; } void AIIndex::setIdleState(IdleState newStatus) { PRINT_INFO("SetIdleState from %d to %d.\n", idleState, newStatus); std::unique_lock lock(idleLock); if (!checkIdleState(newStatus)) { PRINT_ERROR("SetIdleState status is wrong, from %d to %d.\n", idleState, newStatus); return; } idleState = newStatus; } void AIIndex::setAIIndexEnable(bool enable) { PRINT_INFO("SetAIIndexEnable from %s to %s.\n", isAIIndexEnable ? "true" : "false", enable ? 
"true" : "false"); std::unique_lock lock(aiIndexLock); isAIIndexEnable = enable; } bool AIIndex::isEnable() { std::shared_lock lock(aiIndexLock); return isAIIndexEnable; } void AIIndex::notifyAllTask() { for (const auto& worker : taskLists) { worker->awakeSelf(); } } bool AIIndex::isIdle() { std::shared_lock lock(idleLock); return idleState == IDLE_STATE_TRUE_IDLE; } int AIIndex::listenSignal() { PRINT_INFO("Start listening system idle signal...\n"); // 连接到会话总线,dbus demo的方式 GError* error = nullptr; idleSignalConnection = g_bus_get_sync(G_BUS_TYPE_SESSION, nullptr, &error); if (idleSignalConnection == nullptr) { if (error) { PRINT_ERROR("Connecting dbus demo failed, error: %s.\n", error->message); g_error_free(error); } else { PRINT_ERROR("Connecting dbus demo failed, but error is null.\n"); } return -1; } // 系统空闲信号的地址 const char* sender = "org.gnome.SessionManager"; const char* interfaceName = "org.gnome.SessionManager.Presence"; const char* objectPath = "/org/gnome/SessionManager/Presence"; const char* single = "StatusChanged"; idleSignalId = g_dbus_connection_signal_subscribe(idleSignalConnection, sender, interfaceName, single, objectPath, nullptr, G_DBUS_SIGNAL_FLAGS_NONE, judgeIsIdle, nullptr, nullptr); PRINT_INFO("Start listening system idle signal success, id: %d\n", idleSignalId); return 0; } int AIIndex::listenAIIndexToggle() { PRINT_INFO("Start listening ai index toggle signal...\n"); // 监听AI Index Toggle const gchar* schemaID = "org.ukui.search.settings"; const gchar* keyName = "ai-index-enable"; const gchar* expectedType = "b"; const gchar* single = "changed::ai-index-enable"; GSettingsSchemaSource* source = g_settings_schema_source_get_default(); schema = g_settings_schema_source_lookup(source, schemaID, TRUE); if (!schema) { PRINT_ERROR("Schema %s not found.", schemaID); return -1; } gSettings = g_settings_new(schemaID); if (!gSettings) { g_settings_schema_unref(schema); schema = nullptr; PRINT_ERROR("Failed to create GSettings object with schema ID: 
%s", schemaID); return -1; } if (!isAiIndexKeyValid(schema, gSettings, keyName, expectedType)) { g_settings_schema_unref(schema); schema = nullptr; g_object_unref(gSettings); gSettings = nullptr; return -1; } g_signal_connect(gSettings, single, G_CALLBACK(judgeIsAIIndexEnable), NULL); // isAIIndexEnable初始化 isAIIndexEnable = g_settings_get_boolean(gSettings, keyName); PRINT_INFO("AIIndex::listenAIIndexToggle current AIIndexEnable is: %s.\n", isAIIndexEnable ? "true" : "false"); if (isAIIndexEnable) { // 如果在进程启动并开着ai索引时,这个时候采取开启定时器去加载模型 loadResourcesAfterDelay(); } return 0; } bool AIIndex::isAiIndexKeyValid(GSettingsSchema* schema, GSettings* settings, const gchar* key, const gchar* expectedType) { if (!schema || !settings || !key || !expectedType) { PRINT_ERROR("AIIndex::isAiIndexKeyValid parameter input error.\n"); return false; } gchar** keys = g_settings_schema_list_keys(schema); if (!keys) { PRINT_ERROR("AIIndex::isAiIndexKeyValid get schema list keys error.\n"); return false; } // 通过g_settings_get_value接口获取不存在的键会越界 bool isKeyExists = false; for (gchar** k = keys; *k; k++) { if (g_strcmp0(*k, key) == 0) { isKeyExists = true; break; } } g_strfreev(keys); if (!isKeyExists) { PRINT_ERROR("Key %s does not exist in schema.\n", key); return false; } // 检查键类型是否匹配 GVariantType* expectedVarType = g_variant_type_new(expectedType); GVariant* value = g_settings_get_value(settings, key); const GVariantType* actualVarType = g_variant_get_type(value); // 返回是引用不需要释放 bool type_matches = g_variant_type_equal(expectedVarType, actualVarType); g_variant_unref(value); g_variant_type_free(expectedVarType); if (!type_matches) { PRINT_ERROR("Key %s type does not match expected type %s.\n", key, expectedType); return false; } return true; } bool AIIndex::checkIdleState(IdleState newStatus) const { switch (newStatus) { case IDLE_STATE_BUSY: return idleState == IDLE_STATE_SYSTEM_IDLE || idleState == IDLE_STATE_TRUE_IDLE; case IDLE_STATE_SYSTEM_IDLE: return idleState == IDLE_STATE_BUSY; 
case IDLE_STATE_TRUE_IDLE: return idleState == IDLE_STATE_SYSTEM_IDLE; default: return false; } } void AIIndex::createWorkTask() { const std::map> taskTypeMap = { {TASK_TYPE_TEXT_EMBEDDING, std::make_shared()}, {TASK_TYPE_TAG, std::make_shared()}, {TASK_TYPE_SUMMARY, std::make_shared()}, }; std::vector taskType = Configuration::getIntArray("ai_index_task_type", {1, 2, 3}); for (const auto& item : taskType) { const std::map>::const_iterator& it = taskTypeMap.find(item); if (it == taskTypeMap.end()) { PRINT_ERROR("createWorkTask cannot find task type: %d.\n", item); continue; } PRINT_INFO("createWorkTask %d.\n", item); taskLists.push_back(it->second); } } void AIIndex::startWorkTask() { for (const auto& worker : taskLists) { worker->start(); } } /* *** 启动10s后加载向量模型 防止在没有加载向量化模型时 *** 第一次搜索等待时间过长 (等向量化模型加载时间没有问题的时候可以去掉) */ void AIIndex::loadResourcesAfterDelay() { int waitSecond = 10; auto t = CppTime::getGlobalTimer(); t->add(std::chrono::seconds(waitSecond), [t](CppTime::timer_id id) { Embedding textEmbedding(DataManagementEmbeddingDataType::Text, scene::TaskPriority::MEDIUM); Embedding imageTextEmbedding(DataManagementEmbeddingDataType::ImageText, scene::TaskPriority::HIGH); PRINT_INFO("AIIndex::loadResourcesAfterDelay load embedding model resources.\n"); t->discard(id); }); } /** * 版本升级,同步已经建好的数据 */ void AIIndex::syncData() { SQLite::Database db = FileDatabase::createDatabase(); if (!db.tableExists("Files")) { PRINT_INFO("AIIndex::syncData Files table not exist.\n"); return; } SQLite::Statement query(db, "SELECT file_id, file_path FROM Files"); int rows; while (query.executeStep()) { int64_t fileId = query.getColumn(0); std::string filePath = query.getColumn(1); std::string modifyTime = FileInfoService::getLastModifyTime(filePath); // 图片向量化已经做完,无需创建任务 const std::vector imageSupportFormat = {"png", "jpg", "jpeg", "jpe", "bmp", "dib"}; if (std::find(imageSupportFormat.begin(), imageSupportFormat.end(), filePath.substr(filePath.rfind('.') + 1)) != 
imageSupportFormat.end()) { PRINT_DEBUG("file path: %s, is image.\n", filePath.c_str()); SQLite::Statement fileInfoStatement( db, "INSERT INTO t_file_info(file_id, enable, file_path, file_modify_time) VALUES(?, 1, ?, ?)"); fileInfoStatement.bind(1, fileId); fileInfoStatement.bind(2, filePath); fileInfoStatement.bind(3, modifyTime); rows = fileInfoStatement.exec(); PRINT_DEBUG("SQL: %s, success %d rows.\n", fileInfoStatement.getExpandedSQL().c_str(), rows); continue; } SQLite::Statement fileInfoStatement( db, "INSERT INTO t_file_info(file_id, file_path, file_modify_time) VALUES(?, ?, ?)"); fileInfoStatement.bind(1, fileId); fileInfoStatement.bind(2, filePath); fileInfoStatement.bind(3, modifyTime); rows = fileInfoStatement.exec(); PRINT_DEBUG("SQL: %s, success %d rows.\n", fileInfoStatement.getExpandedSQL().c_str(), rows); // 同步过来的数据,向量化已经做完,标签和摘要未做完 SQLite::Statement aiIndexStatement( db, "INSERT INTO t_ai_index_task(file_id, file_path, state, type) VALUES(?,?,?,?), (?,?,?,?), (?,?,?,?)"); aiIndexStatement.bind(1, fileId); aiIndexStatement.bind(2, filePath); aiIndexStatement.bind(3, TASK_STATE_SUCCESS); aiIndexStatement.bind(4, TASK_TYPE_TEXT_EMBEDDING); aiIndexStatement.bind(5, fileId); aiIndexStatement.bind(6, filePath); aiIndexStatement.bind(7, TASK_STATE_INIT); aiIndexStatement.bind(8, TASK_TYPE_TAG); aiIndexStatement.bind(9, fileId); aiIndexStatement.bind(10, filePath); aiIndexStatement.bind(11, TASK_STATE_INIT); aiIndexStatement.bind(12, TASK_TYPE_SUMMARY); PRINT_DEBUG("SQL: %s.\n", aiIndexStatement.getExpandedSQL().c_str()); rows = aiIndexStatement.exec(); PRINT_DEBUG("SQL: %s, success %d rows.\n", aiIndexStatement.getExpandedSQL().c_str(), rows); } db.exec("DROP TABLE Files"); db.exec("DROP TABLE Tags"); db.exec("DROP TABLE FileTags"); PRINT_INFO("AIIndex::syncData drop table: Files, Tags, FileTags.\n"); scene::VectorDatabase::getInstance().dropCollection("files_tags"); PRINT_INFO("AIIndex::syncData drop vector-db collection: files_tags.\n"); } /** * 
失败且重试次数等于3的任务,在服务重启后,任务重置 */ void AIIndex::resetFailedTasksOnRestart() { SQLite::Database db = FileDatabase::createDatabase(); SQLite::Statement query(db, "SELECT id FROM t_ai_index_task WHERE (state=5 AND retry_num>=3)"); int rows; while (query.executeStep()) { int id = query.getColumn(0); SQLite::Statement statement(db, "UPDATE t_ai_index_task SET state=0, retry_num=0 WHERE id=?"); statement.bind(1, id); rows = statement.exec(); PRINT_DEBUG("SQL: %s, success %d rows.\n", statement.getExpandedSQL().c_str(), rows); } } #ifdef ENABLE_SQLITE_DB_RECOVERY int AIIndex::checkAndRecoverDatabase() { const std::string dbPath = FileDatabase::getDbFilePath(); std::error_code ec; if (!std::filesystem::exists(std::filesystem::path(dbPath), ec) || ec) { // 在新环境中第一次启动,还没有数据库,所以不用去检查数据库是否损坏 return 0; } PRINT_DEBUG("AIIndex::checkAndRecoverDatabase database integrity check start.\n"); if (Sqlite3Recover::checkDatabaseIntegrity(dbPath)) { PRINT_DEBUG("AIIndex::checkAndRecoverDatabase database integrity check success.\n"); return 0; } if (!Sqlite3Recover::recoverDatabase(dbPath)) { PRINT_ERROR("AIIndex::checkAndRecoverDatabase database recover failed.\n"); return -1; } PRINT_DEBUG("AIIndex::checkAndRecoverDatabase database integrity check success.\n"); return 0; } #else int AIIndex::checkAndRecoverDatabase() { PRINT_DEBUG("SQLite3 version is too old (< 3.42.0), database recovery is disabled.\n"); return 0; } #endif AIIndex::~AIIndex() { if (idleSignalConnection && idleSignalId > 0) { g_dbus_connection_signal_unsubscribe(idleSignalConnection, idleSignalId); idleSignalId = 0; g_object_unref(idleSignalConnection); idleSignalConnection = nullptr; } if (gSettings) { g_object_unref(gSettings); gSettings = nullptr; } if (schema) { g_settings_schema_unref(schema); schema = nullptr; } } } // namespace DataManagement kylin-ai-data-management-service-1.2.0.0/src/aiIndex/aiIndex.h000066400000000000000000000041071520577635400237660ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft 
Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #pragma once #include #include #include #include #include #include "baseTask.h" namespace DataManagement { enum IdleState : int { IDLE_STATE_BUSY = 0, // 忙碌 IDLE_STATE_SYSTEM_IDLE, // 收到系统空闲信号 IDLE_STATE_TRUE_IDLE, // 等待时间过后可以开始建索引 IDLE_STATE_MAX }; class AIIndex { public: static AIIndex &getInstance(); int exec(); void setIdleState(IdleState newStatus); void setAIIndexEnable(bool enable); void notifyAllTask(); bool isIdle(); bool isEnable(); private: AIIndex(); void syncData(); void resetFailedTasksOnRestart(); int checkAndRecoverDatabase(); int listenSignal(); int listenAIIndexToggle(); bool isAiIndexKeyValid(GSettingsSchema *schema, GSettings *settings, const gchar *key, const gchar *expectedType); bool checkIdleState(IdleState newStatus) const; void createWorkTask(); void startWorkTask(); void loadResourcesAfterDelay(); ~AIIndex(); private: GDBusConnection *idleSignalConnection; guint idleSignalId; GSettings *gSettings; GSettingsSchema *schema; bool isAIIndexEnable; std::shared_mutex idleLock; std::shared_mutex aiIndexLock; IdleState idleState; std::vector> taskLists; }; } // namespace DataManagement kylin-ai-data-management-service-1.2.0.0/src/aiIndex/baseTask.cpp000066400000000000000000000216741520577635400245050ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. 
* * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include "baseTask.h" #include #include #include #include #include #include "constant.h" #include "documentParse.h" namespace DataManagement { void BaseTask::start() { workThread = std::thread([this] { while (true) { /** * 触发条件: * 1、空闲、索引开关enable、有任务 * 2、忙碌并且有任务在执行 * 3、退出线程 */ std::unique_lock lock(taskLock); taskCV.wait(lock, [this] { return quit || (isReadyToStart()) || (isReadyToStop()); }); // 退出线程 if (quit) { PRINT_INFO("[%s] BaseTask quit.\n", taskName.c_str()); break; } if (isReadyToStart()) { // 处理任务 PRINT_INFO("[%s] BaseTask start to work.\n", taskName.c_str()); isRunning = true; handlerTask(); } else { // 暂停任务 PRINT_INFO("[%s] BaseTask stop to work.\n", taskName.c_str()); isRunning = false; stopTask(); } } }); PRINT_INFO("[%s] BaseTask start.\n", taskName.c_str()); } void BaseTask::awakeSelf() { taskCV.notify_all(); } void BaseTask::notifyOthers(const std::string &filePath) { // 通知当前已有的连接 scene::Application::getInstance().notifyAllConnections("FileIndexFinished", filePath); PRINT_INFO("[%s] notifyOthers file: %s.\n", taskName.c_str(), filePath.c_str()); } bool BaseTask::findTask(TaskType type) { /** * 0,1,2,3 状态的任务可重新进行处理 * 5 失败的任务重试次数小于 3 可重新进行 */ std::string sql = "SELECT id FROM t_ai_index_task WHERE type = ? 
" "and (state in (0,1,2,3) or (state = 5 and retry_num < 3)) limit 1"; SQLite::Statement query(db, sql); query.bind(1, type); if (!query.executeStep()) { PRINT_DEBUG("[%s] SQL: %s, no record.\n", taskName.c_str(), query.getExpandedSQL().c_str()); return false; } PRINT_DEBUG("[%s] SQL: %s, has record.\n", taskName.c_str(), query.getExpandedSQL().c_str()); return true; } bool BaseTask::queryTask(TaskType type) { std::string sql = "SELECT id,file_id,file_path,state,type,create_time,start_time,end_time,retry_num FROM t_ai_index_task " "WHERE type = ? and (state in (0,1,2,3) or (state = 5 and retry_num < 3)) " "order by state asc,create_time asc limit 1"; SQLite::Statement query(db, sql); query.bind(1, type); if (!query.executeStep()) { PRINT_DEBUG("[%s] SQL: %s, no record.\n", taskName.c_str(), query.getExpandedSQL().c_str()); return false; } PRINT_DEBUG("[%s] SQL: %s, has record.\n", taskName.c_str(), query.getExpandedSQL().c_str()); taskDO.id = query.getColumn(0); taskDO.fileId = query.getColumn(1); taskDO.filePath = query.getColumn(2).getString(); taskDO.state = query.getColumn(3); taskDO.type = query.getColumn(4); taskDO.createTime = query.getColumn(5).getString(); taskDO.startTime = query.getColumn(6).getString(); taskDO.endTime = query.getColumn(7).getString(); taskDO.retryNum = query.getColumn(8); return true; } bool BaseTask::modifyTaskState(TaskState state) { std::string sql; switch (state) { case TASK_STATE_PROCESSING: if (taskDO.state == TASK_STATE_SUCCESS) { sql = R"(UPDATE t_ai_index_task SET state = ?, start_time = datetime('now', 'localtime'), end_time = '0000-00-00 00:00:00', retry_num = 0 WHERE id = ?)"; } else { sql = R"(UPDATE t_ai_index_task SET state = ?, start_time = datetime('now', 'localtime'), end_time = '0000-00-00 00:00:00' WHERE id = ?)"; } break; case TASK_STATE_STOP: case TASK_STATE_SUCCESS: sql = R"(UPDATE t_ai_index_task SET state = ?, end_time = datetime('now', 'localtime') WHERE id = ?)"; break; case TASK_STATE_FAILED: sql = R"(UPDATE 
t_ai_index_task SET state = ?, end_time = datetime('now', 'localtime'), retry_num = retry_num+1 WHERE id = ?)"; break; default: PRINT_ERROR("[%s] BaseTask::modifyTaskState wrong state %d.\n", taskName.c_str(), state); return false; } SQLite::Statement statement(db, sql); statement.bind(1, state); statement.bind(2, taskDO.id); int rows = statement.exec(); if (rows != 1) { PRINT_ERROR("[%s] SQL: %s, state modify failed, rows: %d.\n", taskName.c_str(), statement.getExpandedSQL().c_str(), rows); return false; } PRINT_DEBUG("[%s] SQL: %s, state modify success.\n", taskName.c_str(), statement.getExpandedSQL().c_str()); return true; } bool BaseTask::successTaskAndEnableFileIndex() { SQLite::Transaction transaction(db); bool success = modifyTaskState(TASK_STATE_SUCCESS); if (!success) { PRINT_ERROR("[%s] enableTextFileIndex modifyTaskState to TASK_STATE_SUCCESS failed.\n", taskName.c_str()); return false; } // 检查这个文件的任务是否都已经做完 std::vector taskType = Configuration::getIntArray("ai_index_task_type", {1, 2, 3}); std::string sql = "SELECT count(*) FROM t_ai_index_task WHERE file_id = ? 
AND state != 4 AND type in (" + Utils::joinIds(std::unordered_set(taskType.begin(), taskType.end())) + ")"; SQLite::Statement query(db, sql); query.bind(1, taskDO.fileId); if (!query.executeStep()) { PRINT_WARN("[%s] SQL: %s, no record.\n", taskName.c_str(), query.getExpandedSQL().c_str()); return false; } int count = query.getColumn(0); if (count != 0) { PRINT_DEBUG("[%s] SQL: %s, has %d unfinished task.\n", taskName.c_str(), query.getExpandedSQL().c_str(), count); transaction.commit(); return true; } PRINT_DEBUG("[%s] SQL: %s, all task finished.\n", taskName.c_str(), query.getExpandedSQL().c_str()); sql = R"(UPDATE t_file_info SET enable = 1, last_modify_time = datetime('now', 'localtime') WHERE file_id = ?)"; SQLite::Statement statement(db, sql); statement.bind(1, taskDO.fileId); int rows = statement.exec(); if (rows != 1) { PRINT_ERROR("[%s] SQL: %s, enable failed, rows: %d.\n", taskName.c_str(), statement.getExpandedSQL().c_str(), rows); return false; } transaction.commit(); PRINT_DEBUG("[%s] SQL: %s, enable index success.\n", taskName.c_str(), statement.getExpandedSQL().c_str()); // 索引创建成功通知其他服务 notifyOthers(taskDO.filePath); return true; } bool BaseTask::getDocumentChunk(std::vector &chunkResults) { chunkResults.clear(); PRINT_INFO("[%s] BaseTask getDocumentChunk start, file path: %s.\n", taskName.c_str(), taskDO.filePath.c_str()); // 文档分段 DocumentParse documentParse; nlohmann::json ret = documentParse.syncChunkDocument({taskDO.filePath}); if (ret.empty()) { PRINT_ERROR("[%s] %s chunk document server something wrong.\n", taskName.c_str(), taskDO.filePath.c_str()); return false; } if (ret.size() != 1 || ret[0][FILEPATH].get() != taskDO.filePath) { PRINT_ERROR("[%s] chunk document return file path wrong, %s.\n", taskName.c_str(), ret[0][FILEPATH].get().c_str()); return false; } // 文件不能分块,返回的 ret[0][CHUNKS]是空列表,文件能分块,也有可能内容是空字符串 for (const auto &chunk : ret[0][CHUNKS]) { if (!chunk.get().empty()) { chunkResults.push_back(chunk.get()); } else { PRINT_INFO("[%s] 
%s chunk document parse result is empty.\n", taskName.c_str(), taskDO.filePath.c_str()); } } return true; } BaseTask::~BaseTask() { quit = true; awakeSelf(); if (workThread.joinable()) { workThread.join(); } } } // namespace DataManagement kylin-ai-data-management-service-1.2.0.0/src/aiIndex/baseTask.h000066400000000000000000000057631520577635400241530ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #pragma once #include #include #include #include #include #include #include "fileDatabase.h" namespace DataManagement { enum TaskType : int { TASK_TYPE_TEXT_EMBEDDING = 1, // 文档向量化 TASK_TYPE_TAG, // 标签提取 TASK_TYPE_SUMMARY, // 摘要提取 TASK_TYPE_MAX }; enum TaskState : int { TASK_STATE_INIT = 0, // 任务初始化 TASK_STATE_PROCESSING, // 任务处理中 TASK_STATE_STOP, // 任务终止 TASK_STATE_PAUSE, // 任务暂停 TASK_STATE_SUCCESS, // 任务成功 TASK_STATE_FAILED, // 任务失败 TASK_STATE_DISCARD, // 任务废弃 TASK_STATE_MAX }; typedef struct taskDO { int id{}; int fileId{}; std::string filePath; int state{}; int type{}; std::string createTime; std::string startTime; std::string endTime; int retryNum{}; public: std::string toString() const { return "id:" + std::to_string(id) + ",file_id:" + std::to_string(fileId) + ",file_path:" + filePath + ",state:" + std::to_string(state) + ",type:" + std::to_string(type) + ",create_time:" + createTime + ",start_time:" + startTime + ",end_time:" + endTime + ",retry_num:" + std::to_string(retryNum); } } TaskDO; class BaseTask { public: explicit BaseTask(std::string name) : taskName(std::move(name)), isRunning(false), quit(false), db(FileDatabase::createDatabase()) {}; void start(); virtual void handlerTask() = 0; virtual void stopTask() = 0; virtual bool haveTask() { return false; }; virtual bool isReadyToStart() { return false; }; virtual bool isReadyToStop() { return false; }; void awakeSelf(); void notifyOthers(const std::string& filePath); virtual ~BaseTask(); protected: bool findTask(TaskType type); bool queryTask(TaskType type); bool modifyTaskState(TaskState state); bool successTaskAndEnableFileIndex(); bool getDocumentChunk(std::vector& chunkResults); protected: std::string taskName; bool isRunning; TaskDO taskDO; SQLite::Database db; private: std::thread workThread; std::mutex taskLock; std::condition_variable taskCV; bool quit; }; } // namespace DataManagement 
kylin-ai-data-management-service-1.2.0.0/src/aiIndex/cpptime.h000066400000000000000000000237041520577635400240520ustar00rootroot00000000000000/** * The MIT License (MIT) * * Copyright (c) 2015 Michael Egli * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. * * \author Michael Egli * \copyright Michael Egli * \date 11-Jul-2015 * * \file cpptime.h * * C++11 timer component * ===================== * * A portable, header-only C++11 timer component. * * Overview * -------- * * This component can be used to manage a set of timeouts. It is implemented in * pure C++11. It is therefore very portable given a compliant compiler. * * A timeout can be added with one of the `add()` functions, and removed with * the `remove()` function. A timeout can be set to be either one-shot or * periodic. If it is one-shot, the callback is invoked once and the timeout * event is then automatically removed. If the timeout is periodic, it is * always renewed and never automatically removed. 
* * When a timeout is removed or when a one-shot timeout expires, the handler * will be deleted to clean-up any resources. * * Removing a timeout is possible from within the callback. In this case, you * must be careful not to access any captured variables, if any, after calling * `remove()`, because they are no longer valid. * * Timeout Units * ------------- * * The preferred functions for adding timeouts are those that take a * `std::chrono::...` argument. However, for convenience, there is also an API * that takes a uint64_t. When using this API, all values are expected to be * given in microseconds (us). * * For periodic timeouts, a separate timeout can be specified for the initial * (first) timeout, and the periodicity after that. * * To avoid drifts, times are added by simply adding the period to the initially * calculated (or provided) time. Also, we use `wait until` type of API to wait * for a timeout instead of a `wait for` API. * * Data Structure * -------------- * * Internally, a std::vector is used to store timeout events. The timer_id * returned from the `add` functions are used as index to this vector. * * In addition, a std::multiset is used that holds all time points when * timeouts expire. * * Using a vector to store timeout events has some implications. It is very * fast to remove an event, because the timer_id is the vector's index. On the * other hand, this makes it also more complicated to manage the timer_ids. The * current solution is to keep track of ids that are freed in order to re-use * them. A stack is used for this. * * Examples * -------- * * More examples can be found in the `tests` folder. 
* * ~~~ * CppTime::Timer t; * t.add(std::chrono::seconds(1), [](CppTime::timer_id){ std::cout << "got it!"; }); * std::this_thread::sleep_for(std::chrono::seconds(2)); * ~~~ */ #pragma once #include #include #include #include #include #include #include #include #include namespace CppTime { // Public types using timer_id = std::size_t; using handler_t = std::function; using clock = std::chrono::steady_clock; using timestamp = std::chrono::time_point; using duration = std::chrono::microseconds; // Private definitions. Do not rely on this namespace. namespace detail { // The event structure that holds the information about a timer. struct Event { timer_id id; timestamp start; duration period; handler_t handler; bool valid; Event() : id(0), start(duration::zero()), period(duration::zero()), handler(nullptr), valid(false) {} template Event(timer_id id, timestamp start, duration period, Func &&handler) : id(id), start(start), period(period), handler(std::forward(handler)), valid(true) {} Event(Event &&r) = default; Event &operator=(Event &&ev) = default; Event(const Event &r) = delete; Event &operator=(const Event &r) = delete; }; // A time event structure that holds the next timeout and a reference to its // Event struct. struct Time_event { timestamp next; timer_id ref; }; inline bool operator<(const Time_event &l, const Time_event &r) { return l.next < r.next; } } // end namespace detail class Timer { using scoped_m = std::unique_lock; // Thread and locking variables. std::mutex m; std::condition_variable cond; std::thread worker; // Use to terminate the timer thread. bool done = false; // The vector that holds all active events. std::vector events; // Sorted queue that has the next timeout at its top. std::multiset time_events; // A list of ids to be re-used. If possible, ids are used from this pool. 
std::stack free_ids; public: Timer() : m{}, cond{}, worker{}, events{}, time_events{}, free_ids{} { scoped_m lock(m); done = false; worker = std::thread([this] { run(); }); } ~Timer() { scoped_m lock(m); done = true; lock.unlock(); cond.notify_all(); worker.join(); events.clear(); time_events.clear(); while (!free_ids.empty()) { free_ids.pop(); } } /** * Add a new timer. * * \param when The time at which the handler is invoked. * \param handler The callable that is invoked when the timer fires. * \param period The periodicity at which the timer fires. Only used for periodic timers. */ timer_id add(const timestamp &when, handler_t &&handler, const duration &period = duration::zero()) { scoped_m lock(m); timer_id id = 0; // Add a new event. Prefer an existing and free id. If none is available, add // a new one. if (free_ids.empty()) { id = events.size(); detail::Event e(id, when, period, std::move(handler)); events.push_back(std::move(e)); } else { id = free_ids.top(); free_ids.pop(); detail::Event e(id, when, period, std::move(handler)); events[id] = std::move(e); } time_events.insert(detail::Time_event{when, id}); lock.unlock(); cond.notify_all(); return id; } /** * Overloaded `add` function that uses a `std::chrono::duration` instead of a * `time_point` for the first timeout. */ template inline timer_id add(const std::chrono::duration &when, handler_t &&handler, const duration &period = duration::zero()) { return add(clock::now() + std::chrono::duration_cast(when), std::move(handler), period); } /** * Overloaded `add` function that uses a uint64_t instead of a `time_point` for * the first timeout and the period. */ inline timer_id add(const uint64_t when, handler_t &&handler, const uint64_t period = 0) { return add(duration(when), std::move(handler), duration(period)); } /** * Discard the timer with the given id. 
*/ bool discard(timer_id id) { scoped_m lock(m); if (events.empty() || events.size() <= id) { return false; } events[id].valid = false; events[id].handler = nullptr; auto it = std::find_if(time_events.begin(), time_events.end(), [&](const detail::Time_event &te) { return te.ref == id; }); if (it != time_events.end()) { free_ids.push(it->ref); time_events.erase(it); } lock.unlock(); cond.notify_all(); return true; } private: void run() { scoped_m lock(m); while (!done) { if (time_events.empty()) { // Wait for work cond.wait(lock); } else { detail::Time_event te = *time_events.begin(); if (CppTime::clock::now() >= te.next) { // Remove time event time_events.erase(time_events.begin()); // Invoke the handler lock.unlock(); events[te.ref].handler(te.ref); lock.lock(); if (events[te.ref].valid && events[te.ref].period.count() > 0) { // The event is valid and a periodic timer. te.next += events[te.ref].period; time_events.insert(te); } else { // The event is either no longer valid because it was removed in the // callback, or it is a one-shot timer. events[te.ref].valid = false; events[te.ref].handler = nullptr; free_ids.push(te.ref); } } else { cond.wait_until(lock, te.next); } } } } }; static Timer *getGlobalTimer() { static Timer g_timer; return &g_timer; } } // end namespace CppTime kylin-ai-data-management-service-1.2.0.0/src/aiIndex/nlpBaseTask.cpp000066400000000000000000000156261520577635400251570ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include "nlpBaseTask.h" #include #include #include #include "aiIndex.h" namespace DataManagement { NlpBaseTask::NlpBaseTask(const std::string& taskName, TaskType type) : session(nullptr), taskType(type), BaseTask(taskName), canExecute(true) {} void NlpBaseTask::handlerTask() { PRINT_INFO("[%s] handlerTask start...\n", taskName.c_str()); if (!queryTask(taskType)) { PRINT_ERROR("[%s] cannot get task.\n", taskName.c_str()); isRunning = false; return; } PRINT_DEBUG("[%s] get task: %s.\n", taskName.c_str(), taskDO.toString().c_str()); // 任务状态改为处理中 bool success = modifyTaskState(TASK_STATE_PROCESSING); if (!success) { PRINT_ERROR("[%s] modify %s state to processing failed.\n", taskName.c_str(), taskDO.filePath.c_str()); modifyTaskState(TASK_STATE_FAILED); isRunning = false; return; } // 清空nlp回调的结果 nlpResult.clear(); success = connectNLP(); if (!success) { PRINT_ERROR("[%s] connect nlp failed.\n", taskName.c_str()); modifyTaskState(TASK_STATE_STOP); isRunning = false; canExecute = false; // 等待重启后,才可以继续执行 return; } success = nlpChat(); if (!success) { PRINT_ERROR("[%s] nlpChat failed.\n", taskName.c_str()); modifyTaskState(TASK_STATE_FAILED); isRunning = false; return; } PRINT_INFO("[%s] handlerTask end...\n", taskName.c_str()); } void NlpBaseTask::stopTask() { PRINT_INFO("[%s] stopTask stop...\n", taskName.c_str()); genai_text_stop_chat(session); PRINT_INFO("[%s] stopTask end...\n", taskName.c_str()); } bool NlpBaseTask::haveTask() { return findTask(taskType); } bool NlpBaseTask::isReadyToStart() { return AIIndex::getInstance().isIdle() && AIIndex::getInstance().isEnable() && !isRunning && haveTask() && canExecute; } bool NlpBaseTask::isReadyToStop() { return !AIIndex::getInstance().isIdle() && isRunning; } bool NlpBaseTask::connectNLP() { if (session != nullptr) { return true; } ChatModelConfig* config = chat_model_config_create(); if (config == nullptr) { 
PRINT_ERROR("[%s] Create chat model config error.\n", taskName.c_str()); return false; } chat_model_config_set_name(config, Configuration::getString("nlp_model_name").c_str()); chat_model_config_set_top_k(config, 0.5); chat_model_config_set_deploy_type(config, ModelDeployType::OnDevice); session = genai_text_create_session(); if (session == nullptr) { PRINT_ERROR("[%s] Create nlp session error.\n", taskName.c_str()); return false; } genai_text_set_model_config(session, config); if (genai_text_init_session(session) != 0) { PRINT_ERROR("[%s] Init nlp session error.\n", taskName.c_str()); genai_text_destroy_session(&session); session = nullptr; return false; } return true; } void nlpCallback(ChatResult* result, void* user_data) { auto nlpTask = static_cast(user_data); nlpTask->nlpResult += chat_result_get_assistant_message(result); int code = chat_result_get_error_code(result); if (code != AiSdkCommonErrorCode::AISDK_NO_ERROR) { PRINT_ERROR("[%s] Nlp chat end, code:%d not success.\n", nlpTask->taskName.c_str(), code); nlpTask->modifyTaskState(TASK_STATE_FAILED); nlpTask->isRunning = false; nlpTask->awakeSelf(); return; } if (!chat_result_get_is_end(result)) { PRINT_DEBUG("[%s] Nlp callback not end.\n", nlpTask->taskName.c_str()); return; } // 判断是否为 stop std::string stopFlag = "<|stopChat_8167431|>"; if (nlpTask->nlpResult.size() >= stopFlag.size() && nlpTask->nlpResult.substr(nlpTask->nlpResult.length() - stopFlag.length()) == stopFlag) { PRINT_INFO("[%s] Nlp chat stop, %s.\n", nlpTask->taskName.c_str(), nlpTask->taskDO.filePath.c_str()); nlpTask->modifyTaskState(TASK_STATE_STOP); nlpTask->isRunning = false; nlpTask->awakeSelf(); return; } PRINT_INFO("[%s] Nlp chat end success.\n", nlpTask->taskName.c_str()); bool success = nlpTask->worker(); if (!success) { PRINT_ERROR("[%s] worker failed, %s.\n", nlpTask->taskName.c_str(), nlpTask->taskDO.filePath.c_str()); nlpTask->modifyTaskState(TASK_STATE_FAILED); nlpTask->isRunning = false; nlpTask->awakeSelf(); return; } 
success = nlpTask->successTaskAndEnableFileIndex(); if (!success) { PRINT_ERROR("[%s] enable file index failed, %s.\n", nlpTask->taskName.c_str(), nlpTask->taskDO.filePath.c_str()); nlpTask->modifyTaskState(TASK_STATE_FAILED); } // 任务结束 nlpTask->isRunning = false; nlpTask->awakeSelf(); } bool NlpBaseTask::nlpChat() { genai_text_clear_chat_history_messages(session); setPrompt(); genai_text_result_set_callback(session, nlpCallback, this); std::vector chunkResults; bool success = getDocumentChunk(chunkResults); if (!success) { PRINT_ERROR("[%s] nlpChat get document chunk failed.\n", taskName.c_str()); return false; } // 文档分块为空,认为成功 if (chunkResults.empty() || chunkResults[0].empty()) { PRINT_DEBUG("[%s] nlpChat get document chunk is empty.\n", taskName.c_str()); nlpResult = ""; success = worker(); if (!success) { PRINT_ERROR("[%s] nlp task worker failed, %s.\n", taskName.c_str(), taskDO.filePath.c_str()); return false; } success = successTaskAndEnableFileIndex(); if (!success) { PRINT_ERROR("[%s] enable file index failed, %s.\n", taskName.c_str(), taskDO.filePath.c_str()); return false; } isRunning = false; return true; } PRINT_DEBUG("[%s] nlpChat chat text is %s.\n", taskName.c_str(), chunkResults[0].c_str()); genai_text_chat_async(session, chunkResults[0].c_str()); return true; } NlpBaseTask::~NlpBaseTask() { genai_text_destroy_session(&session); session = nullptr; } } // namespace DataManagement kylin-ai-data-management-service-1.2.0.0/src/aiIndex/nlpBaseTask.h000066400000000000000000000026411520577635400246150ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. 
* * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #pragma once #include #include "baseTask.h"
namespace DataManagement {

/**
 * Base class of the tasks that obtain their result from a chat session.
 * Subclasses supply the prompt (setPrompt) and the handling of the collected
 * answer (worker).
 */
class NlpBaseTask : public BaseTask {
public:
    // The chat callback is a free function; it needs access to the protected
    // task state and to worker().
    friend void nlpCallback(ChatResult* result, void* user_data);

    NlpBaseTask(const std::string& taskName, TaskType type);

    void handlerTask() override;
    void stopTask() override;
    bool haveTask() override;
    bool isReadyToStart() override;
    bool isReadyToStop() override;

    /** Installs the system prompt on `session` before a chat is started. */
    virtual void setPrompt() = 0;
    /** Consumes `nlpResult`; returns false when the result cannot be used. */
    virtual bool worker() = 0;

    ~NlpBaseTask() override;

private:
    bool connectNLP();
    bool nlpChat();

protected:
    GenAiTextSession* session;  // null until connectNLP() succeeds
    std::string nlpResult;      // answer text accumulated by nlpCallback

private:
    TaskType taskType;
    bool canExecute;  // set to false when connecting to the NLP service fails
};

}  // namespace DataManagement
kylin-ai-data-management-service-1.2.0.0/src/aiIndex/summaryTask.cpp000066400000000000000000000066101520577635400252610ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include "summaryTask.h" #include #include namespace DataManagement { SummaryTask::SummaryTask() : NlpBaseTask("summary-task", TASK_TYPE_SUMMARY) {} void SummaryTask::setPrompt() { std::string prompt = R"( #Role 你是一个文档摘要撰写助手,你的职责是根据文档内容撰写文档摘要。 #Flow Step1:仔细阅读文档,理解文档主要内容; Step2:根据文档主要内容撰写文档摘要; Step3:格式化输出结果:严格按照下面的json格式进行输出: { "文档摘要":"..." } #Rules 1.文档摘要必须包含文档的主要内容, 不能照抄原文档内容; 2.语言简洁,不要解释,不要自由发挥,不要补充内容; #Initialize 你需要严格按照定义的流程,遵循的规则,并按照中的要求的json格式输出文档摘要,最多30个字。 )"; genai_text_set_chat_system_prompt(session, prompt.c_str()); } void SummaryTask::modifyFileSummary(const std::string& summary) { std::string sql = R"(UPDATE t_file_info SET file_summary = ?, last_modify_time = datetime('now', 'localtime') WHERE file_id = ?)"; SQLite::Statement statement(db, sql); statement.bind(1, summary); statement.bind(2, taskDO.fileId); int rows = statement.exec(); PRINT_DEBUG("[%s] SQL: %s, modify summary success %d rows.\n", taskName.c_str(), statement.getExpandedSQL().c_str(), rows); } bool SummaryTask::worker() { if (nlpResult.empty()) { modifyFileSummary(nlpResult); return true; } std::string result = parseResult(); if (result.empty()) { return false; } modifyFileSummary(result); return true; } std::string SummaryTask::parseResult() { PRINT_DEBUG("[%s] summary chat parse result start:%s.\n", taskName.c_str(), nlpResult.c_str()); nlohmann::json jsonData = nlohmann::json::parse(nlpResult, nullptr, false); if (jsonData.is_discarded()) { PRINT_ERROR("[%s] summary chat parse result json format error.\n", taskName.c_str()); return ""; } if (!jsonData.contains("文档摘要") || !jsonData["文档摘要"].is_string()) { PRINT_ERROR("[%s] Parse summary json is empty, %s.\n", taskName.c_str(), jsonData.dump().c_str()); return ""; } if (jsonData["文档摘要"].size() >= 200) { PRINT_ERROR("[%s] Parse summary json result size %lu > 200 is discarded.\n", taskName.c_str(), jsonData["文档摘要"].size()); return ""; } PRINT_DEBUG("[%s] chat result parseSummary end, %s.\n", taskName.c_str(), 
jsonData["文档摘要"].dump().c_str()); return jsonData["文档摘要"]; } } // namespace DataManagement kylin-ai-data-management-service-1.2.0.0/src/aiIndex/summaryTask.h000066400000000000000000000017771520577635400247370ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #pragma once #include "nlpBaseTask.h" namespace DataManagement { class SummaryTask : public NlpBaseTask { public: SummaryTask(); void setPrompt() override; bool worker() override; private: void modifyFileSummary(const std::string &summary); std::string parseResult(); }; } // namespace DataManagement kylin-ai-data-management-service-1.2.0.0/src/aiIndex/systemCallback.cpp000066400000000000000000000076551520577635400257140ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include "systemCallback.h" #include #include #include "aiIndex.h" #include "cpptime.h" #include "embedding.h" CppTime::timer_id timerId; /** * system-signal idle-status * 0 0 忙碌 * 3 1 空闲 * @param status * @return */ DataManagement::IdleState systemStatusToIdleState(guint32 status) { DataManagement::IdleState idleState = DataManagement::IDLE_STATE_MAX; switch (status) { case 0: idleState = DataManagement::IDLE_STATE_BUSY; break; case 3: idleState = DataManagement::IDLE_STATE_SYSTEM_IDLE; break; default: idleState = DataManagement::IDLE_STATE_MAX; break; } PRINT_INFO("SystemStatusToIdleState receive signal is %d, idle status is %d.\n", status, idleState); return idleState; } void judgeIsIdle(GDBusConnection* conn, const gchar* sender_name, const gchar* object_path, const gchar* interface_name, const gchar* signal_name, GVariant* parameters, gpointer user_data) { PRINT_DEBUG("JudgeIsIdle start...\n"); if (!g_str_equal(signal_name, "StatusChanged")) { PRINT_ERROR("JudgeIsIdle receive unexpected signal name, %s\n", signal_name); return; } guint32 status = UINT32_MAX; g_variant_get(parameters, "(u)", &status); DataManagement::IdleState IdleState = systemStatusToIdleState(status); if (IdleState == DataManagement::IDLE_STATE_MAX) { PRINT_WARN("JudgeIsIdle receive unexpected signal %d.\n", status); return; } DataManagement::AIIndex::getInstance().setIdleState(IdleState); if (IdleState == DataManagement::IDLE_STATE_SYSTEM_IDLE) { // 启动定时器 int idleSecond = Configuration::getInt("idle_second", 60); auto t = CppTime::getGlobalTimer(); timerId = t->add(std::chrono::seconds(idleSecond), [t](CppTime::timer_id id) { // 注:一定要先改变 IdleState 值,再唤醒所有任务 DataManagement::AIIndex::getInstance().setIdleState(DataManagement::IDLE_STATE_TRUE_IDLE); DataManagement::AIIndex::getInstance().notifyAllTask(); t->discard(id); PRINT_INFO("JudgeIsIdle ready change to true idle.\n"); }); } else { // 系统状态变为繁忙 auto t = CppTime::getGlobalTimer(); t->discard(timerId); 
DataManagement::AIIndex::getInstance().notifyAllTask(); PRINT_INFO("JudgeIsIdle change to busy.\n"); } PRINT_DEBUG("JudgeIsIdle end...\n"); } void judgeIsAIIndexEnable(GSettings* settings, const gchar* key, gpointer user_data) { PRINT_DEBUG("JudgeIsAIIndexEnable start...\n"); gboolean value = g_settings_get_boolean(settings, key); DataManagement::AIIndex::getInstance().setAIIndexEnable(value); if (value) { Embedding textEmbedding(DataManagementEmbeddingDataType::Text, scene::TaskPriority::MEDIUM); Embedding imageTextEmbedding(DataManagementEmbeddingDataType::ImageText, scene::TaskPriority::HIGH); PRINT_INFO("Current AIIndexEnable is: %s,load embedding model.\n", value ? "true" : "false"); } PRINT_DEBUG("JudgeIsAIIndexEnable end...\n"); }kylin-ai-data-management-service-1.2.0.0/src/aiIndex/systemCallback.h000066400000000000000000000020551520577635400253460ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #pragma once #include
// GDBus signal callback; the definition ignores every signal whose name is
// not "StatusChanged".
extern void judgeIsIdle(GDBusConnection* conn, const gchar* sender_name, const gchar* object_path,
                        const gchar* interface_name, const gchar* signal_name, GVariant* parameters,
                        gpointer user_data);

// GSettings callback; reads the boolean stored under `key`.
extern void judgeIsAIIndexEnable(GSettings* settings, const gchar* key, gpointer user_data);
kylin-ai-data-management-service-1.2.0.0/src/aiIndex/tagTask.cpp000066400000000000000000000107151520577635400243400ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include "tagTask.h" #include #include #include #include "tagService.h" namespace DataManagement { TagTask::TagTask() : NlpBaseTask("tag-task", TASK_TYPE_TAG) {} void TagTask::setPrompt() { genai_text_set_chat_system_prompt_id(session, TAG_EXTRACTION); } bool TagTask::worker() { TagService tagService; if (nlpResult.empty()) { // 删除所有标签 tagService.deleteFileTags({taskDO.fileId}); return true; } std::set result = parseResult(); if (result.empty()) { return false; } tagService.updateFileTags(taskDO.fileId, result); return true; } std::set TagTask::parseResult() { PRINT_DEBUG("[%s] chat result parseTags start:%s.\n", taskName.c_str(), nlpResult.c_str()); std::multimap map; std::set result; // 解析JSON字符串 nlohmann::json jsonData = nlohmann::json::parse(nlpResult, nullptr, false); if (jsonData.is_discarded()) { PRINT_ERROR("[%s] tag chat parse result json format error.\n", taskName.c_str()); return {}; } if (!jsonData.contains("预置文档标签") || !jsonData["预置文档标签"].is_array() || jsonData["预置文档标签"].empty()) { PRINT_ERROR("[%s] Parse tags json is empty.\n", taskName.c_str()); return {}; } for (const auto& item : jsonData["预置文档标签"]) { if (!item.contains("标签") || !item.contains("置信度") || !item["标签"].is_string()) { PRINT_WARN("[%s] Parse tags json get table error, %s.\n", taskName.c_str(), jsonData.dump().c_str()); continue; } std::string table = item["标签"]; double sim = 0.0; if (item["置信度"].is_string()) { std::string confidence = item["置信度"]; sim = stod(confidence); } else { sim = item["置信度"].get(); } if (table != "其他") { map.insert(std::make_pair(sim, table)); } } // 优先存储预制标签 预制标签存储到6个提示词就直接返回,不要自由标签 for (auto it = map.rbegin(); it != map.rend(); ++it) { if (result.size() >= Configuration::getInt("extract_tag_nums", 6)) { return result; } result.insert(it->second); } if (!jsonData.contains("自由文档标签") || !jsonData["自由文档标签"].is_array() || jsonData["自由文档标签"].empty()) { PRINT_ERROR("[%s] Parse free tags json is empty.\n", taskName.c_str()); return result; } map.clear(); for (const 
auto& item : jsonData["自由文档标签"]) { if (!item.contains("标签") || !item.contains("置信度") || !item["标签"].is_string()) { PRINT_WARN("[%s] Parse tags json get table error, %s.\n", taskName.c_str(), jsonData.dump().c_str()); continue; } std::string table = item["标签"]; double sim = 0.0; if (item["置信度"].is_string()) { std::string confidence = item["置信度"]; sim = stod(confidence); } else { sim = item["置信度"].get(); } // 自有标签存在一定的不可靠性,所以置信度设为0.9以上才会存储 if (sim >= 0.9) { map.insert(std::make_pair(sim, table)); } } for (auto it = map.rbegin(); it != map.rend(); ++it) { if (result.size() >= Configuration::getInt("extract_tag_nums", 6)) { return result; } result.insert(it->second); } PRINT_DEBUG("[%s] chat result parseTags end.\n", taskName.c_str()); return result; } } // namespace DataManagement kylin-ai-data-management-service-1.2.0.0/src/aiIndex/tagTask.h000066400000000000000000000017301520577635400240020ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #pragma once #include #include "nlpBaseTask.h" namespace DataManagement { class TagTask : public NlpBaseTask { public: TagTask(); void setPrompt() override; bool worker() override; private: std::set parseResult(); }; } // namespace DataManagement kylin-ai-data-management-service-1.2.0.0/src/aiIndex/textEmbeddingTask.cpp000066400000000000000000000133051520577635400263460ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include "textEmbeddingTask.h" #include #include #include "aiIndex.h" #include "dataManagementVectorDatabase.h" #include "embedding.h" #include "fileDatabase.h" namespace DataManagement { TextEmbeddingTask::TextEmbeddingTask() : BaseTask("text-embedding-task") {} /** * 每次处理一个分块,向量化暂时不支持打断 * 唤不唤醒目前都可以,因为当前线程未休眠 */ void TextEmbeddingTask::handlerTask() { PRINT_INFO("[%s] handlerTask start...\n", taskName.c_str()); bool success; // 先处理分块队列中的任务 if (!chunkLists.empty()) { // 文件可能还未处理完,又有更新,检查当前任务状态是否为处理中 if (taskDO.state != TASK_STATE_PROCESSING) { PRINT_DEBUG("[%s] task not in processing, state: %d.\n", taskName.c_str(), taskDO.state); // 清空队列 std::queue().swap(chunkLists); isRunning = false; return; } // 检查下文件id是否存在,有可能任务还没处理完,文件已经被删了 std::string filePath = FileDatabase::getFilePathById(db, taskDO.fileId); if (filePath.empty()) { PRINT_DEBUG("[%s] file id: %d not exist, clear all queue.\n", taskName.c_str(), taskDO.fileId); // 清空队列 std::queue().swap(chunkLists); isRunning = false; return; } success = handlerProcessingTask(); } else { if (!queryTask(TASK_TYPE_TEXT_EMBEDDING)) { PRINT_ERROR("[%s] cannot get task.\n", taskName.c_str()); isRunning = false; return; } PRINT_DEBUG("[%s] get task: %s.\n", taskName.c_str(), taskDO.toString().c_str()); success = handlerNewTask(); } if (!success) { modifyTaskState(TASK_STATE_FAILED); } // 文档分段为空,认为索引建立成功 if (success && chunkLists.empty()) { // 使能文件索引 success = successTaskAndEnableFileIndex(); if (!success) { PRINT_ERROR("[%s] enable file index failed, file_id:%d.\n", taskName.c_str(), taskDO.fileId); modifyTaskState(TASK_STATE_FAILED); } } isRunning = false; PRINT_INFO("[%s] handlerTask end...\n", taskName.c_str()); } void TextEmbeddingTask::stopTask() { // 文档分段做向量化,暂时不支持暂停,走不到这里 PRINT_INFO("[%s] stopTask stop...\n", taskName.c_str()); } bool TextEmbeddingTask::haveTask() { return !chunkLists.empty() || findTask(TASK_TYPE_TEXT_EMBEDDING); } bool TextEmbeddingTask::isReadyToStart() { return AIIndex::getInstance().isIdle() && 
AIIndex::getInstance().isEnable() && !isRunning && haveTask(); } bool TextEmbeddingTask::isReadyToStop() { return !AIIndex::getInstance().isIdle() && isRunning; } bool TextEmbeddingTask::handlerProcessingTask() { std::string chunk = chunkLists.front(); chunkLists.pop(); // 文档内容向量化 Embedding textEmbedding(DataManagementEmbeddingDataType::Text, scene::TaskPriority::MEDIUM); std::vector embedding = textEmbedding.getEmbedding(chunk); if (embedding.empty()) { // 向量化结果为空,创建索引流程继续,比如生僻字模型不识别 PRINT_WARN("[%s] text embedding file_id:%d, result is empty.\n", taskName.c_str(), taskDO.fileId); return true; } // 存入向量数据库 DataManagementVectorDatabase textDB(TEXT_FILE_CONTENT_COLLECTION_NAME, TEXT_DIMENSION); std::string uuid = Utils::generateUUID(); nlohmann::json metadata; metadata["file_id"] = taskDO.fileId; bool success = textDB.insertVectorData({DataManagementVectorInfo{uuid, embedding, metadata}}); if (!success) { PRINT_ERROR("[%s] %s insert data failed.\n", taskName.c_str(), TEXT_FILE_CONTENT_COLLECTION_NAME); return false; } return true; } bool TextEmbeddingTask::handlerNewTask() { // 任务状态改为处理中 bool success = modifyTaskState(TASK_STATE_PROCESSING); if (!success) { PRINT_ERROR("[%s] modify %s state to processing failed.\n", taskName.c_str(), taskDO.filePath.c_str()); return false; } // 文档分段 std::vector chunkResults; success = getDocumentChunk(chunkResults); if (!success) { PRINT_ERROR("[%s] %s getDocumentChunk failed.\n", taskName.c_str(), taskDO.filePath.c_str()); return false; } // 存入队列 for (const auto& chunk : chunkResults) { chunkLists.emplace(chunk); } DataManagementVectorDatabase textDB(TEXT_FILE_CONTENT_COLLECTION_NAME, TEXT_DIMENSION); std::string expression = "file_id == " + std::to_string(taskDO.fileId); success = textDB.deleteVectorData(expression); if (!success) { PRINT_ERROR("[%s] %s delete data failed.\n", taskName.c_str(), TEXT_FILE_CONTENT_COLLECTION_NAME); return false; } return true; } } // namespace DataManagement 
kylin-ai-data-management-service-1.2.0.0/src/aiIndex/textEmbeddingTask.h000066400000000000000000000022551520577635400260150ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #pragma once #include #include #include "baseTask.h" namespace DataManagement { class TextEmbeddingTask : public BaseTask { public: TextEmbeddingTask(); void handlerTask() override; void stopTask() override; bool haveTask() override; bool isReadyToStart() override; bool isReadyToStop() override; private: bool handlerProcessingTask(); bool handlerNewTask(); private: std::queue chunkLists; }; } // namespace DataManagement kylin-ai-data-management-service-1.2.0.0/src/controller/000077500000000000000000000000001520577635400230345ustar00rootroot00000000000000kylin-ai-data-management-service-1.2.0.0/src/controller/handlerAddImageFiles.cpp000066400000000000000000000033171520577635400275200ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. 
* * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include "imageService.h" Response handleAddImageFiles(const Request& request) { PRINT_DEBUG("HandleAddImageFiles request: %s\n", request.toString().c_str()); // 解析参数 nlohmann::json values = request.getValues(); if (!values.is_array() || values.empty()) { PRINT_ERROR("HandleAddImageFiles request is not array or is empty.\n"); return Response(PARSE_PARAM_FAILED); } std::unordered_set filePaths; for (const auto& item : values) { if (!item.is_object() || !item.contains("filepath") || !item["filepath"].is_string() || !item.contains("fileformat") || !item["fileformat"].is_string()) { PRINT_ERROR("HandleAddImageFiles request json is error.\n"); continue; } filePaths.insert(item["filepath"].get()); } if (!filePaths.empty()) { ImageService().embedding(filePaths); } return Response::success(); } REGISTER_SERVICE_HANDLER("addImageFiles", handleAddImageFiles) kylin-ai-data-management-service-1.2.0.0/src/controller/handlerAddTextFiles.cpp000066400000000000000000000033431520577635400274210ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include #include "fileInfoService.h" Response handleAddTextFiles(const Request& request) { PRINT_DEBUG("HandleAddTextFiles request: %s\n", request.toString().c_str()); // 解析参数 nlohmann::json values = request.getValues(); if (!values.is_array() || values.empty()) { PRINT_ERROR("HandleAddTextFiles request is not array or is empty.\n"); return Response(PARSE_PARAM_FAILED); } std::unordered_set filePaths; for (const auto& item : values) { if (!item.is_object() || !item.contains("filepath") || !item["filepath"].is_string() || !item.contains("fileformat") || !item["fileformat"].is_string()) { PRINT_ERROR("HandleAddTextFiles request json is error.\n"); continue; } filePaths.insert(item["filepath"].get()); } if (!filePaths.empty()) { FileInfoService().insertTask(filePaths); } return Response::success(); } REGISTER_SERVICE_HANDLER("addTextFiles", handleAddTextFiles) kylin-ai-data-management-service-1.2.0.0/src/controller/handlerDeleteFiles.cpp000066400000000000000000000060711520577635400272670ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include #include "fileDatabase.h" #include "fileInfoService.h" #include "tagService.h" std::unordered_set getDeletedFileIds(const std::vector& filePaths) { std::unordered_set deletedFileIds; SQLite::Database database = FileDatabase::createDatabase(); // 从 sqlite 根据文件名查询 文件id for (const auto& filePath : filePaths) { std::vector fileIds = FileDatabase::getMultiFileId(database, filePath); if (fileIds.empty()) { PRINT_DEBUG("Can't get any data from path: %s\n", filePath.c_str()); continue; } deletedFileIds.insert(fileIds.begin(), fileIds.end()); } return deletedFileIds; } bool deleteFileInfo(const std::unordered_set& deletedFileIds) { FileInfoService fileInfoService; return fileInfoService.deleteFiles(deletedFileIds); } void deleteTagInfo(const std::unordered_set& deletedFileIds) { TagService tagService; tagService.deleteFileTags(deletedFileIds); } Response handleDeleteFiles(const Request& request) { PRINT_DEBUG("HandleDeleteFiles request: %s\n", request.toString().c_str()); // 解析参数 nlohmann::json values = request.getValues(); if (!values.is_array() || values.empty()) { PRINT_ERROR("HandleDeleteFiles request is not array of is empty.\n"); return Response(PARSE_PARAM_FAILED); } std::vector filePaths; for (const auto& item : values) { if (!item.is_object() || !item.contains("filepath") || !item["filepath"].is_string()) { PRINT_ERROR("HandleDeleteFiles request json is error.\n"); continue; } filePaths.push_back(item["filepath"].get()); } std::unordered_set deletedFileIds = getDeletedFileIds(filePaths); if (deletedFileIds.empty()) { PRINT_WARN("Can't get any data from input: %s\n", request.toString().c_str()); return Response::success(); } // 根据 file_id 删除文件信息 bool ret = deleteFileInfo(deletedFileIds); if (!ret) { PRINT_ERROR("HandleDeleteFiles delete vector db data failed.\n"); return Response(DELETE_VECTOR_DATABASE_FAILED); } // 根据 file_id 删除标签信息 deleteTagInfo(deletedFileIds); return Response::success(); } REGISTER_SERVICE_HANDLER("deleteFiles", handleDeleteFiles) 
kylin-ai-data-management-service-1.2.0.0/src/controller/handlerExtractSearchConditions.cpp000066400000000000000000000032721520577635400316740ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include Response handleExtractSearchConditions(const Request& request) { PRINT_DEBUG("handleExtractSearchConditions request: %s\n", request.toString().c_str()); // 解析参数 nlohmann::json values = request.getValues(); if (!values.is_object() || values.empty() || !values.contains("description") || !values["description"].is_string()) { PRINT_ERROR("handleExtractSearchConditions request is not object of is empty.\n"); return Response(PARSE_PARAM_FAILED); } auto description = values["description"].get(); nlohmann::json conditions; conditions["condition type"] = 3; conditions["orCondition"] = false; conditions["content"] = description; nlohmann::json labels(nlohmann::json::array()); nlohmann::json root; root["0"] = conditions; root["conditions count"] = 1; root["labels"] = labels; return Response::success(root); } REGISTER_SERVICE_HANDLER("extractSearchConditions", handleExtractSearchConditions) kylin-ai-data-management-service-1.2.0.0/src/controller/handlerGetAllFilesInfos.cpp000066400000000000000000000025441520577635400302350ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. 
* * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include "fileDatabase.h" Response handleGetAllFileInfos(const Request& request) { PRINT_DEBUG("handleGetAllFileInfos request: %s\n", request.toString().c_str()); SQLite::Database database = FileDatabase::createDatabase(); std::vector fileInfos = FileDatabase::getAllFileInfos(database); nlohmann::json root(nlohmann::json::array()); for (const auto& info : fileInfos) { nlohmann::json file; file["filepath"] = info.filePath; file["timestamp"] = info.modifyTime; root.push_back(file); } return Response::success(root); } REGISTER_SERVICE_HANDLER("getAllFileInfos", handleGetAllFileInfos) kylin-ai-data-management-service-1.2.0.0/src/controller/handlerGetAllTags.cpp000066400000000000000000000021311520577635400270620ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. 
If not, see . */ #include #include "fileDatabase.h" Response handleGetAllTags(const Request& request) { PRINT_DEBUG("handleGetAllTags request: %s\n", request.toString().c_str()); SQLite::Database database = FileDatabase::createDatabase(); std::vector allTags = FileDatabase::getAllTags(database); return Response::success(allTags); } REGISTER_SERVICE_HANDLER("getAllTags", handleGetAllTags) kylin-ai-data-management-service-1.2.0.0/src/controller/handlerGetFeatureStatus.cpp000066400000000000000000000020051520577635400303320ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include enum FeatureStatus : int { UNKNOWN = -1, AVAILABLE = 0, NOT_INSTALLED, NOT_SUPPORTED }; Response handleGetFeatureStatus(const Request& request) { // 模型默认集成 return Response::success(nlohmann::json{{"result", AVAILABLE}}); } REGISTER_SERVICE_HANDLER("getFeatureStatus", handleGetFeatureStatus) kylin-ai-data-management-service-1.2.0.0/src/controller/handlerGetSummarysOfFiles.cpp000066400000000000000000000043261520577635400306330ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. 
* * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include "fileDatabase.h" Response handleGetSummarysOfFiles(const Request& request) { PRINT_DEBUG("handleGetSummarysOfFiles request: %s\n", request.toString().c_str()); // 解析参数 nlohmann::json values = request.getValues(); if (!values.is_object() || values.empty()) { PRINT_ERROR("handleGetSummarysOfFiles request is not object or is empty.\n"); return Response(PARSE_PARAM_FAILED); } if (!values.contains("files") || !values["files"].is_array()) { PRINT_ERROR("handleGetSummarysOfFiles request json is error.\n"); return Response(PARSE_PARAM_FAILED); } std::unordered_set files; for (const auto& item : values["files"]) { if (!item.is_string()) { PRINT_ERROR("handleGetSummarysOfFiles request json format is error.\n"); continue; } files.insert(item.get()); } if (files.empty()) { PRINT_ERROR("handleGetSummarysOfFiles request format is wrong.\n"); return Response(PARSE_PARAM_FAILED); } SQLite::Database database = FileDatabase::createDatabase(); nlohmann::json root(nlohmann::json::array()); for (const auto& file : files) { std::string summary = FileDatabase::getFileSummary(database, file); nlohmann::json value; value["filepath"] = file; value["summary"] = summary; root.push_back(value); } return Response::success(root); } REGISTER_SERVICE_HANDLER("getSummarysOfFiles", handleGetSummarysOfFiles) kylin-ai-data-management-service-1.2.0.0/src/controller/handlerGetTagsOfFiles.cpp000066400000000000000000000043221520577635400277050ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. 
* * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include "fileDatabase.h" Response handleGetTagsOfFiles(const Request& request) { PRINT_DEBUG("handleGetTagsOfFiles request: %s\n", request.toString().c_str()); // 解析参数 nlohmann::json values = request.getValues(); if (!values.is_object() || values.empty()) { PRINT_ERROR("handleGetTagsOfFiles request is not array or is empty.\n"); return Response(PARSE_PARAM_FAILED); } if (!values["files"].is_array()) { PRINT_ERROR("handleGetTagsOfFiles request json format is error.\n"); return Response(PARSE_PARAM_FAILED); } std::vector files; for (const auto& item : values["files"]) { if (!item.is_string()) { PRINT_ERROR("handleGetTagsOfFiles request json files key is error.\n"); continue; } files.push_back(item.get()); } if (files.empty()) { PRINT_ERROR("handleGetTagsOfFiles request format is wrong.\n"); return Response(PARSE_PARAM_FAILED); } nlohmann::json output(nlohmann::json::array()); SQLite::Database database = FileDatabase::createDatabase(); for (const auto& file : files) { std::vector fileTags = FileDatabase::getFileTags(database, file); nlohmann::json tags = fileTags; nlohmann::json value; value["filepath"] = file; value["tags"] = tags; output.push_back(value); } return Response::success(output); } REGISTER_SERVICE_HANDLER("getTagsOfFiles", handleGetTagsOfFiles) 
kylin-ai-data-management-service-1.2.0.0/src/controller/handlerIsFileRelevantToDescription.cpp000066400000000000000000000046341520577635400324700ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include "fileInfoService.h" Response handleIsFileRelevantToDescription(const Request& request) { PRINT_DEBUG("handleIsFileRelevantToDescription request: %s\n", request.toString().c_str()); // 解析参数 nlohmann::json values = request.getValues(); if (!values.is_object() || values.empty()) { PRINT_ERROR("handleIsFileRelevantToDescription request is not array or is empty.\n"); return Response(PARSE_PARAM_FAILED); } if (!values.contains("file") || !values["file"].is_string() || !values.contains("description") || !values["description"].is_string()) { PRINT_ERROR("handleIsFileRelevantToDescription request json format is error.\n"); return Response(PARSE_PARAM_FAILED); } std::string file = request.getString("file"); std::string description = request.getString("description"); if (file.empty() || description.empty()) { PRINT_ERROR("handleIsFileRelevantToDescription request format is wrong.\n"); return Response(PARSE_PARAM_FAILED); } SimilaritySearchResult result; ERROR_CODE retCode = FileInfoService().searchFilesByText(description, result); if (retCode != SUCCESS) { PRINT_ERROR("handleIsFileRelevantToDescription search files by text failed, ret: %d.\n", 
retCode); return Response(SEARCH_FILE_INFO_FAILED); } auto it = std::find_if(result.begin(), result.end(), [&file](const std::pair& element) { return element.first == file; }); bool isRelevant = it != result.end(); return Response::success(nlohmann::json{{"is_relevant", isRelevant}}); } REGISTER_SERVICE_HANDLER("isFileRelevantToDescription", handleIsFileRelevantToDescription) kylin-ai-data-management-service-1.2.0.0/src/controller/handlerIsFileRelevantToTags.cpp000066400000000000000000000046231520577635400311010ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include #include #include "tagService.h" Response handleIsFileRelevantToTags(const Request& request) { PRINT_DEBUG("handleIsFileRelevantToTags request: %s\n", request.toString().c_str()); // 解析参数 nlohmann::json values = request.getValues(); if (!values.is_object() || values.empty()) { PRINT_ERROR("handleIsFileRelevantToTags request is not array or is empty.\n"); return Response(PARSE_PARAM_FAILED); } if (!values.contains("file") || !values["file"].is_string() || !values.contains("tags") || !values["tags"].is_array()) { PRINT_ERROR("handleIsFileRelevantToTags request json format is error.\n"); return Response(PARSE_PARAM_FAILED); } std::string file = request.getString("file"); std::vector tags; for (const auto& item : values["tags"]) { if (!item.is_string()) { PRINT_ERROR("handleIsFileRelevantToTags request json tags key is error.\n"); continue; } tags.push_back(item.get()); } if (file.empty() || tags.empty()) { PRINT_ERROR("handleIsFileRelevantToTags request format is wrong.\n"); return Response(PARSE_PARAM_FAILED); } std::vector similarTagFilePaths = TagService().searchSimilarFilePaths(tags); if (similarTagFilePaths.empty()) { return Response::success(nlohmann::json{{"is_relevant", false}}); } bool isRelevant = std::find(similarTagFilePaths.begin(), similarTagFilePaths.end(), file) != similarTagFilePaths.end(); return Response::success(nlohmann::json{{"is_relevant", isRelevant}}); } REGISTER_SERVICE_HANDLER("isFileRelevantToTags", handleIsFileRelevantToTags) kylin-ai-data-management-service-1.2.0.0/src/controller/handlerSearchFilesAboutTags.cpp000066400000000000000000000037431520577635400311070ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. 
* * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include "tagService.h" Response handleSearchFilesAboutTags(const Request& request) { PRINT_DEBUG("handleSearchFilesAboutTags request: %s\n", request.toString().c_str()); // 解析参数 nlohmann::json values = request.getValues(); if (!values.is_object() || values.empty()) { PRINT_ERROR("handleSearchFilesAboutTags request is not array or is empty.\n"); return Response(PARSE_PARAM_FAILED); } if (!values.contains("tags") || !values["tags"].is_array()) { PRINT_ERROR("handleSearchFilesAboutTags request json format is error.\n"); return Response(PARSE_PARAM_FAILED); } std::vector tags; for (const auto& item : values["tags"]) { if (!item.is_string()) { PRINT_ERROR("handleSearchFilesAboutTags request json tags key is error.\n"); continue; } tags.push_back(item.get()); } if (tags.empty()) { PRINT_ERROR("handleSearchFilesAboutTags request format is wrong.\n"); return Response(PARSE_PARAM_FAILED); } std::vector similarTagFilePaths = TagService().searchSimilarFilePaths(tags); return Response::success(similarTagFilePaths); } REGISTER_SERVICE_HANDLER("searchFilesAboutTags", handleSearchFilesAboutTags) kylin-ai-data-management-service-1.2.0.0/src/controller/handlerSearchSimilarTags.cpp000066400000000000000000000036731520577635400304540ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. 
* * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include "tagService.h" Response handleSearchSimilarTags(const Request& request) { PRINT_DEBUG("handleSearchSimilarTags request: %s\n", request.toString().c_str()); // 解析参数 nlohmann::json values = request.getValues(); if (!values.is_object() || values.empty()) { PRINT_ERROR("handleSearchSimilarTags request is not array or is empty.\n"); return Response(PARSE_PARAM_FAILED); } if (!values.contains("tags") || !values["tags"].is_array()) { PRINT_ERROR("handleSearchSimilarTags request json format is error.\n"); return Response(PARSE_PARAM_FAILED); } std::vector tags; for (const auto& item : values["tags"]) { if (!item.is_string()) { PRINT_ERROR("handleSearchSimilarTags request json tags key is error.\n"); continue; } tags.push_back(item.get()); } if (tags.empty()) { PRINT_ERROR("handleSearchSimilarTags request format is wrong.\n"); return Response(PARSE_PARAM_FAILED); } std::vector similarTags = TagService().searchSimilarTagNames(tags); return Response::success(similarTags); } REGISTER_SERVICE_HANDLER("searchSimilarTags", handleSearchSimilarTags) kylin-ai-data-management-service-1.2.0.0/src/controller/handlerSimilaritySearch.cpp000066400000000000000000000043421520577635400303550ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. 
* * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include "fileInfoService.h" Response handleSimilaritySearch(const Request& request) { PRINT_DEBUG("handleSimilaritySearch request: %s\n", request.toString().c_str()); // 解析参数 nlohmann::json values = request.getValues(); if (!values.is_object() || values.empty()) { PRINT_ERROR("handleSimilaritySearch request is not array or is empty.\n"); return Response(PARSE_PARAM_FAILED); } if (!values.contains("text") || !values["text"].is_string()) { PRINT_ERROR("handleSimilaritySearch request json format is error.\n"); return Response(PARSE_PARAM_FAILED); } std::string text = request.getString("text"); if (text.empty()) { PRINT_ERROR("handleSimilaritySearch request text is empty.\n"); return Response(PARSE_PARAM_FAILED); } // double threshold = request.getDouble("similarity-threshold"); 目前没用传过来的阈值 SimilaritySearchResult result; ERROR_CODE retCode = FileInfoService().searchFilesByText(text, result); if (retCode != SUCCESS) { PRINT_ERROR("handleSimilaritySearch search files by text failed, ret: %d.\n", retCode); return Response(SEARCH_FILE_INFO_FAILED); } nlohmann::json root(nlohmann::json::array()); for (const auto& info : result) { nlohmann::json file; file["filepath"] = info.first; file["similarity"] = info.second; root.push_back(file); } return Response::success(root); } REGISTER_SERVICE_HANDLER("similaritySearch", handleSimilaritySearch) kylin-ai-data-management-service-1.2.0.0/src/controller/handlerUpdateFilesContentData.cpp000066400000000000000000000076711520577635400314430ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. 
* * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include #include "fileInfoService.h" #include "imageService.h" #include "tagService.h" namespace { bool isSupportedImageFormat(const std::string& format) { const std::vector supportFormat = {"png", "jpg", "jpeg", "jpe", "bmp", "dib"}; return std::find(supportFormat.begin(), supportFormat.end(), format) != supportFormat.end(); } bool isSupportedTextFormat(const std::string& format) { const std::vector supportFormat = {"txt", "pdf", "docx", "pptx", "xlsx", "xls", "xlsd", "html", "xml", "md", "json"}; return std::find(supportFormat.begin(), supportFormat.end(), format) != supportFormat.end(); } void parseFilePaths(const nlohmann::json& values, std::unordered_set& textFilePaths, std::unordered_set& imgFilePaths) { for (const auto& item : values) { if (!item.is_object() || !item.contains("filepath") || !item["filepath"].is_string() || !item.contains("fileformat") || !item["fileformat"].is_string()) { PRINT_ERROR("handleUpdateFilesContentData request json is error.\n"); continue; } auto filePath = item["filepath"].get(); if (isSupportedTextFormat(item["fileformat"].get())) { textFilePaths.insert(filePath); } else if (isSupportedImageFormat(item["fileformat"].get())) { imgFilePaths.insert(filePath); } else { PRINT_ERROR("handleUpdateFilesContentData unknown type: %s.\n", item["fileformat"].get().c_str()); continue; } } } } // namespace Response 
handleUpdateFilesContentData(const Request& request) { PRINT_DEBUG("handleUpdateFilesContentData request: %s\n", request.toString().c_str()); // 解析参数 nlohmann::json values = request.getValues(); if (!values.is_array() || values.empty()) { PRINT_ERROR("handleUpdateFilesContentData request is not array or is empty.\n"); return Response(PARSE_PARAM_FAILED); } std::unordered_set textFilePaths; // 需要添加的文本文件 std::unordered_set imgFilePaths; // 需要添加的图片 parseFilePaths(values, textFilePaths, imgFilePaths); if (!textFilePaths.empty()) { std::unordered_set newFilePaths; std::vector updateFileInfos; FileInfoService().classifyFile(textFilePaths, newFilePaths, updateFileInfos); if (!newFilePaths.empty()) { FileInfoService().insertTask(newFilePaths); } if (!updateFileInfos.empty()) { std::unordered_set updateIds = FileInfoService().updateTask(updateFileInfos); if (!updateIds.empty()) { // 需要更新的文件 先删掉,等待索引重建 FileInfoService::deleteTextFileVectorInfo(updateIds); TagService().deleteFileTags(updateIds); } } } if (!imgFilePaths.empty()) { ImageService().embedding(imgFilePaths); } return Response::success(); } REGISTER_SERVICE_HANDLER("updateFilesContentData", handleUpdateFilesContentData) kylin-ai-data-management-service-1.2.0.0/src/controller/handlerUpdateFilesName.cpp000066400000000000000000000022261520577635400301060ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include /** * 使用场景:当文件移动时,无需修改向量数据库内容,只需修改SQLITE数据库中的文件路径即可 * 上游业务识别不出来移动,暂不实现 * @param request * @return */ Response handleUpdateFilesName(const Request& request) { PRINT_DEBUG("handleUpdateFilesName request: %s\n", request.toString().c_str()); return Response::success(); } REGISTER_SERVICE_HANDLER("updateFilesName", handleUpdateFilesName) kylin-ai-data-management-service-1.2.0.0/src/dao/000077500000000000000000000000001520577635400214145ustar00rootroot00000000000000kylin-ai-data-management-service-1.2.0.0/src/dao/dataManagementVectorDatabase.cpp000066400000000000000000000133771520577635400276510ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include "dataManagementVectorDatabase.h" #include #include DataManagementVectorDatabase::DataManagementVectorDatabase(const std::string& collectionName, int dimension) : collectionName_(collectionName), dimension_(dimension) { if (!scene::VectorDatabase::getInstance().hasCollection(collectionName_)) { PRINT_DEBUG("DataManagementVectorDatabase create collection: %s.\n", collectionName_.c_str()); bool ret = createCollection(); if (!ret) { PRINT_ERROR("createCollection %s failed.\n", collectionName_.c_str()); return; } } } bool DataManagementVectorDatabase::insertVectorData(const std::vector& vectorInfos) { std::vector idVec; idVec.reserve(vectorInfos.size()); std::vector> floatVec; floatVec.reserve(vectorInfos.size()); std::vector metaVec; metaVec.reserve(vectorInfos.size()); for (const auto& info : vectorInfos) { idVec.emplace_back(info.id); floatVec.emplace_back(info.embedding); metaVec.emplace_back(info.metadata); } std::vector data{ std::make_shared(DEFAULT_ID_FIELD_NAME, idVec), std::make_shared(DEFAULT_VECTOR_FIELD_NAME, floatVec), std::make_shared(DYNAMIC_FIELD_NAME, metaVec)}; VectorDB::DmlResults results; int ret = scene::VectorDatabase::getInstance().insert(collectionName_, data, results); PRINT_DEBUG("%s vector insert rows: %zu\n", collectionName_.c_str(), results.IdArray().StrIDArray().size()); return ret; } bool DataManagementVectorDatabase::deleteVectorData(const std::string& expression) { VectorDB::DmlResults results; bool ret = scene::VectorDatabase::getInstance().deleteData(collectionName_, expression, results); if (!ret) { PRINT_ERROR("%s delete vector db failed. 
expression: %s.\n", collectionName_.c_str(), expression.c_str()); return false; } PRINT_DEBUG("%s vector delete result: %zu\n", collectionName_.c_str(), results.IdArray().StrIDArray().size()); return true; } SimilaritySearchMetadataResult DataManagementVectorDatabase::searchMetadata(const std::vector& embedding, int topK, float threshold) { VectorDB::SearchArguments arguments(collectionName_, topK); arguments.AddOutputField(DYNAMIC_FIELD_NAME); // 查询只输出 metadata 列 arguments.SetGuaranteeTimestamp(VectorDB::GuaranteeStrongTs()); arguments.AddTargetVector(DEFAULT_VECTOR_FIELD_NAME, embedding); VectorDB::SearchResults results; bool ret = scene::VectorDatabase::getInstance().search(arguments, results); if (!ret) { PRINT_ERROR("SearchMetadata %s failed. embedding size: %zu\n", collectionName_.c_str(), embedding.size()); return SimilaritySearchMetadataResult(); } SimilaritySearchMetadataResult output; for (const auto& result : results.Results()) { auto& ids = result.Ids().StrIDArray(); if (ids.empty()) { PRINT_INFO("SearchMetadata %s result is empty.\n", collectionName_.c_str()); continue; } auto similarity = result.Scores(); if (ids.size() != similarity.size()) { PRINT_ERROR("Illegal %s result! 
%zu vs %zu\n", collectionName_.c_str(), ids.size(), similarity.size()); continue; } auto dynamicField = result.OutputField(DYNAMIC_FIELD_NAME); VectorDB::JsonFieldDataPtr dynamicFieldPtr = std::static_pointer_cast(dynamicField); auto& dynamicFieldData = dynamicFieldPtr->Data(); for (size_t i = 0; i < ids.size(); ++i) { // 根据相似度阈值筛选结果,数据库查询结果是按照相似度倒排的,如果出现相似度小于阈值的,后面的结果都比阈值小 if (similarity[i] < threshold) { PRINT_INFO("SearchMetadata similarity %f less than threshold %f.\n", similarity[i], threshold); break; } PRINT_DEBUG("SearchMetadata id: %s, dynamic data: %s, similarity: %f.\n", ids[i].c_str(), dynamicFieldData[i].dump().c_str(), similarity[i]); output.emplace_back(MetadataResult{ids[i], similarity[i], dynamicFieldData[i]}); } } return output; } bool DataManagementVectorDatabase::createCollection() { VectorDB::CollectionSchema schema(collectionName_, collectionName_, true); schema.AddField( VectorDB::FieldSchema(DEFAULT_ID_FIELD_NAME, VectorDB::DataType::VARCHAR, "id", true).WithMaxLength(50)); schema.AddField(VectorDB::FieldSchema(DEFAULT_VECTOR_FIELD_NAME, VectorDB::DataType::FLOAT_VECTOR, "vector") .WithDimension(dimension_)); // create index VectorDB::IndexDesc desc(DEFAULT_VECTOR_FIELD_NAME, collectionName_ + "_vector_index", 0); return scene::VectorDatabase::getInstance().createCollection(schema, desc); } kylin-ai-data-management-service-1.2.0.0/src/dao/dataManagementVectorDatabase.h000066400000000000000000000033111520577635400273010ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #pragma once #include #include "constant.h" typedef struct metadataResult { std::string id; float similarity; nlohmann::json metadata; } MetadataResult; using SimilaritySearchMetadataResult = std::vector; typedef struct dataManagementVectorInfo { std::string id; std::vector embedding; nlohmann::json metadata; } DataManagementVectorInfo; class DataManagementVectorDatabase { public: explicit DataManagementVectorDatabase(const std::string& collectionName, int dimension = TEXT_DIMENSION); bool insertVectorData(const std::vector& vectorInfos); bool deleteVectorData(const std::string& expression); SimilaritySearchMetadataResult searchMetadata(const std::vector& embedding, int topK = 10, float threshold = 0.0); ~DataManagementVectorDatabase() = default; private: bool createCollection(); private: std::string collectionName_; int dimension_; }; kylin-ai-data-management-service-1.2.0.0/src/dao/documentParse.cpp000066400000000000000000000077651520577635400247500ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include "documentParse.h" #include #include #include #include "constant.h" const char* DOCUMENT_CHUNK_TEXT_TYPE = "TextChunker"; const char* DOCUMENT_CHUNK_SIZE = "chunk_size"; const char* DOCUMENT_RESPONSE_DATA = "responseData"; const int DOCUMENT_CHUNK_LIMIT = 90; DocumentParse::DocumentParse() { client_ = std::make_shared(); if (client_ == nullptr) { PRINT_ERROR("Failed to create DocumentClient instance."); } } nlohmann::json DocumentParse::syncChunkDocument(const std::vector& filePaths, int32_t size) { std::vector chunkTypes(filePaths.size(), DOCUMENT_CHUNK_TEXT_TYPE); std::vector chunkSizes(filePaths.size(), size); auto result = client_->SyncChunkDocument(filePaths, chunkTypes, chunkSizes); // grpc 通信异常 if (result.errorCode != 0) { PRINT_ERROR("DocumentClient connect failed, err: %d, msg: %s.\n", result.errorCode, result.errorMsg.c_str()); return nlohmann::json::array(); } nlohmann::json root = nlohmann::json::parse(result.data); if (root.empty() || !root.contains(DOCUMENT_RESPONSE_DATA) || !root[DOCUMENT_RESPONSE_DATA].is_array() || root[DOCUMENT_RESPONSE_DATA].empty()) { PRINT_ERROR("Failed to parse DocumentClient result: %s\n", result.data.c_str()); return nlohmann::json::array(); } nlohmann::json& data = root[DOCUMENT_RESPONSE_DATA]; // 过滤掉 没有 filePath 或 chunks 的结果 data.erase(std::remove_if( data.begin(), data.end(), [](const nlohmann::json& value) { return !value.contains(FILEPATH) || !value.contains(CHUNKS); }), data.end()); for (auto& item : data) { if (item[CHUNKS].size() > DOCUMENT_CHUNK_LIMIT) { auto value = item[CHUNKS].get>(); auto res = divideChunks(value); item[CHUNKS] = res; } } return data; } nlohmann::json DocumentParse::syncChunkDocument(const std::vector& filePaths) { return syncChunkDocument(filePaths, Configuration::getInt(DOCUMENT_CHUNK_SIZE, 512)); } std::vector DocumentParse::divideChunks(const std::vector& chunks) { int n = chunks.size(); // 当DOCUMENT_CHUNK_LIMIT长度大于90时,采取分割方案:三个分块,第一个分块取前30,后两个分块随机取30 int part1_len = n / 3; int 
part2_len = n / 3; int part3_len = n - part1_len - part2_len; // 划分三个区段 std::vector part1(chunks.begin(), chunks.begin() + part1_len); std::vector part2(chunks.begin() + part1_len, chunks.begin() + part1_len + part2_len); std::vector part3(chunks.begin() + part1_len + part2_len, chunks.end()); // 随机数生成器 std::random_device rd; std::mt19937 rng(rd()); // 打乱后两个分块 shuffle(part2.begin(), part2.end(), rng); shuffle(part3.begin(), part3.end(), rng); // 合并结果(确保每个区段取30个) std::vector result; result.insert(result.end(), part1.begin(), part1.begin() + 30); result.insert(result.end(), part2.begin(), part2.begin() + 30); result.insert(result.end(), part3.begin(), part3.begin() + 30); return result; } kylin-ai-data-management-service-1.2.0.0/src/dao/documentParse.h000066400000000000000000000023041520577635400243750ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #pragma once #include #include class DocumentParse { public: DocumentParse(); nlohmann::json syncChunkDocument(const std::vector& filePaths); nlohmann::json syncChunkDocument(const std::vector& filePaths, int32_t size); ~DocumentParse() = default; private: std::vector divideChunks(const std::vector& chunks); private: std::shared_ptr client_; }; kylin-ai-data-management-service-1.2.0.0/src/dao/embedding.cpp000066400000000000000000000067211520577635400240440ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include "embedding.h" #include #include #include Embedding::Embedding(DataManagementEmbeddingDataType dataType, scene::TaskPriority priority) : dataType_(dataType), priority_(priority) { switch (dataType_) { case DataManagementEmbeddingDataType::Text: if (!scene::TextEmbeddingProcessor::getInstance().init()) { PRINT_ERROR("TextEmbeddingProcessor init failed\n"); inited_ = false; return; } break; case DataManagementEmbeddingDataType::ImageFlie: case DataManagementEmbeddingDataType::ImageBase64: case DataManagementEmbeddingDataType::ImageText: if (!scene::ImageEmbeddingProcessor::getInstance().init()) { PRINT_ERROR("ImageEmbeddingProcessor init failed.\n"); inited_ = false; return; } break; default: PRINT_ERROR("Embedding input unknown data type: %d\n", dataType_); inited_ = false; return; } inited_ = true; } std::vector Embedding::getEmbedding(const std::string& data) { if (!inited_) { PRINT_ERROR("EmbeddingProcessor not inited.\n"); return std::vector(); } if (data.empty()) { PRINT_ERROR("EmbeddingProcessor input data is empty.\n"); return std::vector(); } scene::TaskID taskID; std::vector ret; switch (dataType_) { case DataManagementEmbeddingDataType::Text: taskID = scene::TextEmbeddingProcessor::getInstance().addTask(data, scene::EmbeddingDataType::Text, priority_); ret = scene::TextEmbeddingProcessor::getInstance().getResult(taskID); break; case DataManagementEmbeddingDataType::ImageFlie: case DataManagementEmbeddingDataType::ImageBase64: taskID = scene::ImageEmbeddingProcessor::getInstance().addTask(data, scene::EmbeddingDataType::ImageFlie, priority_); ret = scene::ImageEmbeddingProcessor::getInstance().getResult(taskID); break; case DataManagementEmbeddingDataType::ImageText: taskID = scene::ImageEmbeddingProcessor::getInstance().addTask(data, scene::EmbeddingDataType::Text, priority_); ret = scene::ImageEmbeddingProcessor::getInstance().getResult(taskID); break; default: PRINT_ERROR("Embedding input unknown data type: %d\n", dataType_); break; } if 
(ret.empty()) { PRINT_ERROR("Embedding result is empty, data: %s.\n", data.c_str()); } return ret; } kylin-ai-data-management-service-1.2.0.0/src/dao/embedding.h000066400000000000000000000022571520577635400235110ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #pragma onec #include #include #include enum class DataManagementEmbeddingDataType { Text = 1, ImageFlie, ImageBase64, ImageText }; class Embedding { public: Embedding(DataManagementEmbeddingDataType dataType, scene::TaskPriority priority); std::vector getEmbedding(const std::string& data); ~Embedding() = default; private: DataManagementEmbeddingDataType dataType_; scene::TaskPriority priority_; bool inited_; }; kylin-ai-data-management-service-1.2.0.0/src/dao/fileDatabase.cpp000066400000000000000000000435551520577635400245000ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include "fileDatabase.h" #include #include #include std::string FileDatabase::getDbFilePath() { std::string homePath = std::getenv("HOME") ? std::string(std::getenv("HOME")) : ""; std::string databasePath = homePath + "/.local/kylin-ai-business-framework/datamanagement/database/"; if (!std::filesystem::exists(databasePath)) { // 创建目录 bool ret = std::filesystem::create_directories(databasePath); if (!ret) { PRINT_ERROR("Create db path failed.\n"); return ""; } PRINT_INFO("Create db path success.\n"); } return databasePath + "fileinfo.db"; } void FileDatabase::createFileInfoTable(SQLite::Database &database) { // 创建Files表 std::string createFileInfoTableSQL = R"( CREATE TABLE IF NOT EXISTS t_file_info ( file_id INTEGER PRIMARY KEY AUTOINCREMENT, enable INTEGER NOT NULL DEFAULT 0, file_path TEXT NOT NULL UNIQUE, file_summary TEXT NOT NULL DEFAULT '', file_modify_time TEXT NOT NULL DEFAULT '0000-00-00 00:00:00', create_time TEXT NOT NULL DEFAULT (datetime('now', 'localtime')), last_modify_time TEXT NOT NULL DEFAULT (datetime('now', 'localtime'))))"; // 创建Tags表 std::string createTagsTableSQL = R"( CREATE TABLE IF NOT EXISTS t_tag_info ( tag_id INTEGER PRIMARY KEY AUTOINCREMENT, tag_name TEXT NOT NULL UNIQUE))"; // 创建FileTags表 std::string createFileTagsTableSQL = R"( CREATE TABLE IF NOT EXISTS t_file_tag_info ( file_id INTEGER, tag_id INTEGER, PRIMARY KEY (file_id, tag_id)))"; if (!database.tableExists("t_file_info")) { PRINT_DEBUG("Create table t_file_info, SQL:%s.\n", createFileInfoTableSQL.c_str()); database.exec(createFileInfoTableSQL); } if (!database.tableExists("t_tag_info")) { PRINT_DEBUG("Create table t_tag_info, SQL:%s.\n", createTagsTableSQL.c_str()); database.exec(createTagsTableSQL); } if (!database.tableExists("t_file_tag_info")) { PRINT_DEBUG("Create table t_file_tag_info, SQL:%s.\n", createFileTagsTableSQL.c_str()); 
database.exec(createFileTagsTableSQL); } std::string createTaskTableSQL = R"( CREATE TABLE IF NOT EXISTS t_ai_index_task ( id INTEGER PRIMARY KEY AUTOINCREMENT, file_id INTEGER NOT NULL, file_path TEXT NOT NULL, state INTEGER NOT NULL DEFAULT 0, type INTEGER NOT NULL, create_time TEXT NOT NULL DEFAULT (datetime('now', 'localtime')), start_time TEXT NOT NULL DEFAULT '0000-00-00 00:00:00', end_time TEXT NOT NULL DEFAULT '0000-00-00 00:00:00', retry_num INTEGER NOT NULL DEFAULT 0, reserve_int INTEGER NOT NULL DEFAULT 0, reserve_char TEXT NOT NULL DEFAULT '', UNIQUE (file_id, type), UNIQUE (file_path, type) ))"; if (!database.tableExists("t_ai_index_task")) { PRINT_DEBUG("Create table t_ai_index_task, SQL:%s.\n", createTaskTableSQL.c_str()); database.exec(createTaskTableSQL); } } SQLite::Database FileDatabase::createDatabase() { SQLite::Database db(getDbFilePath(), SQLite::OPEN_READWRITE | SQLite::OPEN_CREATE, 300000); createFileInfoTable(db); return db; } std::vector FileDatabase::getMultiFileId(SQLite::Database &database, const std::string &filepath) { std::vector out; SQLite::Statement query(database, "SELECT file_id FROM t_file_info WHERE file_path LIKE ?"); query.bind(1, filepath + "%"); while (query.executeStep()) { out.emplace_back(query.getColumn(0)); } PRINT_DEBUG("SQL: %s, result size: %zu.\n", query.getExpandedSQL().c_str(), out.size()); return out; } bool FileDatabase::deleteByFileIds(SQLite::Database &database, const std::string &expression) { SQLite::Transaction transaction(database); std::string sql = "DELETE FROM t_file_info WHERE file_id IN (" + expression + ")"; SQLite::Statement deleteFiles(database, sql); int rows = deleteFiles.exec(); PRINT_DEBUG("SQL: %s, rows: %d.\n", deleteFiles.getExpandedSQL().c_str(), rows); // 删除任务 sql = "DELETE FROM t_ai_index_task WHERE file_id IN (" + expression + ")"; SQLite::Statement statement(database, sql); rows = statement.exec(); transaction.commit(); PRINT_DEBUG("SQL: %s, rows: %d.\n", 
statement.getExpandedSQL().c_str(), rows); return true; } bool FileDatabase::getSingleFileInfo(SQLite::Database &database, const std::string &filepath, FileInfoDO &fileInfo) { std::string sql = "SELECT file_id,enable,file_path,file_summary,file_modify_time FROM t_file_info WHERE file_path = ?"; SQLite::Statement query(database, sql); query.bind(1, filepath); // 只会有一行数据 if (!query.executeStep()) { PRINT_DEBUG("%s, %s, no record.\n", query.getExpandedSQL().c_str(), filepath.c_str()); return false; } fileInfo.fileId = query.getColumn(0); fileInfo.enable = query.getColumn(1); fileInfo.filePath = query.getColumn(2).getString(); fileInfo.fileSummary = query.getColumn(3).getString(); fileInfo.modifyTime = query.getColumn(4).getString(); PRINT_DEBUG("SQL: %s, file id:%ld, enable:%d modify time:%s.\n", query.getExpandedSQL().c_str(), fileInfo.fileId, fileInfo.enable, fileInfo.modifyTime.c_str()); return true; } std::vector FileDatabase::getAllFileInfos(SQLite::Database &database) { std::vector out; SQLite::Statement query(database, "SELECT file_id,enable,file_path,file_summary,file_modify_time FROM t_file_info"); while (query.executeStep()) { out.emplace_back(FileInfoDO{query.getColumn(0), query.getColumn(1), query.getColumn(2), query.getColumn(3), query.getColumn(4)}); } PRINT_DEBUG("SQL: %s, result: %zu.\n", query.getExpandedSQL().c_str(), out.size()); return out; } std::string FileDatabase::getFilePathById(SQLite::Database &database, int64_t fileId) { std::string sql = "SELECT file_path FROM t_file_info WHERE file_id = ?"; SQLite::Statement query(database, sql); query.bind(1, fileId); // 只会有一行数据 if (!query.executeStep()) { PRINT_ERROR("SQL: %s, %ld, no record.\n", query.getExpandedSQL().c_str(), fileId); return std::string(); } std::string filepath = query.getColumn(0); PRINT_DEBUG("SQL: %s, result: %s.\n", query.getExpandedSQL().c_str(), filepath.c_str()); return filepath; } std::vector FileDatabase::getAllTags(SQLite::Database &database) { std::vector out; std::string 
sqlGetTags = "SELECT t_tag_info.tag_name, COUNT(t_file_info.file_id) AS file_count " "FROM t_tag_info " "JOIN t_file_tag_info ON t_tag_info.tag_id = t_file_tag_info.tag_id " "JOIN t_file_info ON t_file_tag_info.file_id = t_file_info.file_id " "GROUP BY t_tag_info.tag_name " "ORDER BY file_count DESC"; SQLite::Statement query(database, sqlGetTags); while (query.executeStep()) { out.emplace_back(query.getColumn(0)); } PRINT_DEBUG("SQL: %s, result: %zu.\n", query.getExpandedSQL().c_str(), out.size()); return out; } std::vector FileDatabase::getFileTags(SQLite::Database &database, const std::string &filepath) { std::vector tags; std::string sqlGetTags = "SELECT tag_name " "FROM t_tag_info " "JOIN t_file_tag_info ON t_tag_info.tag_id = t_file_tag_info.tag_id " "JOIN t_file_info ON t_file_tag_info.file_id = t_file_info.file_id " "WHERE t_file_info.file_path = ?"; SQLite::Statement query(database, sqlGetTags); query.bind(1, filepath); while (query.executeStep()) { tags.push_back(query.getColumn(0)); } PRINT_DEBUG("SQL: %s, result: %zu.\n", query.getExpandedSQL().c_str(), tags.size()); return tags; } std::vector FileDatabase::getTagNamesByTagIds(SQLite::Database &database, const std::unordered_set &tagIds) { std::string expression = Utils::joinIds(tagIds); std::string sql = "SELECT tag_name FROM t_tag_info WHERE tag_id IN (" + expression + ")"; SQLite::Statement query(database, sql); std::vector result; while (query.executeStep()) { result.push_back(query.getColumn(0)); } PRINT_DEBUG("SQL: %s, result size: %zu.\n", query.getExpandedSQL().c_str(), result.size()); return result; } std::vector FileDatabase::getFilePathsByTagIds(SQLite::Database &database, const std::unordered_set &tagIds) { std::string expression = Utils::joinIds(tagIds); std::string sql = "SELECT t_file_info.file_path " "FROM t_file_info " "JOIN t_file_tag_info ON t_file_info.file_id = t_file_tag_info.file_id " "JOIN t_tag_info ON t_file_tag_info.tag_id = t_tag_info.tag_id " "WHERE t_tag_info.tag_id IN (" + 
expression + ")"; SQLite::Statement query(database, sql); std::vector filePaths; while (query.executeStep()) { filePaths.push_back(query.getColumn(0)); } PRINT_DEBUG("SQL: %s, result size: %zu.\n", query.getExpandedSQL().c_str(), filePaths.size()); return filePaths; } std::string FileDatabase::getFileSummary(SQLite::Database &database, const std::string &filepath) { std::string sql = "SELECT file_summary FROM t_file_info WHERE file_path = ?"; SQLite::Statement query(database, sql); query.bind(1, filepath); // 只会有一行数据 if (!query.executeStep()) { PRINT_WARN("SQL: %s, %s, no record.\n", query.getExpandedSQL().c_str(), filepath.c_str()); return std::string(); } std::string summary = query.getColumn(0); PRINT_DEBUG("SQL: %s, result: %s.\n", query.getExpandedSQL().c_str(), summary.c_str()); return summary; } bool FileDatabase::getTagIdByTagName(SQLite::Database &database, const std::string &tag, int64_t &tagId) { std::string sql = "SELECT tag_id FROM t_tag_info WHERE tag_name = ?"; SQLite::Statement query(database, sql); query.bind(1, tag); // 只会有一行数据 if (!query.executeStep()) { PRINT_WARN("SQL: %s, %s, no record.\n", query.getExpandedSQL().c_str(), tag.c_str()); return false; } tagId = query.getColumn(0); PRINT_DEBUG("SQL: %s, result: %ld.\n", query.getExpandedSQL().c_str(), tagId); return true; } std::vector FileDatabase::getTagIdsByFileId(SQLite::Database &database, int64_t fileId) { std::string sql = "SELECT tag_id FROM t_file_tag_info WHERE file_id = ?"; SQLite::Statement query(database, sql); query.bind(1, fileId); std::vector tagIds; while (query.executeStep()) { tagIds.push_back(query.getColumn(0)); } PRINT_DEBUG("SQL: %s, result: %s.\n", query.getExpandedSQL().c_str(), vectorToString(tagIds).c_str()); return tagIds; } bool FileDatabase::isTagInUse(SQLite::Database &database, int64_t tagId) { std::string sql = "SELECT count(*) FROM t_file_tag_info WHERE tag_id = ?"; SQLite::Statement query(database, sql); query.bind(1, tagId); if (!query.executeStep()) { 
PRINT_ERROR("SQL: %s, no record.\n", query.getExpandedSQL().c_str()); return false; } int count = query.getColumn(0).getInt(); PRINT_DEBUG("SQL: %s, result: %d.\n", query.getExpandedSQL().c_str(), count); return count != 0; } // 调用函数已经开启了事务 bool FileDatabase::insertNewTagName(SQLite::Database &database, int64_t fileId, const std::string &name, int64_t &tagId) { std::string tagSQL = "INSERT INTO t_tag_info (tag_name) VALUES (?)"; SQLite::Statement tagStatement(database, tagSQL); tagStatement.bind(1, name); int rows = tagStatement.exec(); if (rows != 1) { PRINT_ERROR("SQL: %s, insert tag name failed, rows: %d.\n", tagStatement.getExpandedSQL().c_str(), rows); return false; } tagId = database.getLastInsertRowid(); PRINT_DEBUG("SQL: %s, tagId: %ld.\n", tagStatement.getExpandedSQL().c_str(), tagId); std::string fileTagsSQL = "INSERT INTO t_file_tag_info (file_id, tag_id) VALUES (?, ?)"; SQLite::Statement fileTagStatement(database, fileTagsSQL); fileTagStatement.bind(1, fileId); fileTagStatement.bind(2, tagId); rows = fileTagStatement.exec(); if (rows != 1) { PRINT_ERROR("SQL: %s, insert file tag relation failed, rows: %d.\n", fileTagStatement.getExpandedSQL().c_str(), rows); return false; } PRINT_DEBUG("SQL: %s, success.\n", fileTagStatement.getExpandedSQL().c_str()); return true; } bool FileDatabase::deleteFileTagRelation(SQLite::Database &database, int64_t fileId, int64_t tagId) { std::string sql = "DELETE FROM t_file_tag_info WHERE file_id = ? 
AND tag_id = ?"; SQLite::Statement statement(database, sql); statement.bind(1, fileId); statement.bind(2, tagId); int rows = statement.exec(); PRINT_DEBUG("SQL: %s, success %d rows.\n", statement.getExpandedSQL().c_str(), rows); return true; } bool FileDatabase::insertFileTag(SQLite::Database &database, int64_t fileId, int64_t tagId) { std::string sql = "INSERT INTO t_file_tag_info VALUES(?, ?)"; SQLite::Statement statement(database, sql); statement.bind(1, fileId); statement.bind(2, tagId); int rows = statement.exec(); if (rows != 1) { PRINT_ERROR("SQL: %s, insert file tag relation failed, rows: %d.\n", statement.getExpandedSQL().c_str(), rows); return false; } PRINT_DEBUG("SQL: %s, success.\n", statement.getExpandedSQL().c_str()); return true; } bool FileDatabase::deleteTagInfo(SQLite::Database &database, int64_t tagId) { std::string sql = "DELETE FROM t_tag_info WHERE tag_id = ?"; SQLite::Statement statement(database, sql); statement.bind(1, tagId); int rows = statement.exec(); PRINT_DEBUG("SQL: %s, success %d rows.\n", statement.getExpandedSQL().c_str(), rows); return true; } bool FileDatabase::insertFileInfo(SQLite::Database &database, FileInfoDO &fileInfo) { std::string sql = "INSERT OR IGNORE INTO t_file_info(enable, file_path, file_modify_time) VALUES(?, ?, ?)"; SQLite::Statement statement(database, sql); statement.bind(1, fileInfo.enable); statement.bind(2, fileInfo.filePath); statement.bind(3, fileInfo.modifyTime); int rows = statement.exec(); if (rows != 1) { PRINT_ERROR("SQL: %s, insert file info failed, rows: %d.\n", statement.getExpandedSQL().c_str(), rows); return false; } // 回填 file_id fileInfo.fileId = database.getLastInsertRowid(); PRINT_DEBUG("SQL: %s, success, file_id = %ld.\n", statement.getExpandedSQL().c_str(), fileInfo.fileId); return true; } bool FileDatabase::updateFileInfo(SQLite::Database &database, const FileInfoDO &fileInfo) { std::string sql = "UPDATE t_file_info SET enable = ?, file_summary = ?, file_modify_time = ?, " 
"last_modify_time = datetime('now', 'localtime') WHERE file_id = ?"; SQLite::Statement statement(database, sql); statement.bind(1, fileInfo.enable); statement.bind(2, fileInfo.fileSummary); statement.bind(3, fileInfo.modifyTime); statement.bind(4, fileInfo.fileId); int rows = statement.exec(); PRINT_DEBUG("SQL: %s, success %d rows.\n", statement.getExpandedSQL().c_str(), rows); return true; } bool FileDatabase::insertTextTask(SQLite::Database &database, int64_t fileId, const std::string &filePath) { std::string sql = "INSERT OR IGNORE INTO t_ai_index_task(file_id, file_path, type) VALUES(?, ?, ?), (?, ?, ?), (?, ?, ?)"; SQLite::Statement statement(database, sql); statement.bind(1, fileId); statement.bind(2, filePath); statement.bind(3, 1); statement.bind(4, fileId); statement.bind(5, filePath); statement.bind(6, 2); statement.bind(7, fileId); statement.bind(8, filePath); statement.bind(9, 3); int rows = statement.exec(); if (rows != 3) { PRINT_ERROR("SQL: %s, insert text ai index task failed, rows: %d.\n", statement.getExpandedSQL().c_str(), rows); return false; } PRINT_DEBUG("SQL: %s, success %d rows.\n", statement.getExpandedSQL().c_str(), rows); return true; } bool FileDatabase::updateTextTask(SQLite::Database &database, int64_t fileId) { std::string sql = "UPDATE t_ai_index_task SET state = 0, start_time = '0000-00-00 00:00:00', end_time = '0000-00-00 00:00:00', " "retry_num = 0 WHERE file_id = ?"; SQLite::Statement statement(database, sql); statement.bind(1, fileId); int rows = statement.exec(); PRINT_DEBUG("SQL: %s, success %d rows.\n", statement.getExpandedSQL().c_str(), rows); return true; } std::string FileDatabase::vectorToString(const std::vector &input) { if (input.empty()) { return "[]"; } std::string out = "["; for (const auto &item : input) { out += (std::to_string(item) + ","); } out[out.length() - 1] = ']'; return out; } 
kylin-ai-data-management-service-1.2.0.0/src/dao/fileDatabase.h000066400000000000000000000066521520577635400241420ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #pragma once #include #include #include #include typedef struct fileInfoDO { int64_t fileId; int enable; std::string filePath; std::string fileSummary; std::string modifyTime; } FileInfoDO; class FileDatabase { public: FileDatabase() = delete; static SQLite::Database createDatabase(); static void createFileInfoTable(SQLite::Database& database); static std::vector getMultiFileId(SQLite::Database& database, const std::string& filepath); static bool deleteByFileIds(SQLite::Database& database, const std::string& expression); static bool getSingleFileInfo(SQLite::Database& database, const std::string& filepath, FileInfoDO& fileInfo); static std::vector getAllFileInfos(SQLite::Database& database); static std::string getFilePathById(SQLite::Database& database, int64_t fileId); static std::vector getAllTags(SQLite::Database& database); static std::vector getFileTags(SQLite::Database& database, const std::string& filepath); static std::vector getTagNamesByTagIds(SQLite::Database& database, const std::unordered_set& tagIds); static std::vector getFilePathsByTagIds(SQLite::Database& database, const std::unordered_set& tagIds); static std::string getFileSummary(SQLite::Database& database, const 
std::string& filepath); static bool getTagIdByTagName(SQLite::Database& database, const std::string& tag, int64_t& tagId); static std::vector getTagIdsByFileId(SQLite::Database& database, int64_t fileId); static bool isTagInUse(SQLite::Database& database, int64_t tagId); static bool insertNewTagName(SQLite::Database& database, int64_t fileId, const std::string& name, int64_t& tagId); static bool deleteFileTagRelation(SQLite::Database& database, int64_t fileId, int64_t tagId); static bool insertFileTag(SQLite::Database& database, int64_t fileId, int64_t tagId); static bool deleteTagInfo(SQLite::Database& database, int64_t tagId); static bool insertFileInfo(SQLite::Database& database, FileInfoDO& fileInfo); static bool updateFileInfo(SQLite::Database& database, const FileInfoDO& fileInfo); static bool insertTextTask(SQLite::Database& database, int64_t fileId, const std::string& filePath); static bool updateTextTask(SQLite::Database& database, int64_t fileId); ~FileDatabase() = default; static std::string vectorToString(const std::vector& input); static std::string getDbFilePath(); }; kylin-ai-data-management-service-1.2.0.0/src/main.cpp000066400000000000000000000017321520577635400223040ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include #include "aiIndex.h" int main(int argc, char *argv[]) { scene::Application::getInstance().setFunc( std::bind(&DataManagement::AIIndex::exec, &DataManagement::AIIndex::getInstance())); scene::Application::getInstance().exec(argc, argv); return 0; } kylin-ai-data-management-service-1.2.0.0/src/service/000077500000000000000000000000001520577635400223115ustar00rootroot00000000000000kylin-ai-data-management-service-1.2.0.0/src/service/constant.cpp000066400000000000000000000025501520577635400246500ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ const char* FILE_TAGS_COLLECTION_NAME = "files_tags"; const char* FILE_SUMMARY_COLLECTION_NAME = "files_summary"; // 摘要向量暂时未用 const char* VISION_FILE_CONTENT_COLLECTION_NAME = "vision_files_content_vector"; const char* TEXT_FILE_CONTENT_COLLECTION_NAME = "text_files_content_vector"; extern const int TEXT_DIMENSION = 768; // 文本模型维度 1024,目前先写死 extern const int IMAGE_DIMENSION = 1024; // 图片模型维度 512,目前先写死 extern const float TEXT_SEARCH_THRESHOLD = 0.75; extern const float VISION_SEARCH_THRESHOLD = 0.5; extern const float TAG_SEARCH_THRESHOLD = 0.7; const char* CHUNKS = "chunks"; const char* FILEPATH = "filePath"; kylin-ai-data-management-service-1.2.0.0/src/service/constant.h000066400000000000000000000037101520577635400243140ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. 
* * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #pragma once #include // 使用vector为了保证顺序,pair key: filepath, value: similarity using SimilaritySearchResult = std::vector>; typedef struct dataManagementTagInfo { std::string tagName; int64_t tagId; } DataManagementTagInfo; typedef struct dataManagementTagFileInfo { std::string filePath; std::vector tags; std::string summary; } DataManagementTagFileInfo; typedef struct dataManagementFileInfo { std::string filePath; std::string modifyTime; std::vector tags; std::string summary; int64_t fileId; } DataManagementFileInfo; typedef struct dataManagementChunkFileInfo { int64_t fileId; std::string filePath; std::vector chunks; } DataManagementChunkFileInfo; extern const char* FILE_TAGS_COLLECTION_NAME; extern const char* FILE_SUMMARY_COLLECTION_NAME; extern const char* VISION_FILE_CONTENT_COLLECTION_NAME; extern const char* TEXT_FILE_CONTENT_COLLECTION_NAME; extern const int TEXT_DIMENSION; extern const int IMAGE_DIMENSION; extern const float TEXT_SEARCH_THRESHOLD; extern const float VISION_SEARCH_THRESHOLD; extern const float TAG_SEARCH_THRESHOLD; extern const char* CHUNKS; extern const char* FILEPATH; kylin-ai-data-management-service-1.2.0.0/src/service/fileInfoService.cpp000066400000000000000000000257051520577635400261020ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. 
* * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include "fileInfoService.h" #include #include #include #include #include #include #include "constant.h" #include "embedding.h" // 该方法用于去获取新增加文件或更新文件的信息 void FileInfoService::classifyFile(const std::unordered_set& filePaths, std::unordered_set& newFilePaths, std::vector& updateFileInfos) { if (filePaths.empty()) { PRINT_WARN("FileInfoService::classifyFile input is empty.\n"); return; } for (const auto& filePath : filePaths) { FileInfoDO fileInfo; // 查询文件信息 bool success = FileDatabase::getSingleFileInfo(db, filePath, fileInfo); if (!success) { // 全新的文件 newFilePaths.insert(filePath); continue; } // 需要更新的文件 updateFileInfos.push_back(fileInfo); } } bool FileInfoService::deleteTextFileVectorInfo(const std::unordered_set& fileIds) { if (fileIds.empty()) { PRINT_WARN("deleteTextFileVectorInfo input fileIds is empty, no need delete.\n"); return true; } std::string ids = Utils::joinIds(fileIds); if (ids.empty()) { PRINT_ERROR("deleteTextFileVectorInfo join ids result is empty, fileIds size: %zu.\n", fileIds.size()); return false; } std::string expression = "file_id in [" + ids + "]"; PRINT_DEBUG("expression: %s\n", expression.c_str()); DataManagementVectorDatabase textDB(TEXT_FILE_CONTENT_COLLECTION_NAME); bool ret = textDB.deleteVectorData(expression); if (!ret) { PRINT_ERROR("deleteTextFileVectorInfo delete text vector db failed.\n"); return false; } return true; } bool 
FileInfoService::deleteFiles(const std::unordered_set& fileIds) { if (fileIds.empty()) { PRINT_WARN("DeleteFiles input fileIds is empty, no need delete.\n"); return true; } // 先删除 向量数据库 中的数据,以免 file_id 对应不上 std::string ids = Utils::joinIds(fileIds); if (ids.empty()) { PRINT_ERROR("DeleteFiles join ids result is empty, fileIds size: %zu.\n", fileIds.size()); return false; } bool ret = deleteVectorData(ids); if (!ret) { PRINT_ERROR("DeleteFiles delete vector db data failed.\n"); return false; } // 删除 SQLITE 中的数据 ret = FileDatabase::deleteByFileIds(db, ids); if (!ret) { PRINT_ERROR("DeleteFiles delete sqlite db data failed.\n"); return false; } return true; } bool FileInfoService::deleteVectorData(const std::string& fileIds) { std::string expression = "file_id in [" + fileIds + "]"; PRINT_DEBUG("expression: %s\n", expression.c_str()); DataManagementVectorDatabase textDB(TEXT_FILE_CONTENT_COLLECTION_NAME); bool ret = textDB.deleteVectorData(expression); if (!ret) { PRINT_ERROR("handleDeleteFiles delete text vector db failed.\n"); return false; } DataManagementVectorDatabase imageDB(VISION_FILE_CONTENT_COLLECTION_NAME, IMAGE_DIMENSION); ret = imageDB.deleteVectorData(expression); if (!ret) { PRINT_ERROR("handleDeleteFiles delete image vector db failed.\n"); return false; } return true; } ERROR_CODE FileInfoService::searchFilesByText(const std::string& text, SimilaritySearchResult& result) { // 用于搜索的向量化请求需要设置为高优先级 Embedding textEmbedding(DataManagementEmbeddingDataType::Text, scene::TaskPriority::HIGH); Embedding imageTextEmbedding(DataManagementEmbeddingDataType::ImageText, scene::TaskPriority::HIGH); PRINT_DEBUG("start embedding.\n"); std::vector textSideEmbedding = textEmbedding.getEmbedding(text); if (textSideEmbedding.empty()) { PRINT_ERROR("SearchFilesByText get text embedding failed, text: %s.\n", text.c_str()); return EMBEDDING_FAILED; } std::vector imageSideEmbedding = imageTextEmbedding.getEmbedding(text); if (imageSideEmbedding.empty()) { 
PRINT_ERROR("SearchFilesByText get imagetext embedding failed, text: %s.\n", text.c_str()); return EMBEDDING_FAILED; } // 用向量去向量数据库里查 DataManagementVectorDatabase textVectorDb(TEXT_FILE_CONTENT_COLLECTION_NAME, TEXT_DIMENSION); DataManagementVectorDatabase imageVectorDb(VISION_FILE_CONTENT_COLLECTION_NAME, IMAGE_DIMENSION); PRINT_DEBUG("start to search vector db.\n"); SimilaritySearchMetadataResult textMetadata = textVectorDb.searchMetadata(textSideEmbedding, 50, TEXT_SEARCH_THRESHOLD); SimilaritySearchMetadataResult imagMetadata = imageVectorDb.searchMetadata(imageSideEmbedding, 50, VISION_SEARCH_THRESHOLD); if (textMetadata.empty() && imagMetadata.empty()) { PRINT_INFO("Search nothing by text: %s.\n", text.c_str()); result = SimilaritySearchResult(); return SUCCESS; } if (!textMetadata.empty()) { SimilaritySearchResult textSearchResult = searchResultConvert(textMetadata); result.insert(result.end(), textSearchResult.begin(), textSearchResult.end()); } if (!imagMetadata.empty()) { SimilaritySearchResult visionSearchResult = searchResultConvert(imagMetadata); result.insert(result.end(), visionSearchResult.begin(), visionSearchResult.end()); } return SUCCESS; } /** * 获取文件的最后修改时间 * @param filePath * @return */ std::string FileInfoService::getLastModifyTime(const std::string& filePath) { std::string defaultTime = "0000-00-00 00:00:00"; if (!std::filesystem::exists(filePath)) { PRINT_ERROR("File %s don't exist, can't get modify time.\n", filePath.c_str()); return defaultTime; } struct stat fileStat{}; if (stat(filePath.c_str(), &fileStat) != 0) { PRINT_ERROR("File %s can't get modify time.\n", filePath.c_str()); return defaultTime; } // 将时间戳转换为本地时间 std::time_t mtime = fileStat.st_mtime; std::tm* tm_local = localtime(&mtime); if (tm_local == nullptr) { PRINT_ERROR("File %s converting time to local time failed.\n", filePath.c_str()); return defaultTime; } // 定义输出格式 char buffer[32]; memset(buffer, 0, sizeof(buffer)); // 初始化buffer为全零,以防万一 if (strftime(buffer, 
sizeof(buffer), "%Y-%m-%d %H:%M:%S", tm_local) == 0) { PRINT_ERROR("File %s formatting time failed.\n", filePath.c_str()); return defaultTime; } return buffer; } SimilaritySearchResult FileInfoService::searchResultConvert(const SimilaritySearchMetadataResult& metadataResult) { SimilaritySearchResult output; std::set exist; for (const auto& item : metadataResult) { PRINT_DEBUG("SearchResultConvert metadata: %s, similarity: %f, id: %s.\n", item.metadata.dump().c_str(), item.similarity, item.id.c_str()); int64_t fileId = item.metadata["file_id"]; // 由于文本文件是分割后存储,搜索可能会返回同一个文件中的多个段落,合并结果,只保留同一文件最高相似度的结果 // 相同文件,结果只返回相似度的最大值,查询结果是按照距离排序的,所以相同文件只存第一个即可 auto ret = exist.insert(fileId); if (!ret.second) { PRINT_DEBUG("SearchResultConvert file_id: %ld already add.\n", fileId); continue; } std::string filePath = FileDatabase::getFilePathById(db, fileId); if (filePath.empty()) { PRINT_ERROR("Illegal result! Cannot find file path, id: %ld\n", fileId); continue; } output.emplace_back(std::make_pair(filePath, item.similarity)); } return output; } void FileInfoService::insertTask(const std::unordered_set& filePaths) { if (filePaths.empty()) { PRINT_WARN("FileInfoService::insertTask input is empty.\n"); return; } for (const auto& filePath : filePaths) { SQLite::Transaction transaction(db); // 查询文件修改时间 std::string modifyTime = FileInfoService::getLastModifyTime(filePath); FileInfoDO fileInfo; fileInfo.enable = 0; fileInfo.filePath = filePath; fileInfo.modifyTime = modifyTime; bool success = FileDatabase::insertFileInfo(db, fileInfo); if (!success) { PRINT_ERROR("%s text insert file info failed.\n", filePath.c_str()); continue; } success = FileDatabase::insertTextTask(db, fileInfo.fileId, filePath); if (!success) { PRINT_ERROR("%s text insert ai index task failed.\n", filePath.c_str()); continue; } transaction.commit(); } } std::unordered_set FileInfoService::updateTask(std::vector& fileInfos) { if (fileInfos.empty()) { PRINT_WARN("FileInfoService::updateTask input is empty.\n"); 
return {}; } std::unordered_set updateIds; // 首先判断判断是否是真需要修改的文件 for (auto& fileInfo : fileInfos) { SQLite::Transaction transaction(db); std::string modifyTime = FileInfoService::getLastModifyTime(fileInfo.filePath); // 有记录,更新时间相同,直接跳过 if (modifyTime == fileInfo.modifyTime) { PRINT_INFO("%s modify time %s is same, skip this file.\n", fileInfo.filePath.c_str(), modifyTime.c_str()); continue; } fileInfo.enable = 0; fileInfo.fileSummary = ""; fileInfo.modifyTime = modifyTime; bool success = FileDatabase::updateFileInfo(db, fileInfo); if (!success) { PRINT_ERROR("%s text update file info failed.\n", fileInfo.filePath.c_str()); continue; } success = FileDatabase::updateTextTask(db, fileInfo.fileId); if (!success) { PRINT_ERROR("%s text update ai index task failed.\n", fileInfo.filePath.c_str()); continue; } transaction.commit(); // 获取更新文件id updateIds.insert(fileInfo.fileId); } return updateIds; } kylin-ai-data-management-service-1.2.0.0/src/service/fileInfoService.h000066400000000000000000000035701520577635400255430ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #pragma once #include #include #include "dataManagementVectorDatabase.h" #include "fileDatabase.h" class FileInfoService { public: FileInfoService() : db(FileDatabase::createDatabase()) {} void classifyFile(const std::unordered_set& filePaths, std::unordered_set& newFilePaths, std::vector& updateFileInfos); static bool deleteTextFileVectorInfo(const std::unordered_set& fileIds); bool deleteFiles(const std::unordered_set& fileIds); ERROR_CODE searchFilesByText(const std::string& text, SimilaritySearchResult& result); static std::string getLastModifyTime(const std::string& filePath); void insertTask(const std::unordered_set& filePaths); std::unordered_set updateTask(std::vector& fileInfos); SQLite::Database getDatabase() { return std::move(db); } ~FileInfoService() = default; private: static bool deleteVectorData(const std::string& fileIds); SimilaritySearchResult searchResultConvert(const SimilaritySearchMetadataResult& metadataResult); private: SQLite::Database db; }; kylin-ai-data-management-service-1.2.0.0/src/service/imageService.cpp000066400000000000000000000126301520577635400254220ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include "imageService.h" #include #include #include "dataManagementVectorDatabase.h" #include "embedding.h" #include "fileInfoService.h" void ImageService::embedding(const std::unordered_set& imgFilePaths) { if (imgFilePaths.empty()) { PRINT_WARN("ImageService::embedding input is empty.\n"); return; } for (const auto& filePath : imgFilePaths) { // 查询文件信息,根据更新时间判断文件是否真的有修改 std::string modifyTime = FileInfoService::getLastModifyTime(filePath); FileInfoDO fileInfo; bool success = FileDatabase::getSingleFileInfo(db, filePath, fileInfo); if (success) { // 有记录,更新时间相同,直接跳过 if (fileInfo.enable && modifyTime == fileInfo.modifyTime) { PRINT_INFO("%s modify time %s is same, skip this file.\n", filePath.c_str(), modifyTime.c_str()); continue; } // 图片向量化 Embedding imageEmbedding(DataManagementEmbeddingDataType::ImageFlie, scene::TaskPriority::MEDIUM); std::vector embedding = imageEmbedding.getEmbedding(filePath); if (embedding.empty()) { PRINT_ERROR("%s image embedding result is empty.\n", filePath.c_str()); continue; } // 向量化任务比较耗时,避免长时间占用sqlite的写锁,向量化任务结束后,在开启事务,更新文件信息和向量数据库 SQLite::Transaction transaction(db); if (!updateImageFileInfo(fileInfo, modifyTime, embedding)) { PRINT_ERROR("%s update image file info failed.\n", filePath.c_str()); continue; } transaction.commit(); continue; } // 图片向量化 Embedding imageEmbedding(DataManagementEmbeddingDataType::ImageFlie, scene::TaskPriority::MEDIUM); std::vector embedding = imageEmbedding.getEmbedding(filePath); if (embedding.empty()) { PRINT_ERROR("%s image embedding result is empty.\n", filePath.c_str()); continue; } SQLite::Transaction transaction(db); fileInfo.filePath = filePath; if (!insertImageFileInfo(fileInfo, modifyTime, embedding)) { PRINT_ERROR("%s insert image file info failed.\n", filePath.c_str()); continue; } transaction.commit(); } } bool ImageService::insertImageFileInfo(FileInfoDO& fileInfo, const std::string& modifyTime, const std::vector& embedding) { // 全新的文件,新插入记录 fileInfo.enable = 1; fileInfo.modifyTime = 
modifyTime; bool success = FileDatabase::insertFileInfo(db, fileInfo); if (!success) { PRINT_ERROR("%s image insert file info failed.\n", fileInfo.filePath.c_str()); return false; } if (!updateVectorDatabaseData(fileInfo, embedding)) { PRINT_ERROR("%s image update vector data failed.\n", fileInfo.filePath.c_str()); return false; } return true; } bool ImageService::updateImageFileInfo(FileInfoDO& fileInfo, const std::string& modifyTime, const std::vector& embedding) { // 图片只需更新修改时间,无摘要信息 fileInfo.enable = 1; fileInfo.modifyTime = modifyTime; bool success = FileDatabase::updateFileInfo(db, fileInfo); if (!success) { PRINT_ERROR("%s image update file info failed.\n", fileInfo.filePath.c_str()); return false; } if (!updateVectorDatabaseData(fileInfo, embedding)) { PRINT_ERROR("%s image update vector data failed.\n", fileInfo.filePath.c_str()); return false; } return true; } bool ImageService::updateVectorDatabaseData(const FileInfoDO& fileInfo, const std::vector& embedding) { // 存入向量数据库,先删除再添加 DataManagementVectorDatabase imageDB(VISION_FILE_CONTENT_COLLECTION_NAME, IMAGE_DIMENSION); std::string expression = "file_id == " + std::to_string(fileInfo.fileId); bool success = imageDB.deleteVectorData(expression); if (!success) { PRINT_ERROR("%s delete image vector data failed.\n", fileInfo.filePath.c_str()); return false; } std::string uuid = Utils::generateUUID(); nlohmann::json metadata; metadata["file_id"] = fileInfo.fileId; success = imageDB.insertVectorData({DataManagementVectorInfo{uuid, embedding, metadata}}); if (!success) { PRINT_ERROR("%s insert image vector data failed.\n", fileInfo.filePath.c_str()); return false; } return true; }kylin-ai-data-management-service-1.2.0.0/src/service/imageService.h000066400000000000000000000025211520577635400250650ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. 
* * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #pragma once #include #include #include "fileDatabase.h" class ImageService { public: ImageService() : db(FileDatabase::createDatabase()) {} void embedding(const std::unordered_set& imgFilePaths); ~ImageService() = default; private: bool insertImageFileInfo(FileInfoDO& fileInfo, const std::string& modifyTime, const std::vector& embedding); bool updateImageFileInfo(FileInfoDO& fileInfo, const std::string& modifyTime, const std::vector& embedding); bool updateVectorDatabaseData(const FileInfoDO& fileInfo,const std::vector& embedding); private: SQLite::Database db; }; kylin-ai-data-management-service-1.2.0.0/src/service/tagService.cpp000066400000000000000000000154351520577635400251210ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include "tagService.h" #include #include #include #include #include "dataManagementVectorDatabase.h" #include "embedding.h" std::unordered_set TagService::searchSimilarTagIdsByTagNames(const std::vector& tags) { std::unordered_set output; Embedding embedding(DataManagementEmbeddingDataType::Text, scene::TaskPriority::HIGH); DataManagementVectorDatabase vdb(FILE_TAGS_COLLECTION_NAME); for (const auto& tag : tags) { // 对 tag 内容进行向量化 std::vector embeddingResult = embedding.getEmbedding(tag); if (embeddingResult.empty()) { PRINT_ERROR("Text embedding result is empty.\n"); return std::unordered_set(); } // 根据向量值查询相似标签ID SimilaritySearchMetadataResult metadata = vdb.searchMetadata(embeddingResult, 10, TAG_SEARCH_THRESHOLD); if (metadata.empty()) { PRINT_WARN("SearchMetadata result is empty. embedding size: %zu\n", embeddingResult.size()); return std::unordered_set(); } for (const auto& item : metadata) { int64_t tagId = item.metadata["tag_id"]; output.insert(tagId); } } return output; } std::vector TagService::searchSimilarTagNames(const std::vector& tags) { std::unordered_set tagIds = searchSimilarTagIdsByTagNames(tags); if (tagIds.empty()) { PRINT_WARN("SearchSimilarTagNames search tag id is empty.\n"); return std::vector(); } std::vector tagNames = FileDatabase::getTagNamesByTagIds(db, tagIds); if (tagNames.empty()) { PRINT_WARN("SearchSimilarTagNames search tag name is empty.\n"); return std::vector(); } return tagNames; } std::vector TagService::searchSimilarFilePaths(const std::vector& tags) { std::unordered_set tagIds = searchSimilarTagIdsByTagNames(tags); if (tagIds.empty()) { PRINT_WARN("SearchSimilarFilePaths search tag id is empty.\n"); return std::vector(); } std::vector filePaths = FileDatabase::getFilePathsByTagIds(db, tagIds); if (filePaths.empty()) { PRINT_WARN("SearchSimilarFilePaths search file path is empty.\n"); return std::vector(); } return filePaths; } void TagService::deleteFileTags(int64_t fileId, const std::vector& tagIds) { for (const auto& 
tagId : tagIds) { SQLite::Transaction transaction(db); FileDatabase::deleteFileTagRelation(db, fileId, tagId); if (FileDatabase::isTagInUse(db, tagId)) { transaction.commit(); continue; } // 删除 tag 信息 DataManagementVectorDatabase vdb(FILE_TAGS_COLLECTION_NAME); std::string expression = "tag_id == " + std::to_string(tagId); vdb.deleteVectorData(expression); FileDatabase::deleteTagInfo(db, tagId); transaction.commit(); } } void TagService::deleteFileTags(const std::unordered_set& deletedFileIds) { if (deletedFileIds.empty()) { PRINT_WARN("TagService::deleteFileTags input is empty.\n"); return; } for (const auto& fileId : deletedFileIds) { std::vector tagIds = FileDatabase::getTagIdsByFileId(db, fileId); deleteFileTags(fileId, tagIds); } } void TagService::updateFileTags(int64_t fileId, const std::set& newTags) { std::vector newTagIds; std::vector oldTagIds = FileDatabase::getTagIdsByFileId(db, fileId); for (const auto& tagName : newTags) { SQLite::Transaction transaction(db); int64_t tagId; bool ret = FileDatabase::getTagIdByTagName(db, tagName, tagId); if (ret) { newTagIds.push_back(tagId); // 检查 tagId 和 fileId 的关系 if (std::find(oldTagIds.begin(), oldTagIds.end(), tagId) != oldTagIds.end()) { PRINT_DEBUG("updateFileTags tag name: %s already exist.\n", tagName.c_str()); } else { FileDatabase::insertFileTag(db, fileId, tagId); } transaction.commit(); continue; } // 完全没记录,需要存入新标签 bool success = FileDatabase::insertNewTagName(db, fileId, tagName, tagId); if (!success) { PRINT_ERROR("updateFileTags insert %s failed.\n", tagName.c_str()); continue; } Embedding textEmbedding(DataManagementEmbeddingDataType::Text, scene::TaskPriority::MEDIUM); std::vector embedding = textEmbedding.getEmbedding(tagName); if (embedding.empty()) { PRINT_ERROR("tag embedding result is empty. 
tag: %s\n", tagName.c_str()); continue; } nlohmann::json metadata; metadata["tag_id"] = tagId; std::string uuid = Utils::generateUUID(); PRINT_DEBUG("updateFileTags uuid: %s, metadata: %s.\n", uuid.c_str(), metadata.dump().c_str()); // 存入标签数据库 DataManagementVectorDatabase tagDB(FILE_TAGS_COLLECTION_NAME, TEXT_DIMENSION); success = tagDB.insertVectorData({DataManagementVectorInfo{uuid, embedding, metadata}}); if (!success) { PRINT_ERROR("updateFileTags insert into %s failed.\n", FILE_TAGS_COLLECTION_NAME); continue; } transaction.commit(); } // 比较是否有冗余的标签 std::vector difference; std::set_difference(oldTagIds.begin(), oldTagIds.end(), newTagIds.begin(), newTagIds.end(), std::back_inserter(difference)); PRINT_DEBUG("updateFileTags %s difference %s is %s.\n", FileDatabase::vectorToString(oldTagIds).c_str(), FileDatabase::vectorToString(newTagIds).c_str(), FileDatabase::vectorToString(difference).c_str()); if (!difference.empty()) { // 删除以前多余的标签 deleteFileTags(fileId, difference); } } kylin-ai-data-management-service-1.2.0.0/src/service/tagService.h000066400000000000000000000026771520577635400245720ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #pragma once #include #include #include #include #include "fileDatabase.h" class TagService { public: TagService() : db(FileDatabase::createDatabase()) {} std::vector searchSimilarTagNames(const std::vector& tags); std::vector searchSimilarFilePaths(const std::vector& tags); void deleteFileTags(const std::unordered_set& deletedFileIds); void updateFileTags(int64_t fileId, const std::set& newTags); private: static std::unordered_set searchSimilarTagIdsByTagNames(const std::vector& tags); void deleteFileTags(int64_t fileId, const std::vector& tagIds); private: SQLite::Database db; }; kylin-ai-data-management-service-1.2.0.0/src/sqliteRecover/000077500000000000000000000000001520577635400235005ustar00rootroot00000000000000kylin-ai-data-management-service-1.2.0.0/src/sqliteRecover/dbdata.c000066400000000000000000000716321520577635400250740ustar00rootroot00000000000000/* ** 2019-04-17 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** This file contains an implementation of two eponymous virtual tables, ** "sqlite_dbdata" and "sqlite_dbptr". Both modules require that the ** "sqlite_dbpage" eponymous virtual table be available. ** ** SQLITE_DBDATA: ** sqlite_dbdata is used to extract data directly from a database b-tree ** page and its associated overflow pages, bypassing the b-tree layer. ** The table schema is equivalent to: ** ** CREATE TABLE sqlite_dbdata( ** pgno INTEGER, ** cell INTEGER, ** field INTEGER, ** value ANY, ** schema TEXT HIDDEN ** ); ** ** IMPORTANT: THE VIRTUAL TABLE SCHEMA ABOVE IS SUBJECT TO CHANGE. IN THE ** FUTURE NEW NON-HIDDEN COLUMNS MAY BE ADDED BETWEEN "value" AND ** "schema". ** ** Each page of the database is inspected. 
If it cannot be interpreted as ** a b-tree page, or if it is a b-tree page containing 0 entries, the ** sqlite_dbdata table contains no rows for that page. Otherwise, the ** table contains one row for each field in the record associated with ** each cell on the page. For intkey b-trees, the key value is stored in ** field -1. ** ** For example, for the database: ** ** CREATE TABLE t1(a, b); -- root page is page 2 ** INSERT INTO t1(rowid, a, b) VALUES(5, 'v', 'five'); ** INSERT INTO t1(rowid, a, b) VALUES(10, 'x', 'ten'); ** ** the sqlite_dbdata table contains, as well as from entries related to ** page 1, content equivalent to: ** ** INSERT INTO sqlite_dbdata(pgno, cell, field, value) VALUES ** (2, 0, -1, 5 ), ** (2, 0, 0, 'v' ), ** (2, 0, 1, 'five'), ** (2, 1, -1, 10 ), ** (2, 1, 0, 'x' ), ** (2, 1, 1, 'ten' ); ** ** If database corruption is encountered, this module does not report an ** error. Instead, it attempts to extract as much data as possible and ** ignores the corruption. ** ** SQLITE_DBPTR: ** The sqlite_dbptr table has the following schema: ** ** CREATE TABLE sqlite_dbptr( ** pgno INTEGER, ** child INTEGER, ** schema TEXT HIDDEN ** ); ** ** It contains one entry for each b-tree pointer between a parent and ** child page in the database. */ #if !defined(SQLITEINT_H) #include "sqlite3.h" typedef unsigned char u8; typedef unsigned int u32; #endif #include #include #ifndef SQLITE_OMIT_VIRTUALTABLE #define DBDATA_PADDING_BYTES 100 typedef struct DbdataTable DbdataTable; typedef struct DbdataCursor DbdataCursor; typedef struct DbdataBuffer DbdataBuffer; /* ** Buffer type. */ struct DbdataBuffer { u8 *aBuf; sqlite3_int64 nBuf; }; /* Cursor object */ struct DbdataCursor { sqlite3_vtab_cursor base; /* Base class. 
Must be first */ sqlite3_stmt *pStmt; /* For fetching database pages */ int iPgno; /* Current page number */ u8 *aPage; /* Buffer containing page */ int nPage; /* Size of aPage[] in bytes */ int nCell; /* Number of cells on aPage[] */ int iCell; /* Current cell number */ int bOnePage; /* True to stop after one page */ int szDb; sqlite3_int64 iRowid; /* Only for the sqlite_dbdata table */ DbdataBuffer rec; sqlite3_int64 nRec; /* Size of pRec[] in bytes */ sqlite3_int64 nHdr; /* Size of header in bytes */ int iField; /* Current field number */ u8 *pHdrPtr; u8 *pPtr; u32 enc; /* Text encoding */ sqlite3_int64 iIntkey; /* Integer key value */ }; /* Table object */ struct DbdataTable { sqlite3_vtab base; /* Base class. Must be first */ sqlite3 *db; /* The database connection */ sqlite3_stmt *pStmt; /* For fetching database pages */ int bPtr; /* True for sqlite3_dbptr table */ }; /* Column and schema definitions for sqlite_dbdata */ #define DBDATA_COLUMN_PGNO 0 #define DBDATA_COLUMN_CELL 1 #define DBDATA_COLUMN_FIELD 2 #define DBDATA_COLUMN_VALUE 3 #define DBDATA_COLUMN_SCHEMA 4 #define DBDATA_SCHEMA \ "CREATE TABLE x(" \ " pgno INTEGER," \ " cell INTEGER," \ " field INTEGER," \ " value ANY," \ " schema TEXT HIDDEN" \ ")" /* Column and schema definitions for sqlite_dbptr */ #define DBPTR_COLUMN_PGNO 0 #define DBPTR_COLUMN_CHILD 1 #define DBPTR_COLUMN_SCHEMA 2 #define DBPTR_SCHEMA \ "CREATE TABLE x(" \ " pgno INTEGER," \ " child INTEGER," \ " schema TEXT HIDDEN" \ ")" /* ** Ensure the buffer passed as the first argument is at least nMin bytes ** in size. If an error occurs while attempting to resize the buffer, ** SQLITE_NOMEM is returned. Otherwise, SQLITE_OK. 
*/ static int dbdataBufferSize(DbdataBuffer *pBuf, sqlite3_int64 nMin){ if( nMin>pBuf->nBuf ){ sqlite3_int64 nNew = nMin+16384; u8 *aNew = (u8*)sqlite3_realloc64(pBuf->aBuf, nNew); if( aNew==0 ) return SQLITE_NOMEM; pBuf->aBuf = aNew; pBuf->nBuf = nNew; } return SQLITE_OK; } /* ** Release the allocation managed by buffer pBuf. */ static void dbdataBufferFree(DbdataBuffer *pBuf){ sqlite3_free(pBuf->aBuf); memset(pBuf, 0, sizeof(*pBuf)); } /* ** Connect to an sqlite_dbdata (pAux==0) or sqlite_dbptr (pAux!=0) virtual ** table. */ static int dbdataConnect( sqlite3 *db, void *pAux, int argc, const char *const*argv, sqlite3_vtab **ppVtab, char **pzErr ){ DbdataTable *pTab = 0; int rc = sqlite3_declare_vtab(db, pAux ? DBPTR_SCHEMA : DBDATA_SCHEMA); (void)argc; (void)argv; (void)pzErr; sqlite3_vtab_config(db, SQLITE_VTAB_USES_ALL_SCHEMAS); if( rc==SQLITE_OK ){ pTab = (DbdataTable*)sqlite3_malloc64(sizeof(DbdataTable)); if( pTab==0 ){ rc = SQLITE_NOMEM; }else{ memset(pTab, 0, sizeof(DbdataTable)); pTab->db = db; pTab->bPtr = (pAux!=0); } } *ppVtab = (sqlite3_vtab*)pTab; return rc; } /* ** Disconnect from or destroy a sqlite_dbdata or sqlite_dbptr virtual table. */ static int dbdataDisconnect(sqlite3_vtab *pVtab){ DbdataTable *pTab = (DbdataTable*)pVtab; if( pTab ){ sqlite3_finalize(pTab->pStmt); sqlite3_free(pVtab); } return SQLITE_OK; } /* ** This function interprets two types of constraints: ** ** schema=? ** pgno=? ** ** If neither are present, idxNum is set to 0. If schema=? is present, ** the 0x01 bit in idxNum is set. If pgno=? is present, the 0x02 bit ** in idxNum is set. ** ** If both parameters are present, schema is in position 0 and pgno in ** position 1. */ static int dbdataBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdx){ DbdataTable *pTab = (DbdataTable*)tab; int i; int iSchema = -1; int iPgno = -1; int colSchema = (pTab->bPtr ? 
DBPTR_COLUMN_SCHEMA : DBDATA_COLUMN_SCHEMA); for(i=0; inConstraint; i++){ struct sqlite3_index_constraint *p = &pIdx->aConstraint[i]; if( p->op==SQLITE_INDEX_CONSTRAINT_EQ ){ if( p->iColumn==colSchema ){ if( p->usable==0 ) return SQLITE_CONSTRAINT; iSchema = i; } if( p->iColumn==DBDATA_COLUMN_PGNO && p->usable ){ iPgno = i; } } } if( iSchema>=0 ){ pIdx->aConstraintUsage[iSchema].argvIndex = 1; pIdx->aConstraintUsage[iSchema].omit = 1; } if( iPgno>=0 ){ pIdx->aConstraintUsage[iPgno].argvIndex = 1 + (iSchema>=0); pIdx->aConstraintUsage[iPgno].omit = 1; pIdx->estimatedCost = 100; pIdx->estimatedRows = 50; if( pTab->bPtr==0 && pIdx->nOrderBy && pIdx->aOrderBy[0].desc==0 ){ int iCol = pIdx->aOrderBy[0].iColumn; if( pIdx->nOrderBy==1 ){ pIdx->orderByConsumed = (iCol==0 || iCol==1); }else if( pIdx->nOrderBy==2 && pIdx->aOrderBy[1].desc==0 && iCol==0 ){ pIdx->orderByConsumed = (pIdx->aOrderBy[1].iColumn==1); } } }else{ pIdx->estimatedCost = 100000000; pIdx->estimatedRows = 1000000000; } pIdx->idxNum = (iSchema>=0 ? 0x01 : 0x00) | (iPgno>=0 ? 0x02 : 0x00); return SQLITE_OK; } /* ** Open a new sqlite_dbdata or sqlite_dbptr cursor. */ static int dbdataOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ DbdataCursor *pCsr; pCsr = (DbdataCursor*)sqlite3_malloc64(sizeof(DbdataCursor)); if( pCsr==0 ){ return SQLITE_NOMEM; }else{ memset(pCsr, 0, sizeof(DbdataCursor)); pCsr->base.pVtab = pVTab; } *ppCursor = (sqlite3_vtab_cursor *)pCsr; return SQLITE_OK; } /* ** Restore a cursor object to the state it was in when first allocated ** by dbdataOpen(). */ static void dbdataResetCursor(DbdataCursor *pCsr){ DbdataTable *pTab = (DbdataTable*)(pCsr->base.pVtab); if( pTab->pStmt==0 ){ pTab->pStmt = pCsr->pStmt; }else{ sqlite3_finalize(pCsr->pStmt); } pCsr->pStmt = 0; pCsr->iPgno = 1; pCsr->iCell = 0; pCsr->iField = 0; pCsr->bOnePage = 0; sqlite3_free(pCsr->aPage); dbdataBufferFree(&pCsr->rec); pCsr->aPage = 0; pCsr->nRec = 0; } /* ** Close an sqlite_dbdata or sqlite_dbptr cursor. 
*/ static int dbdataClose(sqlite3_vtab_cursor *pCursor){ DbdataCursor *pCsr = (DbdataCursor*)pCursor; dbdataResetCursor(pCsr); sqlite3_free(pCsr); return SQLITE_OK; } /* ** Utility methods to decode 16 and 32-bit big-endian unsigned integers. */ static u32 get_uint16(unsigned char *a){ return (a[0]<<8)|a[1]; } static u32 get_uint32(unsigned char *a){ return ((u32)a[0]<<24) | ((u32)a[1]<<16) | ((u32)a[2]<<8) | ((u32)a[3]); } /* ** Load page pgno from the database via the sqlite_dbpage virtual table. ** If successful, set (*ppPage) to point to a buffer containing the page ** data, (*pnPage) to the size of that buffer in bytes and return ** SQLITE_OK. In this case it is the responsibility of the caller to ** eventually free the buffer using sqlite3_free(). ** ** Or, if an error occurs, set both (*ppPage) and (*pnPage) to 0 and ** return an SQLite error code. */ static int dbdataLoadPage( DbdataCursor *pCsr, /* Cursor object */ u32 pgno, /* Page number of page to load */ u8 **ppPage, /* OUT: pointer to page buffer */ int *pnPage /* OUT: Size of (*ppPage) in bytes */ ){ int rc2; int rc = SQLITE_OK; sqlite3_stmt *pStmt = pCsr->pStmt; *ppPage = 0; *pnPage = 0; if( pgno>0 ){ sqlite3_bind_int64(pStmt, 2, pgno); if( SQLITE_ROW==sqlite3_step(pStmt) ){ int nCopy = sqlite3_column_bytes(pStmt, 0); if( nCopy>0 ){ u8 *pPage; pPage = (u8*)sqlite3_malloc64(nCopy + DBDATA_PADDING_BYTES); if( pPage==0 ){ rc = SQLITE_NOMEM; }else{ const u8 *pCopy = sqlite3_column_blob(pStmt, 0); memcpy(pPage, pCopy, nCopy); memset(&pPage[nCopy], 0, DBDATA_PADDING_BYTES); } *ppPage = pPage; *pnPage = nCopy; } } rc2 = sqlite3_reset(pStmt); if( rc==SQLITE_OK ) rc = rc2; } return rc; } /* ** Read a varint. Put the value in *pVal and return the number of bytes. 
*/ static int dbdataGetVarint(const u8 *z, sqlite3_int64 *pVal){ sqlite3_uint64 u = 0; int i; for(i=0; i<8; i++){ u = (u<<7) + (z[i]&0x7f); if( (z[i]&0x80)==0 ){ *pVal = (sqlite3_int64)u; return i+1; } } u = (u<<8) + (z[i]&0xff); *pVal = (sqlite3_int64)u; return 9; } /* ** Like dbdataGetVarint(), but set the output to 0 if it is less than 0 ** or greater than 0xFFFFFFFF. This can be used for all varints in an ** SQLite database except for key values in intkey tables. */ static int dbdataGetVarintU32(const u8 *z, sqlite3_int64 *pVal){ sqlite3_int64 val; int nRet = dbdataGetVarint(z, &val); if( val<0 || val>0xFFFFFFFF ) val = 0; *pVal = val; return nRet; } /* ** Return the number of bytes of space used by an SQLite value of type ** eType. */ static int dbdataValueBytes(int eType){ switch( eType ){ case 0: case 8: case 9: case 10: case 11: return 0; case 1: return 1; case 2: return 2; case 3: return 3; case 4: return 4; case 5: return 6; case 6: case 7: return 8; default: if( eType>0 ){ return ((eType-12) / 2); } return 0; } } /* ** Load a value of type eType from buffer pData and use it to set the ** result of context object pCtx. 
**
** enc is the text encoding used for text values. nData is the number of
** bytes available at pData. If the value would need more than nData
** bytes, a zero/empty value of the matching kind is returned instead.
** If eType is negative, no result is set.
*/
static void dbdataValue(
  sqlite3_context *pCtx,
  u32 enc,
  int eType,
  u8 *pData,
  sqlite3_int64 nData
){
  if( eType>=0 ){
    if( dbdataValueBytes(eType)<=nData ){
      switch( eType ){
        case 0:
        case 10:
        case 11:
          sqlite3_result_null(pCtx);
          break;

        /* Types 8 and 9 are the constants 0 and 1; they occupy no bytes */
        case 8:
          sqlite3_result_int(pCtx, 0);
          break;
        case 9:
          sqlite3_result_int(pCtx, 1);
          break;

        case 1: case 2: case 3: case 4: case 5: case 6: case 7: {
          /* The first byte is sign-extended; the remaining bytes are
          ** appended big-endian by the fall-through switch below. */
          sqlite3_uint64 v = (signed char)pData[0];
          pData++;
          switch( eType ){
            /* Every case deliberately falls through to the next, so that
            ** in total 8, 8, 6, 4, 3 and 2 bytes are consumed for types
            ** 7, 6, 5, 4, 3 and 2 respectively. */
            case 7:
            case 6:  v = (v<<16) + (pData[0]<<8) + pData[1];  pData += 2;
            case 5:  v = (v<<16) + (pData[0]<<8) + pData[1];  pData += 2;
            case 4:  v = (v<<8) + pData[0];  pData++;
            case 3:  v = (v<<8) + pData[0];  pData++;
            case 2:  v = (v<<8) + pData[0];  pData++;
          }

          if( eType==7 ){
            /* Reinterpret the 8 bytes just read as a double */
            double r;
            memcpy(&r, &v, sizeof(r));
            sqlite3_result_double(pCtx, r);
          }else{
            sqlite3_result_int64(pCtx, (sqlite3_int64)v);
          }
          break;
        }

        default: {
          /* Types 12 and up: odd values are text, even values are blobs,
          ** each n bytes long. */
          int n = ((eType-12) / 2);
          if( eType % 2 ){
            switch( enc ){
#ifndef SQLITE_OMIT_UTF16
              case SQLITE_UTF16BE:
                sqlite3_result_text16be(pCtx, (void*)pData, n, SQLITE_TRANSIENT);
                break;
              case SQLITE_UTF16LE:
                sqlite3_result_text16le(pCtx, (void*)pData, n, SQLITE_TRANSIENT);
                break;
#endif
              default:
                sqlite3_result_text(pCtx, (char*)pData, n, SQLITE_TRANSIENT);
                break;
            }
          }else{
            sqlite3_result_blob(pCtx, pData, n, SQLITE_TRANSIENT);
          }
        }
      }
    }else{
      /* Not enough data for the value: substitute a zero or empty value */
      if( eType==7 ){
        sqlite3_result_double(pCtx, 0.0);
      }else if( eType<7 ){
        sqlite3_result_int(pCtx, 0);
      }else if( eType%2 ){
        sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC);
      }else{
        sqlite3_result_blob(pCtx, "", 0, SQLITE_STATIC);
      }
    }
  }
}

/* This macro is a copy of the MX_CELL() macro in the SQLite core. Given
** a page-size, it returns the maximum number of cells that may be present
** on the page.  */
#define DBDATA_MX_CELL(pgsz) ((pgsz-8)/6)

/* Maximum number of fields that may appear in a single record. This is
** the "hard-limit", according to comments in sqliteLimit.h. */
#define DBDATA_MX_FIELD 32676

/*
** Move an sqlite_dbdata or sqlite_dbptr cursor to the next entry.
*/ static int dbdataNext(sqlite3_vtab_cursor *pCursor){ DbdataCursor *pCsr = (DbdataCursor*)pCursor; DbdataTable *pTab = (DbdataTable*)pCursor->pVtab; pCsr->iRowid++; while( 1 ){ int rc; int iOff = (pCsr->iPgno==1 ? 100 : 0); int bNextPage = 0; if( pCsr->aPage==0 ){ while( 1 ){ if( pCsr->bOnePage==0 && pCsr->iPgno>pCsr->szDb ) return SQLITE_OK; rc = dbdataLoadPage(pCsr, pCsr->iPgno, &pCsr->aPage, &pCsr->nPage); if( rc!=SQLITE_OK ) return rc; if( pCsr->aPage && pCsr->nPage>=256 ) break; sqlite3_free(pCsr->aPage); pCsr->aPage = 0; if( pCsr->bOnePage ) return SQLITE_OK; pCsr->iPgno++; } assert( iOff+3+2<=pCsr->nPage ); pCsr->iCell = pTab->bPtr ? -2 : 0; pCsr->nCell = get_uint16(&pCsr->aPage[iOff+3]); if( pCsr->nCell>DBDATA_MX_CELL(pCsr->nPage) ){ pCsr->nCell = DBDATA_MX_CELL(pCsr->nPage); } } if( pTab->bPtr ){ if( pCsr->aPage[iOff]!=0x02 && pCsr->aPage[iOff]!=0x05 ){ pCsr->iCell = pCsr->nCell; } pCsr->iCell++; if( pCsr->iCell>=pCsr->nCell ){ sqlite3_free(pCsr->aPage); pCsr->aPage = 0; if( pCsr->bOnePage ) return SQLITE_OK; pCsr->iPgno++; }else{ return SQLITE_OK; } }else{ /* If there is no record loaded, load it now. */ assert( pCsr->rec.aBuf!=0 || pCsr->nRec==0 ); if( pCsr->nRec==0 ){ int bHasRowid = 0; int nPointer = 0; sqlite3_int64 nPayload = 0; sqlite3_int64 nHdr = 0; int iHdr; int U, X; int nLocal; switch( pCsr->aPage[iOff] ){ case 0x02: nPointer = 4; break; case 0x0a: break; case 0x0d: bHasRowid = 1; break; default: /* This is not a b-tree page with records on it. Continue. 
*/ pCsr->iCell = pCsr->nCell; break; } if( pCsr->iCell>=pCsr->nCell ){ bNextPage = 1; }else{ int iCellPtr = iOff + 8 + nPointer + pCsr->iCell*2; if( iCellPtr>pCsr->nPage ){ bNextPage = 1; }else{ iOff = get_uint16(&pCsr->aPage[iCellPtr]); } /* For an interior node cell, skip past the child-page number */ iOff += nPointer; /* Load the "byte of payload including overflow" field */ if( bNextPage || iOff>pCsr->nPage || iOff<=iCellPtr ){ bNextPage = 1; }else{ iOff += dbdataGetVarintU32(&pCsr->aPage[iOff], &nPayload); if( nPayload>0x7fffff00 ) nPayload &= 0x3fff; if( nPayload==0 ) nPayload = 1; } /* If this is a leaf intkey cell, load the rowid */ if( bHasRowid && !bNextPage && iOffnPage ){ iOff += dbdataGetVarint(&pCsr->aPage[iOff], &pCsr->iIntkey); } /* Figure out how much data to read from the local page */ U = pCsr->nPage; if( bHasRowid ){ X = U-35; }else{ X = ((U-12)*64/255)-23; } if( nPayload<=X ){ nLocal = nPayload; }else{ int M, K; M = ((U-12)*32/255)-23; K = M+((nPayload-M)%(U-4)); if( K<=X ){ nLocal = K; }else{ nLocal = M; } } if( bNextPage || nLocal+iOff>pCsr->nPage ){ bNextPage = 1; }else{ /* Allocate space for payload. And a bit more to catch small buffer ** overruns caused by attempting to read a varint or similar from ** near the end of a corrupt record. 
*/ rc = dbdataBufferSize(&pCsr->rec, nPayload+DBDATA_PADDING_BYTES); if( rc!=SQLITE_OK ) return rc; assert( pCsr->rec.aBuf!=0 ); assert( nPayload!=0 ); /* Load the nLocal bytes of payload */ memcpy(pCsr->rec.aBuf, &pCsr->aPage[iOff], nLocal); iOff += nLocal; /* Load content from overflow pages */ if( nPayload>nLocal ){ sqlite3_int64 nRem = nPayload - nLocal; u32 pgnoOvfl = get_uint32(&pCsr->aPage[iOff]); while( nRem>0 ){ u8 *aOvfl = 0; int nOvfl = 0; int nCopy; rc = dbdataLoadPage(pCsr, pgnoOvfl, &aOvfl, &nOvfl); assert( rc!=SQLITE_OK || aOvfl==0 || nOvfl==pCsr->nPage ); if( rc!=SQLITE_OK ) return rc; if( aOvfl==0 ) break; nCopy = U-4; if( nCopy>nRem ) nCopy = nRem; memcpy(&pCsr->rec.aBuf[nPayload-nRem], &aOvfl[4], nCopy); nRem -= nCopy; pgnoOvfl = get_uint32(aOvfl); sqlite3_free(aOvfl); } nPayload -= nRem; } memset(&pCsr->rec.aBuf[nPayload], 0, DBDATA_PADDING_BYTES); pCsr->nRec = nPayload; iHdr = dbdataGetVarintU32(pCsr->rec.aBuf, &nHdr); if( nHdr>nPayload ) nHdr = 0; pCsr->nHdr = nHdr; pCsr->pHdrPtr = &pCsr->rec.aBuf[iHdr]; pCsr->pPtr = &pCsr->rec.aBuf[pCsr->nHdr]; pCsr->iField = (bHasRowid ? -1 : 0); } } }else{ pCsr->iField++; if( pCsr->iField>0 ){ sqlite3_int64 iType; if( pCsr->pHdrPtr>=&pCsr->rec.aBuf[pCsr->nRec] || pCsr->iField>=DBDATA_MX_FIELD ){ bNextPage = 1; }else{ int szField = 0; pCsr->pHdrPtr += dbdataGetVarintU32(pCsr->pHdrPtr, &iType); szField = dbdataValueBytes(iType); if( (pCsr->nRec - (pCsr->pPtr - pCsr->rec.aBuf))pPtr = &pCsr->rec.aBuf[pCsr->nRec]; }else{ pCsr->pPtr += szField; } } } } if( bNextPage ){ sqlite3_free(pCsr->aPage); pCsr->aPage = 0; pCsr->nRec = 0; if( pCsr->bOnePage ) return SQLITE_OK; pCsr->iPgno++; }else{ if( pCsr->iField<0 || pCsr->pHdrPtr<&pCsr->rec.aBuf[pCsr->nHdr] ){ return SQLITE_OK; } /* Advance to the next cell. The next iteration of the loop will load ** the record and so on. */ pCsr->nRec = 0; pCsr->iCell++; } } } assert( !"can't get here" ); return SQLITE_OK; } /* ** Return true if the cursor is at EOF. 
*/ static int dbdataEof(sqlite3_vtab_cursor *pCursor){ DbdataCursor *pCsr = (DbdataCursor*)pCursor; return pCsr->aPage==0; } /* ** Return true if nul-terminated string zSchema ends in "()". Or false ** otherwise. */ static int dbdataIsFunction(const char *zSchema){ size_t n = strlen(zSchema); if( n>2 && zSchema[n-2]=='(' && zSchema[n-1]==')' ){ return (int)n-2; } return 0; } /* ** Determine the size in pages of database zSchema (where zSchema is ** "main", "temp" or the name of an attached database) and set ** pCsr->szDb accordingly. If successful, return SQLITE_OK. Otherwise, ** an SQLite error code. */ static int dbdataDbsize(DbdataCursor *pCsr, const char *zSchema){ DbdataTable *pTab = (DbdataTable*)pCsr->base.pVtab; char *zSql = 0; int rc, rc2; int nFunc = 0; sqlite3_stmt *pStmt = 0; if( (nFunc = dbdataIsFunction(zSchema))>0 ){ zSql = sqlite3_mprintf("SELECT %.*s(0)", nFunc, zSchema); }else{ zSql = sqlite3_mprintf("PRAGMA %Q.page_count", zSchema); } if( zSql==0 ) return SQLITE_NOMEM; rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pStmt, 0); sqlite3_free(zSql); if( rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW ){ pCsr->szDb = sqlite3_column_int(pStmt, 0); } rc2 = sqlite3_finalize(pStmt); if( rc==SQLITE_OK ) rc = rc2; return rc; } /* ** Attempt to figure out the encoding of the database by retrieving page 1 ** and inspecting the header field. If successful, set the pCsr->enc variable ** and return SQLITE_OK. Otherwise, return an SQLite error code. */ static int dbdataGetEncoding(DbdataCursor *pCsr){ int rc = SQLITE_OK; int nPg1 = 0; u8 *aPg1 = 0; rc = dbdataLoadPage(pCsr, 1, &aPg1, &nPg1); if( rc==SQLITE_OK && nPg1>=(56+4) ){ pCsr->enc = get_uint32(&aPg1[56]); } sqlite3_free(aPg1); return rc; } /* ** xFilter method for sqlite_dbdata and sqlite_dbptr. 
*/ static int dbdataFilter( sqlite3_vtab_cursor *pCursor, int idxNum, const char *idxStr, int argc, sqlite3_value **argv ){ DbdataCursor *pCsr = (DbdataCursor*)pCursor; DbdataTable *pTab = (DbdataTable*)pCursor->pVtab; int rc = SQLITE_OK; const char *zSchema = "main"; (void)idxStr; (void)argc; dbdataResetCursor(pCsr); assert( pCsr->iPgno==1 ); if( idxNum & 0x01 ){ zSchema = (const char*)sqlite3_value_text(argv[0]); if( zSchema==0 ) zSchema = ""; } if( idxNum & 0x02 ){ pCsr->iPgno = sqlite3_value_int(argv[(idxNum & 0x01)]); pCsr->bOnePage = 1; }else{ rc = dbdataDbsize(pCsr, zSchema); } if( rc==SQLITE_OK ){ int nFunc = 0; if( pTab->pStmt ){ pCsr->pStmt = pTab->pStmt; pTab->pStmt = 0; }else if( (nFunc = dbdataIsFunction(zSchema))>0 ){ char *zSql = sqlite3_mprintf("SELECT %.*s(?2)", nFunc, zSchema); if( zSql==0 ){ rc = SQLITE_NOMEM; }else{ rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pCsr->pStmt, 0); sqlite3_free(zSql); } }else{ rc = sqlite3_prepare_v2(pTab->db, "SELECT data FROM sqlite_dbpage(?) WHERE pgno=?", -1, &pCsr->pStmt, 0 ); } } if( rc==SQLITE_OK ){ rc = sqlite3_bind_text(pCsr->pStmt, 1, zSchema, -1, SQLITE_TRANSIENT); } /* Try to determine the encoding of the db by inspecting the header ** field on page 1. */ if( rc==SQLITE_OK ){ rc = dbdataGetEncoding(pCsr); } if( rc!=SQLITE_OK ){ pTab->base.zErrMsg = sqlite3_mprintf("%s", sqlite3_errmsg(pTab->db)); } if( rc==SQLITE_OK ){ rc = dbdataNext(pCursor); } return rc; } /* ** Return a column for the sqlite_dbdata or sqlite_dbptr table. */ static int dbdataColumn( sqlite3_vtab_cursor *pCursor, sqlite3_context *ctx, int i ){ DbdataCursor *pCsr = (DbdataCursor*)pCursor; DbdataTable *pTab = (DbdataTable*)pCursor->pVtab; if( pTab->bPtr ){ switch( i ){ case DBPTR_COLUMN_PGNO: sqlite3_result_int64(ctx, pCsr->iPgno); break; case DBPTR_COLUMN_CHILD: { int iOff = pCsr->iPgno==1 ? 
100 : 0; if( pCsr->iCell<0 ){ iOff += 8; }else{ iOff += 12 + pCsr->iCell*2; if( iOff>pCsr->nPage ) return SQLITE_OK; iOff = get_uint16(&pCsr->aPage[iOff]); } if( iOff<=pCsr->nPage ){ sqlite3_result_int64(ctx, get_uint32(&pCsr->aPage[iOff])); } break; } } }else{ switch( i ){ case DBDATA_COLUMN_PGNO: sqlite3_result_int64(ctx, pCsr->iPgno); break; case DBDATA_COLUMN_CELL: sqlite3_result_int(ctx, pCsr->iCell); break; case DBDATA_COLUMN_FIELD: sqlite3_result_int(ctx, pCsr->iField); break; case DBDATA_COLUMN_VALUE: { if( pCsr->iField<0 ){ sqlite3_result_int64(ctx, pCsr->iIntkey); }else if( &pCsr->rec.aBuf[pCsr->nRec] >= pCsr->pPtr ){ sqlite3_int64 iType; dbdataGetVarintU32(pCsr->pHdrPtr, &iType); dbdataValue( ctx, pCsr->enc, iType, pCsr->pPtr, &pCsr->rec.aBuf[pCsr->nRec] - pCsr->pPtr ); } break; } } } return SQLITE_OK; } /* ** Return the rowid for an sqlite_dbdata or sqlite_dptr table. */ static int dbdataRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ DbdataCursor *pCsr = (DbdataCursor*)pCursor; *pRowid = pCsr->iRowid; return SQLITE_OK; } /* ** Invoke this routine to register the "sqlite_dbdata" virtual table module */ static int sqlite3DbdataRegister(sqlite3 *db){ static sqlite3_module dbdata_module = { 0, /* iVersion */ 0, /* xCreate */ dbdataConnect, /* xConnect */ dbdataBestIndex, /* xBestIndex */ dbdataDisconnect, /* xDisconnect */ 0, /* xDestroy */ dbdataOpen, /* xOpen - open a cursor */ dbdataClose, /* xClose - close a cursor */ dbdataFilter, /* xFilter - configure scan constraints */ dbdataNext, /* xNext - advance a cursor */ dbdataEof, /* xEof - check for end of scan */ dbdataColumn, /* xColumn - read data */ dbdataRowid, /* xRowid - read data */ 0, /* xUpdate */ 0, /* xBegin */ 0, /* xSync */ 0, /* xCommit */ 0, /* xRollback */ 0, /* xFindMethod */ 0, /* xRename */ 0, /* xSavepoint */ 0, /* xRelease */ 0, /* xRollbackTo */ 0, /* xShadowName */ 0 /* xIntegrity */ }; int rc = sqlite3_create_module(db, "sqlite_dbdata", &dbdata_module, 0); if( 
rc==SQLITE_OK ){ rc = sqlite3_create_module(db, "sqlite_dbptr", &dbdata_module, (void*)1); } return rc; } #ifdef _WIN32 __declspec(dllexport) #endif int sqlite3_dbdata_init( sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi ){ (void)pzErrMsg; return sqlite3DbdataRegister(db); } #endif /* ifndef SQLITE_OMIT_VIRTUALTABLE */kylin-ai-data-management-service-1.2.0.0/src/sqliteRecover/recoverdb.cpp000066400000000000000000000123761520577635400261700ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include "recoverdb.h" #include #include #include #include #include "sqlite3recover.h" bool Sqlite3Recover::checkDatabaseIntegrity(const std::string &dbPath) { std::error_code ec; if (!std::filesystem::exists(std::filesystem::path(dbPath), ec) || ec) { PRINT_ERROR("DB filePath:%s not exists, check database integrity failed,or error:%s\n", dbPath.c_str(), ec.message().c_str()); return false; } try { SQLite::Database db(dbPath, SQLite::OPEN_READONLY); // 执行完整性检查(PRAGMA) SQLite::Statement query(db, "PRAGMA integrity_check;"); while (query.executeStep()) { std::string result = query.getColumn(0); if (result != "ok") { PRINT_ERROR("db is corrupted:%s.\n", result.c_str()); return false; } } } catch (const std::exception &e) { PRINT_ERROR("caught a sqlite3 exception. [error_string: %s].\n", e.what()); return false; } catch (...) 
{ PRINT_ERROR("caught an unknown exception.\n"); return false; } return true; } /* ** This function is called to recover data from the database. */ bool Sqlite3Recover::recoverDatabase(const std::string &dbFilePath) { PRINT_INFO("Sqlite3Recover recover malformed database start.\n"); int rc; /* Return code from this routine */ const char *zLAF = "lost_and_found"; /* Name of "lost_and_found" table */ int bFreelist = 1; /* True to scan the freelist */ int bRowids = 1; /* True to restore ROWID values */ sqlite3_recover *p; /* The recovery object */ sqlite3 *db; std::error_code ec; const std::filesystem::path filePath = dbFilePath; if (!std::filesystem::exists(filePath, ec) || ec) { // 既然是恢复数据库,所以输入文件必须是有效数据库路径 PRINT_ERROR("DB filePath:%s not exists, recover database failed,or error:%s\n", dbFilePath.c_str(), ec.message().c_str()); return false; } // 在输入目录下创建临时数据库 std::filesystem::path directory = filePath.parent_path(); std::string tmpDbFilePath = directory.string() + "/data-management-recovered.db"; const char *tmpDB = tmpDbFilePath.c_str(); rc = sqlite3_open(dbFilePath.c_str(), &db); p = sqlite3_recover_init(db, "main", tmpDB); sqlite3_recover_config(p, SQLITE_RECOVER_LOST_AND_FOUND, (void *)zLAF); sqlite3_recover_config(p, SQLITE_RECOVER_ROWIDS, (void *)&bRowids); sqlite3_recover_config(p, SQLITE_RECOVER_FREELIST_CORRUPT, (void *)&bFreelist); sqlite3_recover_run(p); if (sqlite3_recover_errcode(p) != SQLITE_OK) { const char *zErr = sqlite3_recover_errmsg(p); int errCode = sqlite3_recover_errcode(p); PRINT_ERROR("sqlite3 recovery error:%s (%d).\n", zErr, errCode); sqlite3_close(db); return false; } rc = sqlite3_recover_finish(p); if (rc != SQLITE_OK) { const char *zErr = sqlite3_errmsg(db); PRINT_ERROR("sqlite3_recover_finish returns error:%s (%d).\n", zErr, rc); sqlite3_close(db); return false; } sqlite3_close(db); if (!replaceDatabase(dbFilePath, tmpDbFilePath)) { return false; } PRINT_INFO("Sqlite3Recover recover malformed database success.\n"); return true; } bool 
Sqlite3Recover::replaceDatabase(const std::string &targetFilePath, const std::string &tmpFilePath) { std::filesystem::path tmp = tmpFilePath; std::filesystem::path target = targetFilePath; std::error_code ec; if (!std::filesystem::exists(tmp, ec) || ec) { PRINT_ERROR("Temporary filePath:%s does not exist or error: %s\n", tmp.c_str(), ec.message().c_str()); return false; } if (std::filesystem::exists(target, ec) && !ec) { if (!std::filesystem::remove(target, ec) || ec) { PRINT_ERROR("Failed to remove old file: %s, error: %s\n", target.c_str(), ec.message().c_str()); return false; } } else if (ec) { PRINT_ERROR("Failed to check target file existence: %s\n", ec.message().c_str()); return false; } std::filesystem::rename(tmp, target, ec); if (ec) { PRINT_ERROR("Failed to rename file from %s to %s, error: %s\n", tmp.c_str(), target.c_str(), ec.message().c_str()); return false; } return true; } kylin-ai-data-management-service-1.2.0.0/src/sqliteRecover/recoverdb.h000066400000000000000000000017511520577635400256300ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #pragma once #include class Sqlite3Recover { public: static bool checkDatabaseIntegrity(const std::string& dbPath); static bool recoverDatabase(const std::string& dbFilePath); private: static bool replaceDatabase(const std::string& targetFilePath, const std::string& tmpFilePath); };kylin-ai-data-management-service-1.2.0.0/src/sqliteRecover/sqlite3recover.c000066400000000000000000002570371520577635400266340ustar00rootroot00000000000000/* ** 2022-08-27 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ************************************************************************* ** */ #include "sqlite3recover.h" #include #include #ifndef SQLITE_OMIT_VIRTUALTABLE /* ** Declaration for public API function in file dbdata.c. This may be called ** with NULL as the final two arguments to register the sqlite_dbptr and ** sqlite_dbdata virtual tables with a database handle. */ #ifdef _WIN32 __declspec(dllexport) #endif int sqlite3_dbdata_init(sqlite3*, char**, const sqlite3_api_routines*); typedef unsigned int u32; typedef unsigned char u8; typedef sqlite3_int64 i64; /* ** Work around C99 "flex-array" syntax for pre-C99 compilers, so as ** to avoid complaints from -fsanitize=strict-bounds. */ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) # define FLEXARRAY #else # define FLEXARRAY 1 #endif typedef struct RecoverTable RecoverTable; typedef struct RecoverColumn RecoverColumn; /* ** When recovering rows of data that can be associated with table ** definitions recovered from the sqlite_schema table, each table is ** represented by an instance of the following object. ** ** iRoot: ** The root page in the original database. Not necessarily (and usually ** not) the same in the recovered database. ** ** zTab: ** Name of the table. 
** ** nCol/aCol[]: ** aCol[] is an array of nCol columns. In the order in which they appear ** in the table. ** ** bIntkey: ** Set to true for intkey tables, false for WITHOUT ROWID. ** ** iRowidBind: ** Each column in the aCol[] array has associated with it the index of ** the bind parameter its values will be bound to in the INSERT statement ** used to construct the output database. If the table does has a rowid ** but not an INTEGER PRIMARY KEY column, then iRowidBind contains the ** index of the bind paramater to which the rowid value should be bound. ** Otherwise, it contains -1. If the table does contain an INTEGER PRIMARY ** KEY column, then the rowid value should be bound to the index associated ** with the column. ** ** pNext: ** All RecoverTable objects used by the recovery operation are allocated ** and populated as part of creating the recovered database schema in ** the output database, before any non-schema data are recovered. They ** are then stored in a singly-linked list linked by this variable beginning ** at sqlite3_recover.pTblList. */ struct RecoverTable { u32 iRoot; /* Root page in original database */ char *zTab; /* Name of table */ int nCol; /* Number of columns in table */ RecoverColumn *aCol; /* Array of columns */ int bIntkey; /* True for intkey, false for without rowid */ int iRowidBind; /* If >0, bind rowid to INSERT here */ RecoverTable *pNext; }; /* ** Each database column is represented by an instance of the following object ** stored in the RecoverTable.aCol[] array of the associated table. ** ** iField: ** The index of the associated field within database records. Or -1 if ** there is no associated field (e.g. for virtual generated columns). ** ** iBind: ** The bind index of the INSERT statement to bind this columns values ** to. Or 0 if there is no such index (iff (iField<0)). ** ** bIPK: ** True if this is the INTEGER PRIMARY KEY column. ** ** zCol: ** Name of column. 
** ** eHidden: ** A RECOVER_EHIDDEN_* constant value (see below for interpretation of each). */ struct RecoverColumn { int iField; /* Field in record on disk */ int iBind; /* Binding to use in INSERT */ int bIPK; /* True for IPK column */ char *zCol; int eHidden; }; #define RECOVER_EHIDDEN_NONE 0 /* Normal database column */ #define RECOVER_EHIDDEN_HIDDEN 1 /* Column is __HIDDEN__ */ #define RECOVER_EHIDDEN_VIRTUAL 2 /* Virtual generated column */ #define RECOVER_EHIDDEN_STORED 3 /* Stored generated column */ /* ** Bitmap object used to track pages in the input database. Allocated ** and manipulated only by the following functions: ** ** recoverBitmapAlloc() ** recoverBitmapFree() ** recoverBitmapSet() ** recoverBitmapQuery() ** ** nPg: ** Largest page number that may be stored in the bitmap. The range ** of valid keys is 1 to nPg, inclusive. ** ** aElem[]: ** Array large enough to contain a bit for each key. For key value ** iKey, the associated bit is the bit (iKey%32) of aElem[iKey/32]. ** In other words, the following is true if bit iKey is set, or ** false if it is clear: ** ** (aElem[iKey/32] & (1 << (iKey%32))) ? 1 : 0 */ typedef struct RecoverBitmap RecoverBitmap; struct RecoverBitmap { i64 nPg; /* Size of bitmap */ u32 aElem[FLEXARRAY]; /* Array of 32-bit bitmasks */ }; /* Size in bytes of a RecoverBitmap object sufficient to cover 32 pages */ #define SZ_RECOVERBITMAP_32 (16) /* ** State variables (part of the sqlite3_recover structure) used while ** recovering data for tables identified in the recovered schema (state ** RECOVER_STATE_WRITING). 
*/
typedef struct RecoverStateW1 RecoverStateW1;
struct RecoverStateW1 {
  sqlite3_stmt *pTbls;
  sqlite3_stmt *pSel;
  sqlite3_stmt *pInsert;
  int nInsert;

  RecoverTable *pTab;             /* Table currently being written */
  int nMax;                       /* Max column count in any schema table */
  sqlite3_value **apVal;          /* Array of nMax values */
  int nVal;                       /* Number of valid entries in apVal[] */
  int bHaveRowid;
  i64 iRowid;
  i64 iPrevPage;
  int iPrevCell;
};

/*
** State variables (part of the sqlite3_recover structure) used while
** recovering data destined for the lost and found table (states
** RECOVER_STATE_LOSTANDFOUND[123]).
*/
typedef struct RecoverStateLAF RecoverStateLAF;
struct RecoverStateLAF {
  RecoverBitmap *pUsed;           /* Bitmap consulted by page_is_used() */
  i64 nPg;                        /* Size of db in pages */
  sqlite3_stmt *pAllAndParent;
  sqlite3_stmt *pMapInsert;
  sqlite3_stmt *pMaxField;
  sqlite3_stmt *pUsedPages;
  sqlite3_stmt *pFindRoot;        /* Prepared on first use by
                                  ** recoverLostAndFoundFindRoot() */
  sqlite3_stmt *pInsert;          /* INSERT INTO lost_and_found ... */
  sqlite3_stmt *pAllPage;
  sqlite3_stmt *pPageData;
  sqlite3_value **apVal;
  int nMaxField;
};

/*
** Main recover handle structure.
*/
struct sqlite3_recover {
  /* Copies of sqlite3_recover_init[_sql]() parameters */
  sqlite3 *dbIn;                  /* Input database */
  char *zDb;                      /* Name of input db ("main" etc.) */
  char *zUri;                     /* URI for output database */
  void *pSqlCtx;                  /* SQL callback context */
  int (*xSql)(void*,const char*); /* Pointer to SQL callback function */

  /* Values configured by sqlite3_recover_config() */
  char *zStateDb;                 /* State database to use (or NULL) */
  char *zLostAndFound;            /* Name of lost-and-found table (or NULL) */
  int bFreelistCorrupt;           /* SQLITE_RECOVER_FREELIST_CORRUPT setting */
  int bRecoverRowid;              /* SQLITE_RECOVER_ROWIDS setting */
  int bSlowIndexes;               /* SQLITE_RECOVER_SLOWINDEXES setting */

  int pgsz;                       /* Page size compared against the size of
                                  ** page 1 as read by getpage() */
  int detected_pgsz;
  int nReserve;                   /* Bytes trimmed from the end of each page
                                  ** blob returned by getpage() */
  u8 *pPage1Disk;                 /* Returned by getpage(1) in place of the
                                  ** data read, when that data matches
                                  ** pPage1Cache */
  u8 *pPage1Cache;                /* Image that page 1 is compared against */

  /* Error code and error message */
  int errCode;                    /* For sqlite3_recover_errcode() */
  char *zErrMsg;                  /* For sqlite3_recover_errmsg() */

  int eState;                     /* NOTE(review): presumably one of the
                                  ** RECOVER_STATE_* values - confirm */
  int bCloseTransaction;

  /* Variables used with eState==RECOVER_STATE_WRITING */
  RecoverStateW1 w1;

  /* Variables used with states RECOVER_STATE_LOSTANDFOUND[123] */
  RecoverStateLAF laf;

  /* Fields used within sqlite3_recover_run() */
  sqlite3 *dbOut;                 /* Output database */
  sqlite3_stmt *pGetPage;         /* SELECT against input db sqlite_dbpage */
  RecoverTable *pTblList;         /* List of tables recovered from schema */
};

/*
** The various states in which an sqlite3_recover object may exist:
**
**   RECOVER_STATE_INIT:
**    The object is initially created in this state. sqlite3_recover_step()
**    has yet to be called. This is the only state in which it is permitted
**    to call sqlite3_recover_config().
**
**   RECOVER_STATE_WRITING:
**
**   RECOVER_STATE_LOSTANDFOUND1:
**    State to populate the bitmap of pages used by other tables or the
**    database freelist.
**
**   RECOVER_STATE_LOSTANDFOUND2:
**    Populate the recovery.map table - used to figure out a "root" page
**    for each lost page in the database from which records are
**    extracted.
**
**   RECOVER_STATE_LOSTANDFOUND3:
**    Populate the lost-and-found table itself.
*/ #define RECOVER_STATE_INIT 0 #define RECOVER_STATE_WRITING 1 #define RECOVER_STATE_LOSTANDFOUND1 2 #define RECOVER_STATE_LOSTANDFOUND2 3 #define RECOVER_STATE_LOSTANDFOUND3 4 #define RECOVER_STATE_SCHEMA2 5 #define RECOVER_STATE_DONE 6 /* ** Global variables used by this extension. */ typedef struct RecoverGlobal RecoverGlobal; struct RecoverGlobal { const sqlite3_io_methods *pMethods; sqlite3_recover *p; }; static RecoverGlobal recover_g; /* ** Use this static SQLite mutex to protect the globals during the ** first call to sqlite3_recover_step(). */ #define RECOVER_MUTEX_ID SQLITE_MUTEX_STATIC_APP2 /* ** Default value for SQLITE_RECOVER_ROWIDS (sqlite3_recover.bRecoverRowid). */ #define RECOVER_ROWID_DEFAULT 1 /* ** Mutex handling: ** ** recoverEnterMutex() - Enter the recovery mutex ** recoverLeaveMutex() - Leave the recovery mutex ** recoverAssertMutexHeld() - Assert that the recovery mutex is held */ #if defined(SQLITE_THREADSAFE) && SQLITE_THREADSAFE==0 # define recoverEnterMutex() # define recoverLeaveMutex() #else static void recoverEnterMutex(void){ sqlite3_mutex_enter(sqlite3_mutex_alloc(RECOVER_MUTEX_ID)); } static void recoverLeaveMutex(void){ sqlite3_mutex_leave(sqlite3_mutex_alloc(RECOVER_MUTEX_ID)); } #endif #if SQLITE_THREADSAFE+0>=1 && defined(SQLITE_DEBUG) static void recoverAssertMutexHeld(void){ assert( sqlite3_mutex_held(sqlite3_mutex_alloc(RECOVER_MUTEX_ID)) ); } #else # define recoverAssertMutexHeld() #endif /* ** Like strlen(). But handles NULL pointer arguments. */ static int recoverStrlen(const char *zStr){ if( zStr==0 ) return 0; return (int)(strlen(zStr)&0x7fffffff); } /* ** This function is a no-op if the recover handle passed as the first ** argument already contains an error (if p->errCode!=SQLITE_OK). ** ** Otherwise, an attempt is made to allocate, zero and return a buffer nByte ** bytes in size. If successful, a pointer to the new buffer is returned. 
Or, ** if an OOM error occurs, NULL is returned and the handle error code ** (p->errCode) set to SQLITE_NOMEM. */ static void *recoverMalloc(sqlite3_recover *p, i64 nByte){ void *pRet = 0; assert( nByte>0 ); if( p->errCode==SQLITE_OK ){ pRet = sqlite3_malloc64(nByte); if( pRet ){ memset(pRet, 0, nByte); }else{ p->errCode = SQLITE_NOMEM; } } return pRet; } /* ** Set the error code and error message for the recover handle passed as ** the first argument. The error code is set to the value of parameter ** errCode. ** ** Parameter zFmt must be a printf() style formatting string. The handle ** error message is set to the result of using any trailing arguments for ** parameter substitutions in the formatting string. ** ** For example: ** ** recoverError(p, SQLITE_ERROR, "no such table: %s", zTablename); */ static int recoverError( sqlite3_recover *p, int errCode, const char *zFmt, ... ){ char *z = 0; va_list ap; va_start(ap, zFmt); if( zFmt ){ z = sqlite3_vmprintf(zFmt, ap); } va_end(ap); sqlite3_free(p->zErrMsg); p->zErrMsg = z; p->errCode = errCode; return errCode; } /* ** This function is a no-op if p->errCode is initially other than SQLITE_OK. ** In this case it returns NULL. ** ** Otherwise, an attempt is made to allocate and return a bitmap object ** large enough to store a bit for all page numbers between 1 and nPg, ** inclusive. The bitmap is initially zeroed. */ static RecoverBitmap *recoverBitmapAlloc(sqlite3_recover *p, i64 nPg){ int nElem = (nPg+1+31) / 32; int nByte = SZ_RECOVERBITMAP_32 + nElem*sizeof(u32); RecoverBitmap *pRet = (RecoverBitmap*)recoverMalloc(p, nByte); if( pRet ){ pRet->nPg = nPg; } return pRet; } /* ** Free a bitmap object allocated by recoverBitmapAlloc(). */ static void recoverBitmapFree(RecoverBitmap *pMap){ sqlite3_free(pMap); } /* ** Set the bit associated with page iPg in bitvec pMap. 
*/ static void recoverBitmapSet(RecoverBitmap *pMap, i64 iPg){ if( iPg<=pMap->nPg ){ int iElem = (iPg / 32); int iBit = (iPg % 32); pMap->aElem[iElem] |= (((u32)1) << iBit); } } /* ** Query bitmap object pMap for the state of the bit associated with page ** iPg. Return 1 if it is set, or 0 otherwise. */ static int recoverBitmapQuery(RecoverBitmap *pMap, i64 iPg){ int ret = 1; if( iPg<=pMap->nPg && iPg>0 ){ int iElem = (iPg / 32); int iBit = (iPg % 32); ret = (pMap->aElem[iElem] & (((u32)1) << iBit)) ? 1 : 0; } return ret; } /* ** Set the recover handle error to the error code and message returned by ** calling sqlite3_errcode() and sqlite3_errmsg(), respectively, on database ** handle db. */ static int recoverDbError(sqlite3_recover *p, sqlite3 *db){ return recoverError(p, sqlite3_errcode(db), "%s", sqlite3_errmsg(db)); } /* ** This function is a no-op if recover handle p already contains an error ** (if p->errCode!=SQLITE_OK). ** ** Otherwise, it attempts to prepare the SQL statement in zSql against ** database handle db. If successful, the statement handle is returned. ** Or, if an error occurs, NULL is returned and an error left in the ** recover handle. */ static sqlite3_stmt *recoverPrepare( sqlite3_recover *p, sqlite3 *db, const char *zSql ){ sqlite3_stmt *pStmt = 0; if( p->errCode==SQLITE_OK ){ if( sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0) ){ recoverDbError(p, db); } } return pStmt; } /* ** This function is a no-op if recover handle p already contains an error ** (if p->errCode!=SQLITE_OK). ** ** Otherwise, argument zFmt is used as a printf() style format string, ** along with any trailing arguments, to create an SQL statement. This ** SQL statement is prepared against database handle db and, if successful, ** the statment handle returned. Or, if an error occurs - either during ** the printf() formatting or when preparing the resulting SQL - an ** error code and message are left in the recover handle. 
*/ static sqlite3_stmt *recoverPreparePrintf( sqlite3_recover *p, sqlite3 *db, const char *zFmt, ... ){ sqlite3_stmt *pStmt = 0; if( p->errCode==SQLITE_OK ){ va_list ap; char *z; va_start(ap, zFmt); z = sqlite3_vmprintf(zFmt, ap); va_end(ap); if( z==0 ){ p->errCode = SQLITE_NOMEM; }else{ pStmt = recoverPrepare(p, db, z); sqlite3_free(z); } } return pStmt; } /* ** Reset SQLite statement handle pStmt. If the call to sqlite3_reset() ** indicates that an error occurred, and there is not already an error ** in the recover handle passed as the first argument, set the error ** code and error message appropriately. ** ** This function returns a copy of the statement handle pointer passed ** as the second argument. */ static sqlite3_stmt *recoverReset(sqlite3_recover *p, sqlite3_stmt *pStmt){ int rc = sqlite3_reset(pStmt); if( rc!=SQLITE_OK && rc!=SQLITE_CONSTRAINT && p->errCode==SQLITE_OK ){ recoverDbError(p, sqlite3_db_handle(pStmt)); } return pStmt; } /* ** Finalize SQLite statement handle pStmt. If the call to sqlite3_reset() ** indicates that an error occurred, and there is not already an error ** in the recover handle passed as the first argument, set the error ** code and error message appropriately. */ static void recoverFinalize(sqlite3_recover *p, sqlite3_stmt *pStmt){ sqlite3 *db = sqlite3_db_handle(pStmt); int rc = sqlite3_finalize(pStmt); if( rc!=SQLITE_OK && p->errCode==SQLITE_OK ){ recoverDbError(p, db); } } /* ** This function is a no-op if recover handle p already contains an error ** (if p->errCode!=SQLITE_OK). A copy of p->errCode is returned in this ** case. ** ** Otherwise, execute SQL script zSql. If successful, return SQLITE_OK. ** Or, if an error occurs, leave an error code and message in the recover ** handle and return a copy of the error code. 
*/ static int recoverExec(sqlite3_recover *p, sqlite3 *db, const char *zSql){ if( p->errCode==SQLITE_OK ){ int rc = sqlite3_exec(db, zSql, 0, 0, 0); if( rc ){ recoverDbError(p, db); } } return p->errCode; } /* ** Bind the value pVal to parameter iBind of statement pStmt. Leave an ** error in the recover handle passed as the first argument if an error ** (e.g. an OOM) occurs. */ static void recoverBindValue( sqlite3_recover *p, sqlite3_stmt *pStmt, int iBind, sqlite3_value *pVal ){ if( p->errCode==SQLITE_OK ){ int rc = sqlite3_bind_value(pStmt, iBind, pVal); if( rc ) recoverError(p, rc, 0); } } /* ** This function is a no-op if recover handle p already contains an error ** (if p->errCode!=SQLITE_OK). NULL is returned in this case. ** ** Otherwise, an attempt is made to interpret zFmt as a printf() style ** formatting string and the result of using the trailing arguments for ** parameter substitution with it written into a buffer obtained from ** sqlite3_malloc(). If successful, a pointer to the buffer is returned. ** It is the responsibility of the caller to eventually free the buffer ** using sqlite3_free(). ** ** Or, if an error occurs, an error code and message is left in the recover ** handle and NULL returned. */ static char *recoverMPrintf(sqlite3_recover *p, const char *zFmt, ...){ va_list ap; char *z; va_start(ap, zFmt); z = sqlite3_vmprintf(zFmt, ap); va_end(ap); if( p->errCode==SQLITE_OK ){ if( z==0 ) p->errCode = SQLITE_NOMEM; }else{ sqlite3_free(z); z = 0; } return z; } /* ** This function is a no-op if recover handle p already contains an error ** (if p->errCode!=SQLITE_OK). Zero is returned in this case. ** ** Otherwise, execute "PRAGMA page_count" against the input database. If ** successful, return the integer result. Or, if an error occurs, leave an ** error code and error message in the sqlite3_recover handle and return ** zero. 
*/ static i64 recoverPageCount(sqlite3_recover *p){ i64 nPg = 0; if( p->errCode==SQLITE_OK ){ sqlite3_stmt *pStmt = 0; pStmt = recoverPreparePrintf(p, p->dbIn, "PRAGMA %Q.page_count", p->zDb); if( pStmt ){ sqlite3_step(pStmt); nPg = sqlite3_column_int64(pStmt, 0); } recoverFinalize(p, pStmt); } return nPg; } /* ** Implementation of SQL scalar function "read_i32". The first argument to ** this function must be a blob. The second a non-negative integer. This ** function reads and returns a 32-bit big-endian integer from byte ** offset (4*) of the blob. ** ** SELECT read_i32(, ) */ static void recoverReadI32( sqlite3_context *context, int argc, sqlite3_value **argv ){ const unsigned char *pBlob; int nBlob; int iInt; assert( argc==2 ); nBlob = sqlite3_value_bytes(argv[0]); pBlob = (const unsigned char*)sqlite3_value_blob(argv[0]); iInt = sqlite3_value_int(argv[1]) & 0xFFFF; if( (iInt+1)*4<=nBlob ){ const unsigned char *a = &pBlob[iInt*4]; i64 iVal = ((i64)a[0]<<24) + ((i64)a[1]<<16) + ((i64)a[2]<< 8) + ((i64)a[3]<< 0); sqlite3_result_int64(context, iVal); } } /* ** Implementation of SQL scalar function "page_is_used". This function ** is used as part of the procedure for locating orphan rows for the ** lost-and-found table, and it depends on those routines having populated ** the sqlite3_recover.laf.pUsed variable. ** ** The only argument to this function is a page-number. It returns true ** if the page has already been used somehow during data recovery, or false ** otherwise. ** ** SELECT page_is_used(); */ static void recoverPageIsUsed( sqlite3_context *pCtx, int nArg, sqlite3_value **apArg ){ sqlite3_recover *p = (sqlite3_recover*)sqlite3_user_data(pCtx); i64 pgno = sqlite3_value_int64(apArg[0]); assert( nArg==1 ); sqlite3_result_int(pCtx, recoverBitmapQuery(p->laf.pUsed, pgno)); } /* ** The implementation of a user-defined SQL function invoked by the ** sqlite_dbdata and sqlite_dbptr virtual table modules to access pages ** of the database being recovered. 
** ** This function always takes a single integer argument. If the argument ** is zero, then the value returned is the number of pages in the db being ** recovered. If the argument is greater than zero, it is a page number. ** The value returned in this case is an SQL blob containing the data for ** the identified page of the db being recovered. e.g. ** ** SELECT getpage(0); -- return number of pages in db ** SELECT getpage(4); -- return page 4 of db as a blob of data */ static void recoverGetPage( sqlite3_context *pCtx, int nArg, sqlite3_value **apArg ){ sqlite3_recover *p = (sqlite3_recover*)sqlite3_user_data(pCtx); i64 pgno = sqlite3_value_int64(apArg[0]); sqlite3_stmt *pStmt = 0; assert( nArg==1 ); if( pgno==0 ){ i64 nPg = recoverPageCount(p); sqlite3_result_int64(pCtx, nPg); return; }else{ if( p->pGetPage==0 ){ pStmt = p->pGetPage = recoverPreparePrintf( p, p->dbIn, "SELECT data FROM sqlite_dbpage(%Q) WHERE pgno=?", p->zDb ); }else if( p->errCode==SQLITE_OK ){ pStmt = p->pGetPage; } if( pStmt ){ sqlite3_bind_int64(pStmt, 1, pgno); if( SQLITE_ROW==sqlite3_step(pStmt) ){ const u8 *aPg; int nPg; assert( p->errCode==SQLITE_OK ); aPg = sqlite3_column_blob(pStmt, 0); nPg = sqlite3_column_bytes(pStmt, 0); if( pgno==1 && nPg==p->pgsz && 0==memcmp(p->pPage1Cache, aPg, nPg) ){ aPg = p->pPage1Disk; } sqlite3_result_blob(pCtx, aPg, nPg-p->nReserve, SQLITE_TRANSIENT); } recoverReset(p, pStmt); } } if( p->errCode ){ if( p->zErrMsg ) sqlite3_result_error(pCtx, p->zErrMsg, -1); sqlite3_result_error_code(pCtx, p->errCode); } } /* ** Find a string that is not found anywhere in z[]. Return a pointer ** to that string. ** ** Try to use zA and zB first. If both of those are already found in z[] ** then make up some string and store it in the buffer zBuf. 
*/
static const char *recoverUnusedString(
  const char *z,                    /* Result must not appear anywhere in z */
  const char *zA, const char *zB,   /* Try these first */
  char *zBuf                        /* Space to store a generated string */
){
  unsigned iTry = 0;

  if( strstr(z, zA)==0 ) return zA;
  if( strstr(z, zB)==0 ) return zB;

  /* Both candidates occur in z[]. Generate "(zA0)", "(zA1)", ... in zBuf
  ** until one is found that does not. */
  for(;;){
    sqlite3_snprintf(20,zBuf,"(%s%u)", zA, iTry++);
    if( strstr(z,zBuf)==0 ) break;
  }
  return zBuf;
}

/*
** Implementation of scalar SQL function "escape_crlf". The argument passed
** to this function is the output of built-in function quote(). If the first
** character of the input is "'", indicating that the value passed to quote()
** was a text value, then this function searches the input for "\n" and "\r"
** characters. If any are found, each is replaced by a marker string that
** does not otherwise occur in the input, and the result is wrapped
** similar to the following:
**
**   replace(replace(INPUT, '\n', char(10)), '\r', char(13));
**
** Otherwise - if the first character of the input is not "'", or if the
** input contains neither "\n" nor "\r" - a copy of the input is returned.
*/
static void recoverEscapeCrlf(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  const char *zIn = (const char*)sqlite3_value_text(argv[0]);
  const char *zNL = 0;          /* Marker standing in for "\n", if needed */
  const char *zCR = 0;          /* Marker standing in for "\r", if needed */
  int nNL = 0;                  /* Length of zNL in bytes */
  int nCR = 0;                  /* Length of zCR in bytes */
  char zBufNL[20];
  char zBufCR[20];
  char *zOut;
  char *zCsr;                   /* Write cursor within zOut */
  i64 nLongest;
  i64 nAlloc;
  int nIn;
  int ii;
  (void)argc;

  /* Anything other than a quoted text literal is passed through as is. */
  if( zIn==0 || zIn[0]!='\'' ){
    sqlite3_result_value(context, argv[0]);
    return;
  }

  nIn = sqlite3_value_bytes(argv[0]);
  for(ii=0; zIn[ii]; ii++){
    if( zNL==0 && zIn[ii]=='\n' ){
      zNL = recoverUnusedString(zIn, "\\n", "\\012", zBufNL);
      nNL = (int)strlen(zNL);
    }
    if( zCR==0 && zIn[ii]=='\r' ){
      zCR = recoverUnusedString(zIn, "\\r", "\\015", zBufCR);
      nCR = (int)strlen(zCR);
    }
  }

  /* No line breaks at all: pass the input through unchanged. */
  if( zNL==0 && zCR==0 ){
    sqlite3_result_value(context, argv[0]);
    return;
  }

  nLongest = (nNL > nCR) ? nNL : nCR;
  nAlloc = nLongest * nIn + (nLongest+64)*2;
  zOut = (char*)sqlite3_malloc64(nAlloc);
  if( zOut==0 ){
    sqlite3_result_error_nomem(context);
    return;
  }
  zCsr = zOut;

  /* One "replace(" per kind of line break found. */
  if( zNL && zCR ){
    memcpy(zCsr, "replace(replace(", 16);
    zCsr += 16;
  }else{
    memcpy(zCsr, "replace(", 8);
    zCsr += 8;
  }

  /* Copy the literal, swapping each line break for its marker. */
  for(ii=0; zIn[ii]; ii++){
    switch( zIn[ii] ){
      case '\n':
        memcpy(zCsr, zNL, nNL);
        zCsr += nNL;
        break;
      case '\r':
        memcpy(zCsr, zCR, nCR);
        zCsr += nCR;
        break;
      default:
        *zCsr++ = zIn[ii];
        break;
    }
  }

  /* Close each replace() call with its marker and replacement char. */
  if( zNL ){
    memcpy(zCsr, ",'", 2); zCsr += 2;
    memcpy(zCsr, zNL, nNL); zCsr += nNL;
    memcpy(zCsr, "', char(10))", 12); zCsr += 12;
  }
  if( zCR ){
    memcpy(zCsr, ",'", 2); zCsr += 2;
    memcpy(zCsr, zCR, nCR); zCsr += nCR;
    memcpy(zCsr, "', char(13))", 12); zCsr += 12;
  }

  sqlite3_result_text(context, zOut, (int)(zCsr - zOut), SQLITE_TRANSIENT);
  sqlite3_free(zOut);
}

/*
** This function is a no-op if recover handle p already contains an error
** (if p->errCode!=SQLITE_OK). A copy of the error code is returned in
** this case.
**
** Otherwise, attempt to populate temporary table "recovery.schema" with the
** parts of the database schema that can be extracted from the input database.
**
** If no error occurs, SQLITE_OK is returned. Otherwise, an error code
** and error message are left in the recover handle and a copy of the
** error code returned. It is not considered an error if part or all of
** the database schema cannot be recovered due to corruption.
*/ static int recoverCacheSchema(sqlite3_recover *p){ return recoverExec(p, p->dbOut, "WITH RECURSIVE pages(p) AS (" " SELECT 1" " UNION" " SELECT child FROM sqlite_dbptr('getpage()'), pages WHERE pgno=p" ")" "INSERT INTO recovery.schema SELECT" " max(CASE WHEN field=0 THEN value ELSE NULL END)," " max(CASE WHEN field=1 THEN value ELSE NULL END)," " max(CASE WHEN field=2 THEN value ELSE NULL END)," " max(CASE WHEN field=3 THEN value ELSE NULL END)," " max(CASE WHEN field=4 THEN value ELSE NULL END)" "FROM sqlite_dbdata('getpage()') WHERE pgno IN (" " SELECT p FROM pages" ") GROUP BY pgno, cell" ); } /* ** If this recover handle is not in SQL callback mode (i.e. was not created ** using sqlite3_recover_init_sql()) of if an error has already occurred, ** this function is a no-op. Otherwise, issue a callback with SQL statement ** zSql as the parameter. ** ** If the callback returns non-zero, set the recover handle error code to ** the value returned (so that the caller will abandon processing). */ static void recoverSqlCallback(sqlite3_recover *p, const char *zSql){ if( p->errCode==SQLITE_OK && p->xSql ){ int res = p->xSql(p->pSqlCtx, zSql); if( res ){ recoverError(p, SQLITE_ERROR, "callback returned an error - %d", res); } } } /* ** Transfer the following settings from the input database to the output ** database: ** ** + page-size, ** + auto-vacuum settings, ** + database encoding, ** + user-version (PRAGMA user_version), and ** + application-id (PRAGMA application_id), and */ static void recoverTransferSettings(sqlite3_recover *p){ const char *aPragma[] = { "encoding", "page_size", "auto_vacuum", "user_version", "application_id" }; int ii; /* Truncate the output database to 0 pages in size. This is done by ** opening a new, empty, temp db, then using the backup API to clobber ** any existing output db with a copy of it. 
*/ if( p->errCode==SQLITE_OK ){ sqlite3 *db2 = 0; int rc = sqlite3_open("", &db2); if( rc!=SQLITE_OK ){ recoverDbError(p, db2); return; } for(ii=0; ii<(int)(sizeof(aPragma)/sizeof(aPragma[0])); ii++){ const char *zPrag = aPragma[ii]; sqlite3_stmt *p1 = 0; p1 = recoverPreparePrintf(p, p->dbIn, "PRAGMA %Q.%s", p->zDb, zPrag); if( p->errCode==SQLITE_OK && sqlite3_step(p1)==SQLITE_ROW ){ const char *zArg = (const char*)sqlite3_column_text(p1, 0); char *z2 = recoverMPrintf(p, "PRAGMA %s = %Q", zPrag, zArg); recoverSqlCallback(p, z2); recoverExec(p, db2, z2); sqlite3_free(z2); if( zArg==0 ){ recoverError(p, SQLITE_NOMEM, 0); } } recoverFinalize(p, p1); } recoverExec(p, db2, "CREATE TABLE t1(a); DROP TABLE t1;"); if( p->errCode==SQLITE_OK ){ sqlite3 *db = p->dbOut; sqlite3_backup *pBackup = sqlite3_backup_init(db, "main", db2, "main"); if( pBackup ){ sqlite3_backup_step(pBackup, -1); p->errCode = sqlite3_backup_finish(pBackup); }else{ recoverDbError(p, db); } } sqlite3_close(db2); } } /* ** This function is a no-op if recover handle p already contains an error ** (if p->errCode!=SQLITE_OK). A copy of the error code is returned in ** this case. ** ** Otherwise, an attempt is made to open the output database, attach ** and create the schema of the temporary database used to store ** intermediate data, and to register all required user functions and ** virtual table modules with the output handle. ** ** If no error occurs, SQLITE_OK is returned. Otherwise, an error code ** and error message are left in the recover handle and a copy of the ** error code returned. 
*/
static int recoverOpenOutput(sqlite3_recover *p){
  /* SQL functions registered on the output handle. */
  struct RecoverFunc {
    const char *zName;
    int nArg;
    void (*xFunc)(sqlite3_context*,int,sqlite3_value **);
  } aFunc[] = {
    { "getpage", 1, recoverGetPage },
    { "page_is_used", 1, recoverPageIsUsed },
    { "read_i32", 2, recoverReadI32 },
    { "escape_crlf", 1, recoverEscapeCrlf },
  };
  const int nFunc = (int)(sizeof(aFunc)/sizeof(aFunc[0]));
  const int flags = SQLITE_OPEN_URI|SQLITE_OPEN_CREATE|SQLITE_OPEN_READWRITE;
  sqlite3 *dbNew = 0;             /* New database handle */
  int iFunc;                      /* For iterating through aFunc[] */

  assert( p->dbOut==0 );

  if( sqlite3_open_v2(p->zUri, &dbNew, flags, 0)!=SQLITE_OK ){
    recoverDbError(p, dbNew);
  }

  /* Register the sqlite_dbdata and sqlite_dbptr virtual table modules.
  ** These two are registered with the output database handle - this
  ** module depends on the input handle supporting the sqlite_dbpage
  ** virtual table only. */
  if( p->errCode==SQLITE_OK ){
    p->errCode = sqlite3_dbdata_init(dbNew, 0, 0);
  }

  /* Register the custom user-functions with the output handle, stopping
  ** at the first failure. */
  iFunc = 0;
  while( p->errCode==SQLITE_OK && iFunc<nFunc ){
    p->errCode = sqlite3_create_function(dbNew, aFunc[iFunc].zName,
        aFunc[iFunc].nArg, SQLITE_UTF8, (void*)p, aFunc[iFunc].xFunc, 0, 0
    );
    iFunc++;
  }

  /* The handle is stored even if an error occurred above. */
  p->dbOut = dbNew;
  return p->errCode;
}

/*
** Attach the auxiliary database 'recovery' to the output database handle
** and create its "map" and "schema" tables. This temporary database is
** used during the recovery process and then discarded.
*/
static void recoverOpenRecovery(sqlite3_recover *p){
  char *zAttach = recoverMPrintf(p, "ATTACH %Q AS recovery;", p->zStateDb);

  recoverExec(p, p->dbOut, zAttach);
  sqlite3_free(zAttach);

  recoverExec(p, p->dbOut,
      "PRAGMA writable_schema = 1;"
      "CREATE TABLE recovery.map(pgno INTEGER PRIMARY KEY, parent INT);"
      "CREATE TABLE recovery.schema(type, name, tbl_name, rootpage, sql);"
  );
}

/*
** This function is a no-op if recover handle p already contains an error
** (if p->errCode!=SQLITE_OK).
**
** Otherwise, argument zName must be the name of a table that has just been
** created in the output database.
This function queries the output db ** for the schema of said table, and creates a RecoverTable object to ** store the schema in memory. The new RecoverTable object is linked into ** the list at sqlite3_recover.pTblList. ** ** Parameter iRoot must be the root page of table zName in the INPUT ** database. */ static void recoverAddTable( sqlite3_recover *p, const char *zName, /* Name of table created in output db */ i64 iRoot /* Root page of same table in INPUT db */ ){ sqlite3_stmt *pStmt = recoverPreparePrintf(p, p->dbOut, "PRAGMA table_xinfo(%Q)", zName ); if( pStmt ){ int iPk = -1; int iBind = 1; RecoverTable *pNew = 0; int nCol = 0; int nName = recoverStrlen(zName); int nByte = 0; while( sqlite3_step(pStmt)==SQLITE_ROW ){ nCol++; nByte += (sqlite3_column_bytes(pStmt, 1)+1); } nByte += sizeof(RecoverTable) + nCol*sizeof(RecoverColumn) + nName+1; recoverReset(p, pStmt); pNew = recoverMalloc(p, nByte); if( pNew ){ int i = 0; int iField = 0; char *csr = 0; pNew->aCol = (RecoverColumn*)&pNew[1]; pNew->zTab = csr = (char*)&pNew->aCol[nCol]; pNew->nCol = nCol; pNew->iRoot = iRoot; memcpy(csr, zName, nName); csr += nName+1; for(i=0; sqlite3_step(pStmt)==SQLITE_ROW; i++){ int iPKF = sqlite3_column_int(pStmt, 5); int n = sqlite3_column_bytes(pStmt, 1); const char *z = (const char*)sqlite3_column_text(pStmt, 1); const char *zType = (const char*)sqlite3_column_text(pStmt, 2); int eHidden = sqlite3_column_int(pStmt, 6); if( iPk==-1 && iPKF==1 && !sqlite3_stricmp("integer", zType) ) iPk = i; if( iPKF>1 ) iPk = -2; pNew->aCol[i].zCol = csr; pNew->aCol[i].eHidden = eHidden; if( eHidden==RECOVER_EHIDDEN_VIRTUAL ){ pNew->aCol[i].iField = -1; }else{ pNew->aCol[i].iField = iField++; } if( eHidden!=RECOVER_EHIDDEN_VIRTUAL && eHidden!=RECOVER_EHIDDEN_STORED ){ pNew->aCol[i].iBind = iBind++; } memcpy(csr, z, n); csr += (n+1); } pNew->pNext = p->pTblList; p->pTblList = pNew; pNew->bIntkey = 1; } recoverFinalize(p, pStmt); pStmt = recoverPreparePrintf(p, p->dbOut, "PRAGMA 
index_xinfo(%Q)", zName); while( pStmt && sqlite3_step(pStmt)==SQLITE_ROW ){ int iField = sqlite3_column_int(pStmt, 0); int iCol = sqlite3_column_int(pStmt, 1); assert( iColnCol ); pNew->aCol[iCol].iField = iField; pNew->bIntkey = 0; iPk = -2; } recoverFinalize(p, pStmt); if( p->errCode==SQLITE_OK ){ if( iPk>=0 ){ pNew->aCol[iPk].bIPK = 1; }else if( pNew->bIntkey ){ pNew->iRowidBind = iBind++; } } } } /* ** This function is called after recoverCacheSchema() has cached those parts ** of the input database schema that could be recovered in temporary table ** "recovery.schema". This function creates in the output database copies ** of all parts of that schema that must be created before the tables can ** be populated. Specifically, this means: ** ** * all tables that are not VIRTUAL, and ** * UNIQUE indexes. ** ** If the recovery handle uses SQL callbacks, then callbacks containing ** the associated "CREATE TABLE" and "CREATE INDEX" statements are made. ** ** Additionally, records are added to the sqlite_schema table of the ** output database for any VIRTUAL tables. The CREATE VIRTUAL TABLE ** records are written directly to sqlite_schema, not actually executed. ** If the handle is in SQL callback mode, then callbacks are invoked ** with equivalent SQL statements. 
*/
static int recoverWriteSchema1(sqlite3_recover *p){
  sqlite3_stmt *pSelect = 0;      /* Iterates recovered schema entries */
  sqlite3_stmt *pTblname = 0;     /* Finds most recently created table */

  /* Select tables and indexes from recovery.schema, tables first. An
  ** index qualifies if its SQL matches '%unique%', or unconditionally
  ** when ?1 (bound to p->bSlowIndexes below) is true. */
  pSelect = recoverPrepare(p, p->dbOut,
      "WITH dbschema(rootpage, name, sql, tbl, isVirtual, isIndex) AS ("
      " SELECT rootpage, name, sql, "
      " type='table', "
      " sql LIKE 'create virtual%',"
      " (type='index' AND (sql LIKE '%unique%' OR ?1))"
      " FROM recovery.schema"
      ")"
      "SELECT rootpage, tbl, isVirtual, name, sql"
      " FROM dbschema "
      " WHERE tbl OR isIndex"
      " ORDER BY tbl DESC, name=='sqlite_sequence' DESC"
  );

  pTblname = recoverPrepare(p, p->dbOut,
      "SELECT name FROM sqlite_schema "
      "WHERE type='table' ORDER BY rowid DESC LIMIT 1"
  );

  if( pSelect ){
    sqlite3_bind_int(pSelect, 1, p->bSlowIndexes);
    while( sqlite3_step(pSelect)==SQLITE_ROW ){
      i64 iRoot = sqlite3_column_int64(pSelect, 0);
      int bTable = sqlite3_column_int(pSelect, 1);
      int bVirtual = sqlite3_column_int(pSelect, 2);
      const char *zName = (const char*)sqlite3_column_text(pSelect, 3);
      const char *zSql = (const char*)sqlite3_column_text(pSelect, 4);
      char *zFree = 0;
      int rc = SQLITE_OK;

      /* Virtual tables are not created; a row is inserted directly into
      ** sqlite_schema instead. */
      if( bVirtual ){
        zSql = (const char*)(zFree = recoverMPrintf(p,
            "INSERT INTO sqlite_schema VALUES('table', %Q, %Q, 0, %Q)",
            zName, zName, zSql
        ));
      }
      rc = sqlite3_exec(p->dbOut, zSql, 0, 0, 0);
      if( rc==SQLITE_OK ){
        recoverSqlCallback(p, zSql);
        if( bTable && !bVirtual ){
          /* The table just created is the one with the largest rowid in
          ** sqlite_schema. Its name as stored there is what gets passed
          ** to recoverAddTable(). */
          if( SQLITE_ROW==sqlite3_step(pTblname) ){
            const char *zTbl = (const char*)sqlite3_column_text(pTblname, 0);
            if( zTbl ) recoverAddTable(p, zTbl, iRoot);
          }
          recoverReset(p, pTblname);
        }
      }else if( rc!=SQLITE_ERROR ){
        /* SQLITE_ERROR from a schema statement is skipped silently; any
        ** other failure is recorded in the recover handle. */
        recoverDbError(p, p->dbOut);
      }
      sqlite3_free(zFree);
    }
  }
  recoverFinalize(p, pSelect);
  recoverFinalize(p, pTblname);

  return p->errCode;
}

/*
** This function is called after the output database has been populated. It
** adds all recovered schema elements that were not created in the output
** database by recoverWriteSchema1() - everything except for tables and
** UNIQUE indexes. Specifically:
**
**     * views,
**     * triggers,
**     * non-UNIQUE indexes.
**
** If the recover handle is in SQL callback mode, then equivalent callbacks
** are issued to create the schema elements.
**
** Returns a copy of the recover handle error code.
*/
static int recoverWriteSchema2(sqlite3_recover *p){
  const char *zQuery;
  sqlite3_stmt *pSchema;

  /* With bSlowIndexes set, every index was already handled earlier, so
  ** indexes are excluded here altogether. */
  if( p->bSlowIndexes ){
    zQuery =
        "SELECT rootpage, sql FROM recovery.schema "
        " WHERE type!='table' AND type!='index'";
  }else{
    zQuery =
        "SELECT rootpage, sql FROM recovery.schema "
        " WHERE type!='table' AND (type!='index' OR sql NOT LIKE '%unique%')";
  }

  pSchema = recoverPrepare(p, p->dbOut, zQuery);
  while( pSchema && sqlite3_step(pSchema)==SQLITE_ROW ){
    const char *zCreate = (const char*)sqlite3_column_text(pSchema, 1);
    int rcExec = sqlite3_exec(p->dbOut, zCreate, 0, 0, 0);

    if( rcExec==SQLITE_OK ){
      recoverSqlCallback(p, zCreate);
    }else if( rcExec!=SQLITE_ERROR ){
      /* SQLITE_ERROR is skipped silently; anything else is recorded. */
      recoverDbError(p, p->dbOut);
    }
  }
  recoverFinalize(p, pSchema);

  return p->errCode;
}

/*
** This function is a no-op if recover handle p already contains an error
** (if p->errCode!=SQLITE_OK). In this case it returns NULL.
**
** Otherwise, if the recover handle is configured to create an output
** database (was created by sqlite3_recover_init()), then this function
** prepares and returns an SQL statement to INSERT a new record into table
** pTab, assuming the first nField fields of a record extracted from disk
** are valid.
**
** For example, if table pTab is:
**
**     CREATE TABLE name(a, b GENERATED ALWAYS AS (a+1) STORED, c, d, e);
**
** And nField is 4, then the SQL statement prepared and returned is:
**
**     INSERT INTO name (a, c, d) VALUES (?1, ?2, ?3);
**
** In this case even though 4 values were extracted from the input db,
** only 3 are written to the output, as the generated STORED column
** cannot be written.
**
** If the recover handle is in SQL callback mode, then the SQL statement
** prepared is such that evaluating it returns a single row containing
** a single text value - itself an SQL statement similar to the above,
** except with SQL literals in place of the variables.
For example: ** ** SELECT 'INSERT INTO (a, c, d) VALUES (' ** || quote(?1) || ', ' ** || quote(?2) || ', ' ** || quote(?3) || ')'; ** ** In either case, it is the responsibility of the caller to eventually ** free the statement handle using sqlite3_finalize(). */ static sqlite3_stmt *recoverInsertStmt( sqlite3_recover *p, RecoverTable *pTab, int nField ){ sqlite3_stmt *pRet = 0; const char *zSep = ""; const char *zSqlSep = ""; char *zSql = 0; char *zFinal = 0; char *zBind = 0; int ii; int bSql = p->xSql ? 1 : 0; if( nField<=0 ) return 0; assert( nField<=pTab->nCol ); zSql = recoverMPrintf(p, "INSERT OR IGNORE INTO %Q(", pTab->zTab); if( pTab->iRowidBind ){ assert( pTab->bIntkey ); zSql = recoverMPrintf(p, "%z_rowid_", zSql); if( bSql ){ zBind = recoverMPrintf(p, "%zquote(?%d)", zBind, pTab->iRowidBind); }else{ zBind = recoverMPrintf(p, "%z?%d", zBind, pTab->iRowidBind); } zSqlSep = "||', '||"; zSep = ", "; } for(ii=0; iiaCol[ii].eHidden; if( eHidden!=RECOVER_EHIDDEN_VIRTUAL && eHidden!=RECOVER_EHIDDEN_STORED ){ assert( pTab->aCol[ii].iField>=0 && pTab->aCol[ii].iBind>=1 ); zSql = recoverMPrintf(p, "%z%s%Q", zSql, zSep, pTab->aCol[ii].zCol); if( bSql ){ zBind = recoverMPrintf(p, "%z%sescape_crlf(quote(?%d))", zBind, zSqlSep, pTab->aCol[ii].iBind ); zSqlSep = "||', '||"; }else{ zBind = recoverMPrintf(p, "%z%s?%d", zBind, zSep, pTab->aCol[ii].iBind); } zSep = ", "; } } if( bSql ){ zFinal = recoverMPrintf(p, "SELECT %Q || ') VALUES (' || %s || ')'", zSql, zBind ); }else{ zFinal = recoverMPrintf(p, "%s) VALUES (%s)", zSql, zBind); } pRet = recoverPrepare(p, p->dbOut, zFinal); sqlite3_free(zSql); sqlite3_free(zBind); sqlite3_free(zFinal); return pRet; } /* ** Search the list of RecoverTable objects at p->pTblList for one that ** has root page iRoot in the input database. If such an object is found, ** return a pointer to it. Otherwise, return NULL. 
*/
static RecoverTable *recoverFindTable(sqlite3_recover *p, u32 iRoot){
  RecoverTable *pRet = 0;
  /* Linear scan of the linked list; stops at a match or at the end. */
  for(pRet=p->pTblList; pRet && pRet->iRoot!=iRoot; pRet=pRet->pNext);
  return pRet;
}

/*
** This function attempts to create a lost and found table within the
** output db. If successful, it returns a pointer to a buffer containing
** the name of the new table. It is the responsibility of the caller to
** eventually free this buffer using sqlite3_free().
**
** If an error occurs, NULL is returned and an error code and error
** message left in the recover handle.
*/
static char *recoverLostAndFoundCreate(
  sqlite3_recover *p,             /* Recover object */
  int nField                      /* Number of column fields in new table */
){
  char *zTbl = 0;
  sqlite3_stmt *pProbe = 0;
  int ii = 0;

  pProbe = recoverPrepare(p, p->dbOut,
    "SELECT 1 FROM sqlite_schema WHERE name=?"
  );

  /* Try the configured name first (ii==-1), then "<name>_0" through
  ** "<name>_999", until one is found that does not clash with an existing
  ** schema object. */
  for(ii=-1; zTbl==0 && p->errCode==SQLITE_OK && ii<1000; ii++){
    int bFail = 0;
    if( ii<0 ){
      zTbl = recoverMPrintf(p, "%s", p->zLostAndFound);
    }else{
      zTbl = recoverMPrintf(p, "%s_%d", p->zLostAndFound, ii);
    }

    if( p->errCode==SQLITE_OK ){
      sqlite3_bind_text(pProbe, 1, zTbl, -1, SQLITE_STATIC);
      if( SQLITE_ROW==sqlite3_step(pProbe) ){
        bFail = 1;
      }
      recoverReset(p, pProbe);
    }

    if( bFail ){
      /* Name already in use. Clear the binding before freeing the buffer
      ** it points to (it was bound SQLITE_STATIC). */
      sqlite3_clear_bindings(pProbe);
      sqlite3_free(zTbl);
      zTbl = 0;
    }
  }
  recoverFinalize(p, pProbe);

  if( zTbl ){
    const char *zSep = 0;
    char *zField = 0;
    char *zSql = 0;

    /* The four fixed columns are followed by nField columns c0, c1, ... */
    zSep = "rootpgno INTEGER, pgno INTEGER, nfield INTEGER, id INTEGER, ";
    for(ii=0; p->errCode==SQLITE_OK && ii<nField; ii++){
      zField = recoverMPrintf(p, "%z%sc%d", zField, zSep, ii);
      zSep = ", ";
    }

    zSql = recoverMPrintf(p, "CREATE TABLE %s(%s)", zTbl, zField);
    sqlite3_free(zField);

    recoverExec(p, p->dbOut, zSql);
    recoverSqlCallback(p, zSql);
    sqlite3_free(zSql);
  }else if( p->errCode==SQLITE_OK ){
    recoverError(
        p, SQLITE_ERROR, "failed to create %s output table", p->zLostAndFound
    );
  }

  return zTbl;
}

/*
** Synthesize and prepare an INSERT statement to write to the lost_and_found
** table in the output database. The name of the table is zTab, and it has
** nField c* fields.
*/ static sqlite3_stmt *recoverLostAndFoundInsert( sqlite3_recover *p, const char *zTab, int nField ){ int nTotal = nField + 4; int ii; char *zBind = 0; sqlite3_stmt *pRet = 0; if( p->xSql==0 ){ for(ii=0; iidbOut, "INSERT INTO %s VALUES(%s)", zTab, zBind ); }else{ const char *zSep = ""; for(ii=0; iidbOut, "SELECT 'INSERT INTO %s VALUES(' || %s || ')'", zTab, zBind ); } sqlite3_free(zBind); return pRet; } /* ** Input database page iPg contains data that will be written to the ** lost-and-found table of the output database. This function attempts ** to identify the root page of the tree that page iPg belonged to. ** If successful, it sets output variable (*piRoot) to the page number ** of the root page and returns SQLITE_OK. Otherwise, if an error occurs, ** an SQLite error code is returned and the final value of *piRoot ** undefined. */ static int recoverLostAndFoundFindRoot( sqlite3_recover *p, i64 iPg, i64 *piRoot ){ RecoverStateLAF *pLaf = &p->laf; if( pLaf->pFindRoot==0 ){ pLaf->pFindRoot = recoverPrepare(p, p->dbOut, "WITH RECURSIVE p(pgno) AS (" " SELECT ?" " UNION" " SELECT parent FROM recovery.map AS m, p WHERE m.pgno=p.pgno" ") " "SELECT p.pgno FROM p, recovery.map m WHERE m.pgno=p.pgno " " AND m.parent IS NULL" ); } if( p->errCode==SQLITE_OK ){ sqlite3_bind_int64(pLaf->pFindRoot, 1, iPg); if( sqlite3_step(pLaf->pFindRoot)==SQLITE_ROW ){ *piRoot = sqlite3_column_int64(pLaf->pFindRoot, 0); }else{ *piRoot = iPg; } recoverReset(p, pLaf->pFindRoot); } return p->errCode; } /* ** Recover data from page iPage of the input database and write it to ** the lost-and-found table in the output database. 
*/ static void recoverLostAndFoundOnePage(sqlite3_recover *p, i64 iPage){ RecoverStateLAF *pLaf = &p->laf; sqlite3_value **apVal = pLaf->apVal; sqlite3_stmt *pPageData = pLaf->pPageData; sqlite3_stmt *pInsert = pLaf->pInsert; int nVal = -1; int iPrevCell = 0; i64 iRoot = 0; int bHaveRowid = 0; i64 iRowid = 0; int ii = 0; if( recoverLostAndFoundFindRoot(p, iPage, &iRoot) ) return; sqlite3_bind_int64(pPageData, 1, iPage); while( p->errCode==SQLITE_OK && SQLITE_ROW==sqlite3_step(pPageData) ){ int iCell = sqlite3_column_int64(pPageData, 0); int iField = sqlite3_column_int64(pPageData, 1); if( iPrevCell!=iCell && nVal>=0 ){ /* Insert the new row */ sqlite3_bind_int64(pInsert, 1, iRoot); /* rootpgno */ sqlite3_bind_int64(pInsert, 2, iPage); /* pgno */ sqlite3_bind_int(pInsert, 3, nVal); /* nfield */ if( bHaveRowid ){ sqlite3_bind_int64(pInsert, 4, iRowid); /* id */ } for(ii=0; iinMaxField ){ sqlite3_value *pVal = sqlite3_column_value(pPageData, 2); apVal[iField] = sqlite3_value_dup(pVal); assert( iField==nVal || (nVal==-1 && iField==0) ); nVal = iField+1; if( apVal[iField]==0 ){ recoverError(p, SQLITE_NOMEM, 0); } } iPrevCell = iCell; } recoverReset(p, pPageData); for(ii=0; iilaf; if( p->errCode==SQLITE_OK ){ if( pLaf->pInsert==0 ){ return SQLITE_DONE; }else{ if( p->errCode==SQLITE_OK ){ int res = sqlite3_step(pLaf->pAllPage); if( res==SQLITE_ROW ){ i64 iPage = sqlite3_column_int64(pLaf->pAllPage, 0); if( recoverBitmapQuery(pLaf->pUsed, iPage)==0 ){ recoverLostAndFoundOnePage(p, iPage); } }else{ recoverReset(p, pLaf->pAllPage); return SQLITE_DONE; } } } } return SQLITE_OK; } /* ** Initialize resources required in RECOVER_STATE_LOSTANDFOUND3 ** state - during which the lost-and-found table of the output database ** is populated with recovered data that can not be assigned to any ** recovered schema object. 
*/ static void recoverLostAndFound3Init(sqlite3_recover *p){ RecoverStateLAF *pLaf = &p->laf; if( pLaf->nMaxField>0 ){ char *zTab = 0; /* Name of lost_and_found table */ zTab = recoverLostAndFoundCreate(p, pLaf->nMaxField); pLaf->pInsert = recoverLostAndFoundInsert(p, zTab, pLaf->nMaxField); sqlite3_free(zTab); pLaf->pAllPage = recoverPreparePrintf(p, p->dbOut, "WITH RECURSIVE seq(ii) AS (" " SELECT 1 UNION ALL SELECT ii+1 FROM seq WHERE ii<%lld" ")" "SELECT ii FROM seq" , p->laf.nPg ); pLaf->pPageData = recoverPrepare(p, p->dbOut, "SELECT cell, field, value " "FROM sqlite_dbdata('getpage()') d WHERE d.pgno=? " "UNION ALL " "SELECT -1, -1, -1" ); pLaf->apVal = (sqlite3_value**)recoverMalloc(p, pLaf->nMaxField*sizeof(sqlite3_value*) ); } } /* ** Initialize resources required in RECOVER_STATE_WRITING state - during which ** tables recovered from the schema of the input database are populated with ** recovered data. */ static int recoverWriteDataInit(sqlite3_recover *p){ RecoverStateW1 *p1 = &p->w1; RecoverTable *pTbl = 0; int nByte = 0; /* Figure out the maximum number of columns for any table in the schema */ assert( p1->nMax==0 ); for(pTbl=p->pTblList; pTbl; pTbl=pTbl->pNext){ if( pTbl->nCol>p1->nMax ) p1->nMax = pTbl->nCol; } /* Allocate an array of (sqlite3_value*) in which to accumulate the values ** that will be written to the output database in a single row. */ nByte = sizeof(sqlite3_value*) * (p1->nMax+1); p1->apVal = (sqlite3_value**)recoverMalloc(p, nByte); if( p1->apVal==0 ) return p->errCode; /* Prepare the SELECT to loop through schema tables (pTbls) and the SELECT ** to loop through cells that appear to belong to a single table (pSel). 
*/ p1->pTbls = recoverPrepare(p, p->dbOut, "SELECT rootpage FROM recovery.schema " " WHERE type='table' AND (sql NOT LIKE 'create virtual%')" " ORDER BY (tbl_name='sqlite_sequence') ASC" ); p1->pSel = recoverPrepare(p, p->dbOut, "WITH RECURSIVE pages(page) AS (" " SELECT ?1" " UNION" " SELECT child FROM sqlite_dbptr('getpage()'), pages " " WHERE pgno=page" ") " "SELECT page, cell, field, value " "FROM sqlite_dbdata('getpage()') d, pages p WHERE p.page=d.pgno " "UNION ALL " "SELECT 0, 0, 0, 0" ); return p->errCode; } /* ** Clean up resources allocated by recoverWriteDataInit() (stuff in ** sqlite3_recover.w1). */ static void recoverWriteDataCleanup(sqlite3_recover *p){ RecoverStateW1 *p1 = &p->w1; int ii; for(ii=0; iinVal; ii++){ sqlite3_value_free(p1->apVal[ii]); } sqlite3_free(p1->apVal); recoverFinalize(p, p1->pInsert); recoverFinalize(p, p1->pTbls); recoverFinalize(p, p1->pSel); memset(p1, 0, sizeof(*p1)); } /* ** Perform one step (sqlite3_recover_step()) of work for the connection ** passed as the only argument, which is guaranteed to be in ** RECOVER_STATE_WRITING state - during which tables recovered from the ** schema of the input database are populated with recovered data. */ static int recoverWriteDataStep(sqlite3_recover *p){ RecoverStateW1 *p1 = &p->w1; sqlite3_stmt *pSel = p1->pSel; sqlite3_value **apVal = p1->apVal; if( p->errCode==SQLITE_OK && p1->pTab==0 ){ if( sqlite3_step(p1->pTbls)==SQLITE_ROW ){ i64 iRoot = sqlite3_column_int64(p1->pTbls, 0); p1->pTab = recoverFindTable(p, iRoot); recoverFinalize(p, p1->pInsert); p1->pInsert = 0; /* If this table is unknown, return early. The caller will invoke this ** function again and it will move on to the next table. */ if( p1->pTab==0 ) return p->errCode; /* If this is the sqlite_sequence table, delete any rows added by ** earlier INSERT statements on tables with AUTOINCREMENT primary ** keys before recovering its contents. 
The p1->pTbls SELECT statement ** is rigged to deliver "sqlite_sequence" last of all, so we don't ** worry about it being modified after it is recovered. */ if( sqlite3_stricmp("sqlite_sequence", p1->pTab->zTab)==0 ){ recoverExec(p, p->dbOut, "DELETE FROM sqlite_sequence"); recoverSqlCallback(p, "DELETE FROM sqlite_sequence"); } /* Bind the root page of this table within the original database to ** SELECT statement p1->pSel. The SELECT statement will then iterate ** through cells that look like they belong to table pTab. */ sqlite3_bind_int64(pSel, 1, iRoot); p1->nVal = 0; p1->bHaveRowid = 0; p1->iPrevPage = -1; p1->iPrevCell = -1; }else{ return SQLITE_DONE; } } assert( p->errCode!=SQLITE_OK || p1->pTab ); if( p->errCode==SQLITE_OK && sqlite3_step(pSel)==SQLITE_ROW ){ RecoverTable *pTab = p1->pTab; i64 iPage = sqlite3_column_int64(pSel, 0); int iCell = sqlite3_column_int(pSel, 1); int iField = sqlite3_column_int(pSel, 2); sqlite3_value *pVal = sqlite3_column_value(pSel, 3); int bNewCell = (p1->iPrevPage!=iPage || p1->iPrevCell!=iCell); assert( bNewCell==0 || (iField==-1 || iField==0) ); assert( bNewCell || iField==p1->nVal || p1->nVal==pTab->nCol ); if( bNewCell ){ int ii = 0; if( p1->nVal>=0 ){ if( p1->pInsert==0 || p1->nVal!=p1->nInsert ){ recoverFinalize(p, p1->pInsert); p1->pInsert = recoverInsertStmt(p, pTab, p1->nVal); p1->nInsert = p1->nVal; } if( p1->nVal>0 ){ sqlite3_stmt *pInsert = p1->pInsert; for(ii=0; iinCol; ii++){ RecoverColumn *pCol = &pTab->aCol[ii]; int iBind = pCol->iBind; if( iBind>0 ){ if( pCol->bIPK ){ sqlite3_bind_int64(pInsert, iBind, p1->iRowid); }else if( pCol->iFieldnVal ){ recoverBindValue(p, pInsert, iBind, apVal[pCol->iField]); } } } if( p->bRecoverRowid && pTab->iRowidBind>0 && p1->bHaveRowid ){ sqlite3_bind_int64(pInsert, pTab->iRowidBind, p1->iRowid); } if( SQLITE_ROW==sqlite3_step(pInsert) ){ const char *z = (const char*)sqlite3_column_text(pInsert, 0); recoverSqlCallback(p, z); } recoverReset(p, pInsert); assert( p->errCode || 
pInsert ); if( pInsert ) sqlite3_clear_bindings(pInsert); } } for(ii=0; iinVal; ii++){ sqlite3_value_free(apVal[ii]); apVal[ii] = 0; } p1->nVal = -1; p1->bHaveRowid = 0; } if( iPage!=0 ){ if( iField<0 ){ p1->iRowid = sqlite3_column_int64(pSel, 3); assert( p1->nVal==-1 ); p1->nVal = 0; p1->bHaveRowid = 1; }else if( iFieldnCol ){ assert( apVal[iField]==0 ); apVal[iField] = sqlite3_value_dup( pVal ); if( apVal[iField]==0 ){ recoverError(p, SQLITE_NOMEM, 0); } p1->nVal = iField+1; }else if( pTab->nCol==0 ){ p1->nVal = pTab->nCol; } p1->iPrevCell = iCell; p1->iPrevPage = iPage; } }else{ recoverReset(p, pSel); p1->pTab = 0; } return p->errCode; } /* ** Initialize resources required by sqlite3_recover_step() in ** RECOVER_STATE_LOSTANDFOUND1 state - during which the set of pages not ** already allocated to a recovered schema element is determined. */ static void recoverLostAndFound1Init(sqlite3_recover *p){ RecoverStateLAF *pLaf = &p->laf; sqlite3_stmt *pStmt = 0; assert( p->laf.pUsed==0 ); pLaf->nPg = recoverPageCount(p); pLaf->pUsed = recoverBitmapAlloc(p, pLaf->nPg); /* Prepare a statement to iterate through all pages that are part of any tree ** in the recoverable part of the input database schema to the bitmap. And, ** if !p->bFreelistCorrupt, add all pages that appear to be part of the ** freelist. 
*/ pStmt = recoverPrepare( p, p->dbOut, "WITH trunk(pgno) AS (" " SELECT read_i32(getpage(1), 8) AS x WHERE x>0" " UNION" " SELECT read_i32(getpage(trunk.pgno), 0) AS x FROM trunk WHERE x>0" ")," "trunkdata(pgno, data) AS (" " SELECT pgno, getpage(pgno) FROM trunk" ")," "freelist(data, n, freepgno) AS (" " SELECT data, min(16384, read_i32(data, 1)-1), pgno FROM trunkdata" " UNION ALL" " SELECT data, n-1, read_i32(data, 2+n) FROM freelist WHERE n>=0" ")," "" "roots(r) AS (" " SELECT 1 UNION ALL" " SELECT rootpage FROM recovery.schema WHERE rootpage>0" ")," "used(page) AS (" " SELECT r FROM roots" " UNION" " SELECT child FROM sqlite_dbptr('getpage()'), used " " WHERE pgno=page" ") " "SELECT page FROM used" " UNION ALL " "SELECT freepgno FROM freelist WHERE NOT ?" ); if( pStmt ) sqlite3_bind_int(pStmt, 1, p->bFreelistCorrupt); pLaf->pUsedPages = pStmt; } /* ** Perform one step (sqlite3_recover_step()) of work for the connection ** passed as the only argument, which is guaranteed to be in ** RECOVER_STATE_LOSTANDFOUND1 state - during which the set of pages not ** already allocated to a recovered schema element is determined. */ static int recoverLostAndFound1Step(sqlite3_recover *p){ RecoverStateLAF *pLaf = &p->laf; int rc = p->errCode; if( rc==SQLITE_OK ){ rc = sqlite3_step(pLaf->pUsedPages); if( rc==SQLITE_ROW ){ i64 iPg = sqlite3_column_int64(pLaf->pUsedPages, 0); recoverBitmapSet(pLaf->pUsed, iPg); rc = SQLITE_OK; }else{ recoverFinalize(p, pLaf->pUsedPages); pLaf->pUsedPages = 0; } } return rc; } /* ** Initialize resources required by RECOVER_STATE_LOSTANDFOUND2 ** state - during which the pages identified in RECOVER_STATE_LOSTANDFOUND1 ** are sorted into sets that likely belonged to the same database tree. 
*/
static void recoverLostAndFound2Init(sqlite3_recover *p){
  RecoverStateLAF *pState = &p->laf;

  /* None of the state-2 resources may exist yet. */
  assert( pState->pAllAndParent==0 );
  assert( pState->pMapInsert==0 );
  assert( pState->pMaxField==0 );
  assert( pState->nMaxField==0 );

  /* Records (page, parent) pairs in the recovery.map table. */
  pState->pMapInsert = recoverPrepare(p, p->dbOut,
      "INSERT OR IGNORE INTO recovery.map(pgno, parent) VALUES(?, ?)"
  );

  /* Yields every (parent, child) pointer found by sqlite_dbptr, followed
  ** by a (NULL, N) row for each page number N in 1..nPg. */
  pState->pAllAndParent = recoverPreparePrintf(p, p->dbOut,
      "WITH RECURSIVE seq(ii) AS ("
      " SELECT 1 UNION ALL SELECT ii+1 FROM seq WHERE ii<%lld"
      ")"
      "SELECT pgno, child FROM sqlite_dbptr('getpage()') "
      " UNION ALL "
      "SELECT NULL, ii FROM seq",
      pState->nPg
  );

  /* Computes one more than the largest field index found on a page. */
  pState->pMaxField = recoverPreparePrintf(p, p->dbOut,
      "SELECT max(field)+1 FROM sqlite_dbdata('getpage') WHERE pgno = ?"
  );
}

/*
** Perform one step (sqlite3_recover_step()) of work for the connection
** passed as the only argument, which is guaranteed to be in
** RECOVER_STATE_LOSTANDFOUND2 state - during which the pages identified
** in RECOVER_STATE_LOSTANDFOUND1 are sorted into sets that likely belonged
** to the same database tree.
*/
static int recoverLostAndFound2Step(sqlite3_recover *p){
  RecoverStateLAF *pState = &p->laf;
  i64 iChild;

  /* Nothing is attempted once the handle is in an error state. */
  if( p->errCode!=SQLITE_OK ) return p->errCode;

  if( sqlite3_step(pState->pAllAndParent)!=SQLITE_ROW ){
    /* Iteration finished (or failed): release the statement. */
    recoverFinalize(p, pState->pAllAndParent);
    pState->pAllAndParent =0;
    return SQLITE_DONE;
  }

  iChild = sqlite3_column_int(pState->pAllAndParent, 1);
  if( recoverBitmapQuery(pState->pUsed, iChild)==0 ){
    /* The page is not marked as used: record its parent in the map ... */
    sqlite3_bind_int64(pState->pMapInsert, 1, iChild);
    sqlite3_bind_value(pState->pMapInsert, 2,
        sqlite3_column_value(pState->pAllAndParent, 0)
    );
    sqlite3_step(pState->pMapInsert);
    recoverReset(p, pState->pMapInsert);

    /* ... and track the widest record seen on any such page. */
    sqlite3_bind_int64(pState->pMaxField, 1, iChild);
    if( SQLITE_ROW==sqlite3_step(pState->pMaxField) ){
      int nField = sqlite3_column_int(pState->pMaxField, 0);
      if( nField>pState->nMaxField ) pState->nMaxField = nField;
    }
    recoverReset(p, pState->pMaxField);
  }

  return p->errCode;
}

/*
** Free all resources allocated as part of sqlite3_recover_step() calls
** in one of the RECOVER_STATE_LOSTANDFOUND[123] states.
*/
static void recoverLostAndFoundCleanup(sqlite3_recover *p){
  RecoverStateLAF *pState = &p->laf;

  recoverBitmapFree(pState->pUsed);
  pState->pUsed = 0;

  /* Finalize every statement first, then clear all of the pointers. */
  sqlite3_finalize(pState->pUsedPages);
  sqlite3_finalize(pState->pAllAndParent);
  sqlite3_finalize(pState->pMapInsert);
  sqlite3_finalize(pState->pMaxField);
  sqlite3_finalize(pState->pFindRoot);
  sqlite3_finalize(pState->pInsert);
  sqlite3_finalize(pState->pAllPage);
  sqlite3_finalize(pState->pPageData);

  pState->pUsedPages = 0;
  pState->pAllAndParent = 0;
  pState->pMapInsert = 0;
  pState->pMaxField = 0;
  pState->pFindRoot = 0;
  pState->pInsert = 0;
  pState->pAllPage = 0;
  pState->pPageData = 0;

  sqlite3_free(pState->apVal);
  pState->apVal = 0;
}

/*
** Free all resources allocated as part of sqlite3_recover_step() calls.
*/ static void recoverFinalCleanup(sqlite3_recover *p){ RecoverTable *pTab = 0; RecoverTable *pNext = 0; recoverWriteDataCleanup(p); recoverLostAndFoundCleanup(p); for(pTab=p->pTblList; pTab; pTab=pNext){ pNext = pTab->pNext; sqlite3_free(pTab); } p->pTblList = 0; sqlite3_finalize(p->pGetPage); p->pGetPage = 0; sqlite3_file_control(p->dbIn, p->zDb, SQLITE_FCNTL_RESET_CACHE, 0); { #ifndef NDEBUG int res = #endif sqlite3_close(p->dbOut); assert( res==SQLITE_OK ); } p->dbOut = 0; } /* ** Decode and return an unsigned 16-bit big-endian integer value from ** buffer a[]. */ static u32 recoverGetU16(const u8 *a){ return (((u32)a[0])<<8) + ((u32)a[1]); } /* ** Decode and return an unsigned 32-bit big-endian integer value from ** buffer a[]. */ static u32 recoverGetU32(const u8 *a){ return (((u32)a[0])<<24) + (((u32)a[1])<<16) + (((u32)a[2])<<8) + ((u32)a[3]); } /* ** Decode an SQLite varint from buffer a[]. Write the decoded value to (*pVal) ** and return the number of bytes consumed. */ static int recoverGetVarint(const u8 *a, i64 *pVal){ sqlite3_uint64 u = 0; int i; for(i=0; i<8; i++){ u = (u<<7) + (a[i]&0x7f); if( (a[i]&0x80)==0 ){ *pVal = (sqlite3_int64)u; return i+1; } } u = (u<<8) + (a[i]&0xff); *pVal = (sqlite3_int64)u; return 9; } /* ** The second argument points to a buffer n bytes in size. If this buffer ** or a prefix thereof appears to contain a well-formed SQLite b-tree page, ** return the page-size in bytes. Otherwise, if the buffer does not ** appear to contain a well-formed b-tree page, return 0. 
*/ static int recoverIsValidPage(u8 *aTmp, const u8 *a, int n){ u8 *aUsed = aTmp; int nFrag = 0; int nActual = 0; int iFree = 0; int nCell = 0; /* Number of cells on page */ int iCellOff = 0; /* Offset of cell array in page */ int iContent = 0; int eType = 0; int ii = 0; eType = (int)a[0]; if( eType!=0x02 && eType!=0x05 && eType!=0x0A && eType!=0x0D ) return 0; iFree = (int)recoverGetU16(&a[1]); nCell = (int)recoverGetU16(&a[3]); iContent = (int)recoverGetU16(&a[5]); if( iContent==0 ) iContent = 65536; nFrag = (int)a[7]; if( iContent>n ) return 0; memset(aUsed, 0, n); memset(aUsed, 0xFF, iContent); /* Follow the free-list. This is the same format for all b-tree pages. */ if( iFree && iFree<=iContent ) return 0; while( iFree ){ int iNext = 0; int nByte = 0; if( iFree>(n-4) ) return 0; iNext = recoverGetU16(&a[iFree]); nByte = recoverGetU16(&a[iFree+2]); if( iFree+nByte>n || nByte<4 ) return 0; if( iNext && iNextiContent ) return 0; for(ii=0; iin ){ return 0; } if( eType==0x05 || eType==0x02 ) nByte += 4; nByte += recoverGetVarint(&a[iOff+nByte], &nPayload); if( eType==0x0D ){ i64 dummy = 0; nByte += recoverGetVarint(&a[iOff+nByte], &dummy); } if( eType!=0x05 ){ int X = (eType==0x0D) ? n-35 : (((n-12)*64/255)-23); int M = ((n-12)*32/255)-23; int K = M+((nPayload-M)%(n-4)); if( nPayloadn ){ return 0; } for(iByte=iOff; iByte<(iOff+nByte); iByte++){ if( aUsed[iByte]!=0 ){ return 0; } aUsed[iByte] = 0xFF; } } nActual = 0; for(ii=0; iipMethods!=&recover_methods ); return pFd->pMethods->xClose(pFd); } /* ** Write value v to buffer a[] as a 16-bit big-endian unsigned integer. */ static void recoverPutU16(u8 *a, u32 v){ a[0] = (v>>8) & 0x00FF; a[1] = (v>>0) & 0x00FF; } /* ** Write value v to buffer a[] as a 32-bit big-endian unsigned integer. 
*/ static void recoverPutU32(u8 *a, u32 v){ a[0] = (v>>24) & 0x00FF; a[1] = (v>>16) & 0x00FF; a[2] = (v>>8) & 0x00FF; a[3] = (v>>0) & 0x00FF; } /* ** Detect the page-size of the database opened by file-handle pFd by ** searching the first part of the file for a well-formed SQLite b-tree ** page. If parameter nReserve is non-zero, then as well as searching for ** a b-tree page with zero reserved bytes, this function searches for one ** with nReserve reserved bytes at the end of it. ** ** If successful, set variable p->detected_pgsz to the detected page-size ** in bytes and return SQLITE_OK. Or, if no error occurs but no valid page ** can be found, return SQLITE_OK but leave p->detected_pgsz set to 0. Or, ** if an error occurs (e.g. an IO or OOM error), then an SQLite error code ** is returned. The final value of p->detected_pgsz is undefined in this ** case. */ static int recoverVfsDetectPagesize( sqlite3_recover *p, /* Recover handle */ sqlite3_file *pFd, /* File-handle open on input database */ u32 nReserve, /* Possible nReserve value */ i64 nSz /* Size of database file in bytes */ ){ int rc = SQLITE_OK; const int nMin = 512; const int nMax = 65536; const int nMaxBlk = 4; u32 pgsz = 0; int iBlk = 0; u8 *aPg = 0; u8 *aTmp = 0; int nBlk = 0; aPg = (u8*)sqlite3_malloc(2*nMax); if( aPg==0 ) return SQLITE_NOMEM; aTmp = &aPg[nMax]; nBlk = (nSz+nMax-1)/nMax; if( nBlk>nMaxBlk ) nBlk = nMaxBlk; do { for(iBlk=0; rc==SQLITE_OK && iBlk=((iBlk+1)*nMax)) ? nMax : (nSz % nMax); memset(aPg, 0, nMax); rc = pFd->pMethods->xRead(pFd, aPg, nByte, iBlk*nMax); if( rc==SQLITE_OK ){ int pgsz2; for(pgsz2=(pgsz ? pgsz*2 : nMin); pgsz2<=nMax; pgsz2=pgsz2*2){ int iOff; for(iOff=0; iOff(u32)p->detected_pgsz ){ p->detected_pgsz = pgsz; p->nReserve = nReserve; } if( nReserve==0 ) break; nReserve = 0; }while( 1 ); p->detected_pgsz = pgsz; sqlite3_free(aPg); return rc; } /* ** The xRead() method of the wrapper VFS. This is used to intercept calls ** to read page 1 of the input database. 
*/ static int recoverVfsRead(sqlite3_file *pFd, void *aBuf, int nByte, i64 iOff){ int rc = SQLITE_OK; if( pFd->pMethods==&recover_methods ){ pFd->pMethods = recover_g.pMethods; rc = pFd->pMethods->xRead(pFd, aBuf, nByte, iOff); if( nByte==16 ){ sqlite3_randomness(16, aBuf); }else if( rc==SQLITE_OK && iOff==0 && nByte>=108 ){ /* Ensure that the database has a valid header file. The only fields ** that really matter to recovery are: ** ** + Database page size (16-bits at offset 16) ** + Size of db in pages (32-bits at offset 28) ** + Database encoding (32-bits at offset 56) ** ** Also preserved are: ** ** + first freelist page (32-bits at offset 32) ** + size of freelist (32-bits at offset 36) ** + the wal-mode flags (16-bits at offset 18) ** ** We also try to preserve the auto-vacuum, incr-value, user-version ** and application-id fields - all 32 bit quantities at offsets ** 52, 60, 64 and 68. All other fields are set to known good values. ** ** Byte offset 105 should also contain the page-size as a 16-bit ** integer. 
*/ const int aPreserve[] = {32, 36, 52, 60, 64, 68}; u8 aHdr[108] = { 0x53, 0x51, 0x4c, 0x69, 0x74, 0x65, 0x20, 0x66, 0x6f, 0x72, 0x6d, 0x61, 0x74, 0x20, 0x33, 0x00, 0xFF, 0xFF, 0x01, 0x01, 0x00, 0x40, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x10, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x5b, 0x30, 0x0D, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00 }; u8 *a = (u8*)aBuf; u32 pgsz = recoverGetU16(&a[16]); u32 nReserve = a[20]; u32 enc = recoverGetU32(&a[56]); u32 dbsz = 0; i64 dbFileSize = 0; int ii; sqlite3_recover *p = recover_g.p; if( pgsz==0x01 ) pgsz = 65536; rc = pFd->pMethods->xFileSize(pFd, &dbFileSize); if( rc==SQLITE_OK && p->detected_pgsz==0 ){ rc = recoverVfsDetectPagesize(p, pFd, nReserve, dbFileSize); } if( p->detected_pgsz ){ pgsz = p->detected_pgsz; nReserve = p->nReserve; } if( pgsz ){ dbsz = dbFileSize / pgsz; } if( enc!=SQLITE_UTF8 && enc!=SQLITE_UTF16BE && enc!=SQLITE_UTF16LE ){ enc = SQLITE_UTF8; } sqlite3_free(p->pPage1Cache); p->pPage1Cache = 0; p->pPage1Disk = 0; p->pgsz = nByte; p->pPage1Cache = (u8*)recoverMalloc(p, nByte*2); if( p->pPage1Cache ){ p->pPage1Disk = &p->pPage1Cache[nByte]; memcpy(p->pPage1Disk, aBuf, nByte); aHdr[18] = a[18]; aHdr[19] = a[19]; recoverPutU32(&aHdr[28], dbsz); recoverPutU32(&aHdr[56], enc); recoverPutU16(&aHdr[105], pgsz-nReserve); if( pgsz==65536 ) pgsz = 1; recoverPutU16(&aHdr[16], pgsz); aHdr[20] = nReserve; for(ii=0; ii<(int)(sizeof(aPreserve)/sizeof(aPreserve[0])); ii++){ memcpy(&aHdr[aPreserve[ii]], &a[aPreserve[ii]], 4); } memcpy(aBuf, aHdr, sizeof(aHdr)); memset(&((u8*)aBuf)[sizeof(aHdr)], 0, nByte-sizeof(aHdr)); memcpy(p->pPage1Cache, aBuf, 
nByte); }else{ rc = p->errCode; } } pFd->pMethods = &recover_methods; }else{ rc = pFd->pMethods->xRead(pFd, aBuf, nByte, iOff); } return rc; } /* ** Used to make sqlite3_io_methods wrapper methods less verbose. */ #define RECOVER_VFS_WRAPPER(code) \ int rc = SQLITE_OK; \ if( pFd->pMethods==&recover_methods ){ \ pFd->pMethods = recover_g.pMethods; \ rc = code; \ pFd->pMethods = &recover_methods; \ }else{ \ rc = code; \ } \ return rc; /* ** Methods of the wrapper VFS. All methods except for xRead() and xClose() ** simply uninstall the sqlite3_io_methods wrapper, invoke the equivalent ** method on the lower level VFS, then reinstall the wrapper before returning. ** Those that return an integer value use the RECOVER_VFS_WRAPPER macro. */ static int recoverVfsWrite( sqlite3_file *pFd, const void *aBuf, int nByte, i64 iOff ){ RECOVER_VFS_WRAPPER ( pFd->pMethods->xWrite(pFd, aBuf, nByte, iOff) ); } static int recoverVfsTruncate(sqlite3_file *pFd, sqlite3_int64 size){ RECOVER_VFS_WRAPPER ( pFd->pMethods->xTruncate(pFd, size) ); } static int recoverVfsSync(sqlite3_file *pFd, int flags){ RECOVER_VFS_WRAPPER ( pFd->pMethods->xSync(pFd, flags) ); } static int recoverVfsFileSize(sqlite3_file *pFd, sqlite3_int64 *pSize){ RECOVER_VFS_WRAPPER ( pFd->pMethods->xFileSize(pFd, pSize) ); } static int recoverVfsLock(sqlite3_file *pFd, int eLock){ RECOVER_VFS_WRAPPER ( pFd->pMethods->xLock(pFd, eLock) ); } static int recoverVfsUnlock(sqlite3_file *pFd, int eLock){ RECOVER_VFS_WRAPPER ( pFd->pMethods->xUnlock(pFd, eLock) ); } static int recoverVfsCheckReservedLock(sqlite3_file *pFd, int *pResOut){ RECOVER_VFS_WRAPPER ( pFd->pMethods->xCheckReservedLock(pFd, pResOut) ); } static int recoverVfsFileControl(sqlite3_file *pFd, int op, void *pArg){ RECOVER_VFS_WRAPPER ( (pFd->pMethods ? 
pFd->pMethods->xFileControl(pFd, op, pArg) : SQLITE_NOTFOUND) ); } static int recoverVfsSectorSize(sqlite3_file *pFd){ RECOVER_VFS_WRAPPER ( pFd->pMethods->xSectorSize(pFd) ); } static int recoverVfsDeviceCharacteristics(sqlite3_file *pFd){ RECOVER_VFS_WRAPPER ( pFd->pMethods->xDeviceCharacteristics(pFd) ); } static int recoverVfsShmMap( sqlite3_file *pFd, int iPg, int pgsz, int bExtend, void volatile **pp ){ RECOVER_VFS_WRAPPER ( pFd->pMethods->xShmMap(pFd, iPg, pgsz, bExtend, pp) ); } static int recoverVfsShmLock(sqlite3_file *pFd, int offset, int n, int flags){ RECOVER_VFS_WRAPPER ( pFd->pMethods->xShmLock(pFd, offset, n, flags) ); } static void recoverVfsShmBarrier(sqlite3_file *pFd){ if( pFd->pMethods==&recover_methods ){ pFd->pMethods = recover_g.pMethods; pFd->pMethods->xShmBarrier(pFd); pFd->pMethods = &recover_methods; }else{ pFd->pMethods->xShmBarrier(pFd); } } static int recoverVfsShmUnmap(sqlite3_file *pFd, int deleteFlag){ RECOVER_VFS_WRAPPER ( pFd->pMethods->xShmUnmap(pFd, deleteFlag) ); } static int recoverVfsFetch( sqlite3_file *pFd, sqlite3_int64 iOff, int iAmt, void **pp ){ (void)pFd; (void)iOff; (void)iAmt; *pp = 0; return SQLITE_OK; } static int recoverVfsUnfetch(sqlite3_file *pFd, sqlite3_int64 iOff, void *p){ (void)pFd; (void)iOff; (void)p; return SQLITE_OK; } /* ** Install the VFS wrapper around the file-descriptor open on the input ** database for recover handle p. Mutex RECOVER_MUTEX_ID must be held ** when this function is called. 
*/ static void recoverInstallWrapper(sqlite3_recover *p){ sqlite3_file *pFd = 0; assert( recover_g.pMethods==0 ); recoverAssertMutexHeld(); sqlite3_file_control(p->dbIn, p->zDb, SQLITE_FCNTL_FILE_POINTER, (void*)&pFd); assert( pFd==0 || pFd->pMethods!=&recover_methods ); if( pFd && pFd->pMethods ){ int iVersion = 1 + (pFd->pMethods->iVersion>1 && pFd->pMethods->xShmMap!=0); recover_g.pMethods = pFd->pMethods; recover_g.p = p; recover_methods.iVersion = iVersion; pFd->pMethods = &recover_methods; } } /* ** Uninstall the VFS wrapper that was installed around the file-descriptor open ** on the input database for recover handle p. Mutex RECOVER_MUTEX_ID must be ** held when this function is called. */ static void recoverUninstallWrapper(sqlite3_recover *p){ sqlite3_file *pFd = 0; recoverAssertMutexHeld(); sqlite3_file_control(p->dbIn, p->zDb,SQLITE_FCNTL_FILE_POINTER,(void*)&pFd); if( pFd && pFd->pMethods ){ pFd->pMethods = recover_g.pMethods; recover_g.pMethods = 0; recover_g.p = 0; } } /* ** This function does the work of a single sqlite3_recover_step() call. It ** is guaranteed that the handle is not in an error state when this ** function is called. */ static void recoverStep(sqlite3_recover *p){ assert( p && p->errCode==SQLITE_OK ); switch( p->eState ){ case RECOVER_STATE_INIT: { int bUseWrapper = 1; /* This is the very first call to sqlite3_recover_step() on this object. */ recoverSqlCallback(p, "BEGIN"); recoverSqlCallback(p, "PRAGMA writable_schema = on"); recoverSqlCallback(p, "PRAGMA foreign_keys = off"); recoverEnterMutex(); /* Open the output database. And register required virtual tables and ** user functions with the new handle. */ recoverOpenOutput(p); /* Attempt to open a transaction and read page 1 of the input database. ** Two attempts may be made - one with a wrapper installed to ensure ** that the database header is sane, and then if that attempt returns ** SQLITE_NOTADB, then again with no wrapper. 
The second attempt is ** required for encrypted databases. */ if( p->errCode==SQLITE_OK ){ do{ p->errCode = SQLITE_OK; if( bUseWrapper ) recoverInstallWrapper(p); /* Open a transaction on the input database. */ sqlite3_file_control(p->dbIn, p->zDb, SQLITE_FCNTL_RESET_CACHE, 0); recoverExec(p, p->dbIn, "PRAGMA writable_schema = on"); recoverExec(p, p->dbIn, "BEGIN"); if( p->errCode==SQLITE_OK ) p->bCloseTransaction = 1; recoverExec(p, p->dbIn, "SELECT 1 FROM sqlite_schema"); recoverTransferSettings(p); recoverOpenRecovery(p); recoverCacheSchema(p); if( bUseWrapper ) recoverUninstallWrapper(p); }while( p->errCode==SQLITE_NOTADB && (bUseWrapper--) && SQLITE_OK==sqlite3_exec(p->dbIn, "ROLLBACK", 0, 0, 0) ); } recoverLeaveMutex(); recoverExec(p, p->dbOut, "BEGIN"); recoverWriteSchema1(p); p->eState = RECOVER_STATE_WRITING; break; } case RECOVER_STATE_WRITING: { if( p->w1.pTbls==0 ){ recoverWriteDataInit(p); } if( SQLITE_DONE==recoverWriteDataStep(p) ){ recoverWriteDataCleanup(p); if( p->zLostAndFound ){ p->eState = RECOVER_STATE_LOSTANDFOUND1; }else{ p->eState = RECOVER_STATE_SCHEMA2; } } break; } case RECOVER_STATE_LOSTANDFOUND1: { if( p->laf.pUsed==0 ){ recoverLostAndFound1Init(p); } if( SQLITE_DONE==recoverLostAndFound1Step(p) ){ p->eState = RECOVER_STATE_LOSTANDFOUND2; } break; } case RECOVER_STATE_LOSTANDFOUND2: { if( p->laf.pAllAndParent==0 ){ recoverLostAndFound2Init(p); } if( SQLITE_DONE==recoverLostAndFound2Step(p) ){ p->eState = RECOVER_STATE_LOSTANDFOUND3; } break; } case RECOVER_STATE_LOSTANDFOUND3: { if( p->laf.pInsert==0 ){ recoverLostAndFound3Init(p); } if( SQLITE_DONE==recoverLostAndFound3Step(p) ){ p->eState = RECOVER_STATE_SCHEMA2; } break; } case RECOVER_STATE_SCHEMA2: { int rc = SQLITE_OK; recoverWriteSchema2(p); p->eState = RECOVER_STATE_DONE; /* If no error has occurred, commit the write transaction on the output ** database. Regardless of whether or not an error has occurred, make ** an attempt to end the read transaction on the input database. 
*/ recoverExec(p, p->dbOut, "COMMIT"); rc = sqlite3_exec(p->dbIn, "END", 0, 0, 0); if( p->errCode==SQLITE_OK ) p->errCode = rc; recoverSqlCallback(p, "PRAGMA writable_schema = off"); recoverSqlCallback(p, "COMMIT"); p->eState = RECOVER_STATE_DONE; recoverFinalCleanup(p); break; }; case RECOVER_STATE_DONE: { /* no-op */ break; }; } } /* ** This is a worker function that does the heavy lifting for both init ** functions: ** ** sqlite3_recover_init() ** sqlite3_recover_init_sql() ** ** All this function does is allocate space for the recover handle and ** take copies of the input parameters. All the real work is done within ** sqlite3_recover_run(). */ sqlite3_recover *recoverInit( sqlite3* db, const char *zDb, const char *zUri, /* Output URI for _recover_init() */ int (*xSql)(void*, const char*),/* SQL callback for _recover_init_sql() */ void *pSqlCtx /* Context arg for _recover_init_sql() */ ){ sqlite3_recover *pRet = 0; int nDb = 0; int nUri = 0; int nByte = 0; if( zDb==0 ){ zDb = "main"; } nDb = recoverStrlen(zDb); nUri = recoverStrlen(zUri); nByte = sizeof(sqlite3_recover) + nDb+1 + nUri+1; pRet = (sqlite3_recover*)sqlite3_malloc(nByte); if( pRet ){ memset(pRet, 0, nByte); pRet->dbIn = db; pRet->zDb = (char*)&pRet[1]; pRet->zUri = &pRet->zDb[nDb+1]; memcpy(pRet->zDb, zDb, nDb); if( nUri>0 && zUri ) memcpy(pRet->zUri, zUri, nUri); pRet->xSql = xSql; pRet->pSqlCtx = pSqlCtx; pRet->bRecoverRowid = RECOVER_ROWID_DEFAULT; } return pRet; } /* ** Initialize a recovery handle that creates a new database containing ** the recovered data. */ sqlite3_recover *sqlite3_recover_init( sqlite3* db, const char *zDb, const char *zUri ){ return recoverInit(db, zDb, zUri, 0, 0); } /* ** Initialize a recovery handle that returns recovered data in the ** form of SQL statements via a callback. 
*/ sqlite3_recover *sqlite3_recover_init_sql( sqlite3* db, const char *zDb, int (*xSql)(void*, const char*), void *pSqlCtx ){ return recoverInit(db, zDb, 0, xSql, pSqlCtx); } /* ** Return the handle error message, if any. */ const char *sqlite3_recover_errmsg(sqlite3_recover *p){ return (p && p->errCode!=SQLITE_NOMEM) ? p->zErrMsg : "out of memory"; } /* ** Return the handle error code. */ int sqlite3_recover_errcode(sqlite3_recover *p){ return p ? p->errCode : SQLITE_NOMEM; } /* ** Configure the handle. */ int sqlite3_recover_config(sqlite3_recover *p, int op, void *pArg){ int rc = SQLITE_OK; if( p==0 ){ rc = SQLITE_NOMEM; }else if( p->eState!=RECOVER_STATE_INIT ){ rc = SQLITE_MISUSE; }else{ switch( op ){ case 789: /* This undocumented magic configuration option is used to set the ** name of the auxiliary database that is ATTACH-ed to the database ** connection and used to hold state information during the ** recovery process. This option is for debugging use only and ** is subject to change or removal at any time. */ sqlite3_free(p->zStateDb); p->zStateDb = recoverMPrintf(p, "%s", (char*)pArg); break; case SQLITE_RECOVER_LOST_AND_FOUND: { const char *zArg = (const char*)pArg; sqlite3_free(p->zLostAndFound); if( zArg ){ p->zLostAndFound = recoverMPrintf(p, "%s", zArg); }else{ p->zLostAndFound = 0; } break; } case SQLITE_RECOVER_FREELIST_CORRUPT: p->bFreelistCorrupt = *(int*)pArg; break; case SQLITE_RECOVER_ROWIDS: p->bRecoverRowid = *(int*)pArg; break; case SQLITE_RECOVER_SLOWINDEXES: p->bSlowIndexes = *(int*)pArg; break; default: rc = SQLITE_NOTFOUND; break; } } return rc; } /* ** Do a unit of work towards the recovery job. Return SQLITE_OK if ** no error has occurred but database recovery is not finished, SQLITE_DONE ** if database recovery has been successfully completed, or an SQLite ** error code if an error has occurred. 
*/ int sqlite3_recover_step(sqlite3_recover *p){ if( p==0 ) return SQLITE_NOMEM; if( p->errCode==SQLITE_OK ) recoverStep(p); if( p->eState==RECOVER_STATE_DONE && p->errCode==SQLITE_OK ){ return SQLITE_DONE; } return p->errCode; } /* ** Do the configured recovery operation. Return SQLITE_OK if successful, or ** else an SQLite error code. */ int sqlite3_recover_run(sqlite3_recover *p){ while( SQLITE_OK==sqlite3_recover_step(p) ); return sqlite3_recover_errcode(p); } /* ** Free all resources associated with the recover handle passed as the only ** argument. The results of using a handle with any sqlite3_recover_** ** API function after it has been passed to this function are undefined. ** ** A copy of the value returned by the first call made to sqlite3_recover_run() ** on this handle is returned, or SQLITE_OK if sqlite3_recover_run() has ** not been called on this handle. */ int sqlite3_recover_finish(sqlite3_recover *p){ int rc; if( p==0 ){ rc = SQLITE_NOMEM; }else{ recoverFinalCleanup(p); if( p->bCloseTransaction && sqlite3_get_autocommit(p->dbIn)==0 ){ rc = sqlite3_exec(p->dbIn, "END", 0, 0, 0); if( p->errCode==SQLITE_OK ) p->errCode = rc; } rc = p->errCode; sqlite3_free(p->zErrMsg); sqlite3_free(p->zStateDb); sqlite3_free(p->zLostAndFound); sqlite3_free(p->pPage1Cache); sqlite3_free(p); } return rc; } #endif /* ifndef SQLITE_OMIT_VIRTUALTABLE */kylin-ai-data-management-service-1.2.0.0/src/sqliteRecover/sqlite3recover.h000066400000000000000000000231411520577635400266240ustar00rootroot00000000000000/* ** 2022-08-27 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. 
** ************************************************************************* ** ** This file contains the public interface to the "recover" extension - ** an SQLite extension designed to recover data from corrupted database ** files. */ /* ** OVERVIEW: ** ** To use the API to recover data from a corrupted database, an ** application: ** ** 1) Creates an sqlite3_recover handle by calling either ** sqlite3_recover_init() or sqlite3_recover_init_sql(). ** ** 2) Configures the new handle using one or more calls to ** sqlite3_recover_config(). ** ** 3) Executes the recovery by repeatedly calling sqlite3_recover_step() on ** the handle until it returns something other than SQLITE_OK. If it ** returns SQLITE_DONE, then the recovery operation completed without ** error. If it returns some other non-SQLITE_OK value, then an error ** has occurred. ** ** 4) Retrieves any error code and English language error message using the ** sqlite3_recover_errcode() and sqlite3_recover_errmsg() APIs, ** respectively. ** ** 5) Destroys the sqlite3_recover handle and frees all resources ** using sqlite3_recover_finish(). ** ** The application may abandon the recovery operation at any point ** before it is finished by passing the sqlite3_recover handle to ** sqlite3_recover_finish(). This is not an error, but the final state ** of the output database, or the results of running the partial script ** delivered to the SQL callback, are undefined. */ #ifndef _SQLITE_RECOVER_H #define _SQLITE_RECOVER_H #include "sqlite3.h" #ifdef __cplusplus extern "C" { #endif /* ** An instance of the sqlite3_recover object represents a recovery ** operation in progress. 
** ** Constructors: ** ** sqlite3_recover_init() ** sqlite3_recover_init_sql() ** ** Destructor: ** ** sqlite3_recover_finish() ** ** Methods: ** ** sqlite3_recover_config() ** sqlite3_recover_errcode() ** sqlite3_recover_errmsg() ** sqlite3_recover_run() ** sqlite3_recover_step() */ typedef struct sqlite3_recover sqlite3_recover; /* ** These two APIs attempt to create and return a new sqlite3_recover object. ** In both cases the first two arguments identify the (possibly ** corrupt) database to recover data from. The first argument is an open ** database handle and the second the name of a database attached to that ** handle (i.e. "main", "temp" or the name of an attached database). ** ** If sqlite3_recover_init() is used to create the new sqlite3_recover ** handle, then data is recovered into a new database, identified by ** string parameter zUri. zUri may be an absolute or relative file path, ** or may be an SQLite URI. If the identified database file already exists, ** it is overwritten. ** ** If sqlite3_recover_init_sql() is invoked, then any recovered data will ** be returned to the user as a series of SQL statements. Executing these ** SQL statements results in the same database as would have been created ** had sqlite3_recover_init() been used. For each SQL statement in the ** output, the callback function passed as the third argument (xSql) is ** invoked once. The first parameter is a passed a copy of the fourth argument ** to this function (pCtx) as its first parameter, and a pointer to a ** nul-terminated buffer containing the SQL statement formated as UTF-8 as ** the second. If the xSql callback returns any value other than SQLITE_OK, ** then processing is immediately abandoned and the value returned used as ** the recover handle error code (see below). ** ** If an out-of-memory error occurs, NULL may be returned instead of ** a valid handle. 
In all other cases, it is the responsibility of the ** application to avoid resource leaks by ensuring that ** sqlite3_recover_finish() is called on all allocated handles. */ sqlite3_recover *sqlite3_recover_init( sqlite3* db, const char *zDb, const char *zUri ); sqlite3_recover *sqlite3_recover_init_sql( sqlite3* db, const char *zDb, int (*xSql)(void*, const char*), void *pCtx ); /* ** Configure an sqlite3_recover object that has just been created using ** sqlite3_recover_init() or sqlite3_recover_init_sql(). This function ** may only be called before the first call to sqlite3_recover_step() ** or sqlite3_recover_run() on the object. ** ** The second argument passed to this function must be one of the ** SQLITE_RECOVER_* symbols defined below. Valid values for the third argument ** depend on the specific SQLITE_RECOVER_* symbol in use. ** ** SQLITE_OK is returned if the configuration operation was successful, ** or an SQLite error code otherwise. */ int sqlite3_recover_config(sqlite3_recover*, int op, void *pArg); /* ** SQLITE_RECOVER_LOST_AND_FOUND: ** The pArg argument points to a string buffer containing the name ** of a "lost-and-found" table in the output database, or NULL. If ** the argument is non-NULL and the database contains seemingly ** valid pages that cannot be associated with any table in the ** recovered part of the schema, data is extracted from these ** pages to add to the lost-and-found table. ** ** SQLITE_RECOVER_FREELIST_CORRUPT: ** The pArg value must actually be a pointer to a value of type ** int containing value 0 or 1 cast as a (void*). If this option is set ** (argument is 1) and a lost-and-found table has been configured using ** SQLITE_RECOVER_LOST_AND_FOUND, then is assumed that the freelist is ** corrupt and an attempt is made to recover records from pages that ** appear to be linked into the freelist. Otherwise, pages on the freelist ** are ignored. 
Setting this option can recover more data from the ** database, but often ends up "recovering" deleted records. The default ** value is 0 (clear). ** ** SQLITE_RECOVER_ROWIDS: ** The pArg value must actually be a pointer to a value of type ** int containing value 0 or 1 cast as a (void*). If this option is set ** (argument is 1), then an attempt is made to recover rowid values ** that are not also INTEGER PRIMARY KEY values. If this option is ** clear, then new rowids are assigned to all recovered rows. The ** default value is 1 (set). ** ** SQLITE_RECOVER_SLOWINDEXES: ** The pArg value must actually be a pointer to a value of type ** int containing value 0 or 1 cast as a (void*). If this option is clear ** (argument is 0), then when creating an output database, the recover ** module creates and populates non-UNIQUE indexes right at the end of the ** recovery operation - after all recoverable data has been inserted ** into the new database. This is faster overall, but means that the ** final call to sqlite3_recover_step() for a recovery operation may ** be need to create a large number of indexes, which may be very slow. ** ** Or, if this option is set (argument is 1), then non-UNIQUE indexes ** are created in the output database before it is populated with ** recovered data. This is slower overall, but avoids the slow call ** to sqlite3_recover_step() at the end of the recovery operation. ** ** The default option value is 0. */ #define SQLITE_RECOVER_LOST_AND_FOUND 1 #define SQLITE_RECOVER_FREELIST_CORRUPT 2 #define SQLITE_RECOVER_ROWIDS 3 #define SQLITE_RECOVER_SLOWINDEXES 4 /* ** Perform a unit of work towards the recovery operation. This function ** must normally be called multiple times to complete database recovery. ** ** If no error occurs but the recovery operation is not completed, this ** function returns SQLITE_OK. If recovery has been completed successfully ** then SQLITE_DONE is returned. If an error has occurred, then an SQLite ** error code (e.g. 
SQLITE_IOERR or SQLITE_NOMEM) is returned. It is not ** considered an error if some or all of the data cannot be recovered ** due to database corruption. ** ** Once sqlite3_recover_step() has returned a value other than SQLITE_OK, ** all further such calls on the same recover handle are no-ops that return ** the same non-SQLITE_OK value. */ int sqlite3_recover_step(sqlite3_recover*); /* ** Run the recovery operation to completion. Return SQLITE_OK if successful, ** or an SQLite error code otherwise. Calling this function is the same ** as executing: ** ** while( SQLITE_OK==sqlite3_recover_step(p) ); ** return sqlite3_recover_errcode(p); */ int sqlite3_recover_run(sqlite3_recover*); /* ** If an error has been encountered during a prior call to ** sqlite3_recover_step(), then this function attempts to return a ** pointer to a buffer containing an English language explanation of ** the error. If no error message is available, or if an out-of memory ** error occurs while attempting to allocate a buffer in which to format ** the error message, NULL is returned. ** ** The returned buffer remains valid until the sqlite3_recover handle is ** destroyed using sqlite3_recover_finish(). */ const char *sqlite3_recover_errmsg(sqlite3_recover*); /* ** If this function is called on an sqlite3_recover handle after ** an error occurs, an SQLite error code is returned. Otherwise, SQLITE_OK. */ int sqlite3_recover_errcode(sqlite3_recover*); /* ** Clean up a recovery object created by a call to sqlite3_recover_init(). ** The results of using a recovery object with any API after it has been ** passed to this function are undefined. ** ** This function returns the same value as sqlite3_recover_errcode(). 
*/ int sqlite3_recover_finish(sqlite3_recover*); #ifdef __cplusplus } /* end of the 'extern "C"' block */ #endif #endif /* ifndef _SQLITE_RECOVER_H */kylin-ai-data-management-service-1.2.0.0/test/000077500000000000000000000000001520577635400210415ustar00rootroot00000000000000kylin-ai-data-management-service-1.2.0.0/test/20250626.jpg000066400000000000000000001077501520577635400224630ustar00rootroot00000000000000JFIF``C    $.' ",#(7),01444'9=82<.342C  2!!22222222222222222222222222222222222222222222222222}" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?h v)@w>j0iqqa某E!apJE!Ӏ+@SVRjE =(1Ի4ƋȈ3R2Tw!6Q*d'+a͢!T,"1HC4; ? R3EF(!;bOJޥW)Zq4ګJJR44ilF9FWM#/"#4IN#w%,*7KQ)hfpH`eLW.%qHG5.9/;@T#&( 0qRinF!;Jsߊ^~VR!V)'&4 U\ϔh1)2[튗4'.q6U'4:*GM}^Ȥ}*&Y)yy+hӦ[d[R) 8F^D-Kҏ/֎d ^[v4zURSn鮅PvVS b`d5Ҥ Ihr) ..W)EKQҺF"ۚ`\T)3XEi긧Is֔ӰM&1Sr`m)cڣ2SՊ,d#H _<ӖnRބczS5iDړJdҙxUZ7fe*YnRT8(;`4)`jLRݏA &2),DV)a ѴRM1P)NMai RiBE4-9Vtl!61J .E[؃'HA֭4M.vlPucjPtqRNb(B)9USG3$~+$0U-\ju6O{ H4`vK 4 sOU%ZF"L<ɁRy`m5)RD,! 
Y9vseG,Vfn+2d{ [ٚCfCT*i=юcW% (OH ԛ=Tܟebxe_j⨌NV5\\Em"șYRm5V&)A (U{.3֢2{Pwb3 8@y&^{湲q`Sh$R&M#8jxy$FsWmc8-VPBi8cRx^ZOlL[g`O&sUgx#iϥ!/گ"AL#bqM1ڴUFxɱP[E+*%N0ZcKnvhabb0u(syu?qhX䑾QNb RXdbATYR g>$'+avi .'||4jji$|MWBW#q$d ܙVF8TFSa F,qXI8G>t̒şk%zy{TD4HGNO[x` M>8mW+D%g>@SO{pmaSTedwAsMD`p yq =Uw,s}+՜my幧J5)Zf+j{} 3BBҠjgN^TgǬ FO 4^cԤ-j v O,ʕz(fyإ+KZ ^6IG"my|T/ipFO)JW Bњ30; ZOj2c2].ɞ$NFzӶ6ǑeS  @lbNO4򞴛C!hwtEzVl`sI$"Ɩ":`V3Y̌x88M|œ""1( Dbf$fL(KaEI"IHRDAՄ|M7V#l -ܤMnyp*Mg(VMlҔt }j6+ 4ؘ&HO$"viKs֥+1ɨX'M-]4%TVƌ4#mL)#cptQخ E=pzub(OyU4PH@QUgFc*\Ǵp#vf ԟ}*=M¸?Z];R*$% b'4֕F)SYs1դ!0*d^GZK'Jh wobzf;^Db37sҬycJb7z=ȃ+4ƅѺԐBR:RER*EVTz4`LUlS(G1ˀ:9v3 1=M>9L$Q2oTg4o3Rl=&`@P޿Y4lw+iVLiHjL 32h3,l1"`VrJ >jD,܌#" GegcT1܋L^xć U.q;#<B0XW~d9pA5X + bsiil:"_j Թb XԦİ4{%=s|SMpr*z8H?Z:Rc Qگ89DbbzqJd"i< 3[ojzS|^j2)J[HaFRHF*RF1HЪ=*,b4{ĀЪf_5Pb=QY>|y9>b9KFEznjcx<Ӹ7#Fҧ l甏sjL2 )$3J,rZ6ڔy$mu1 9)LXJ V:k8 [N˺A"q[]hIGKu H>lT[X.UCCJLRՙ9a#Mx\u(qIzUG;Qp cACަ,;Rfbh`KڋCK`cFjJSP;rҼ^tzw *UoCNVdc!! >F An)ZbI)h;zi\85y9 rJİpj"})R=& H8*; ƬV1Q|ep5t9J؊#];TwJj"lV4L0[#Ji8[FdhC!Eoҥ7e ޴a}]Y1#P`x^2jID6ƷZf㞵!QMѓ!#MSj&GRnTg#n ChB3&ږLV 8%KIbyPи.}4F)I4 sIzp8N,whMa;ѶƂM;hi67JMFHnP.Ai@+>}jݽr;Y36~j gb\y(PzI^2JƓCR:d'SG_婍o qȤc ڎPr;f1PsjsiP,8ˀ;ԭ`A*M,ތdfZ1(pE#Ąr3:9SkO1B~BҀ+u5"HɌ`yW$i0CMyᱹH'4ՉhhB7%l*y#QuBē!`}E8ɡ4Y)n$Q zƳ +n9V2\Mzҙq"E8]8-T\M&@ʎ|܊o5!ŚImudc>\±gˤ)H'I=iCtu(mT5"#70!.O8(#iԴp()6C%cqXq:HXU/8 U8K5w'3Y 0?e3 E7OJ |4G'PјRӓ9(R4ygU1twTs=i8F#E%xNZR֯4Ihz mrDkm@~0 *h/dC\eX L\f {6sHޫPTr= S1sMM},MbCUUǵ2G`8^*/A px #?(5%6,GN(S|8d8.$_5=Ć@7MV6A 9`*(x'ޔkX+zLAS .АPf$uIB15)r}gW+IW0!X 'jR65+X 0aϨ; Bz9g| 1=-uJ$yQV"Y /J: c5>3CʌG~h V+,\ʜ^;2 -F=zvLBWqPyϽBo{µRL߱>1@0Oz푼_UК4:́:S^U'8Ôm' 2i v3ۃv7zӘrt)pOSNJ^bhQ,5*\Mr*%qӕۊ!rO$'mxM..RiPҩZjW)#;N*TdӸXS!?4fRr>䚑THdtG/Y/@>(AS,):( U?SVR=з\t t4$ Tմp:VPkʔ!%s-XH9ūղPo3Qv,֏?FatiXFx,b'5rԺT`fTq0uJָ kvEbI3o&A)Q?5nkQJٹsZD^n+CTc RT 9#^2ّ*2"9mdbw ٳM9UVFz9La'sTT89r\5\wNfz=UJэ =湕yF[n-Qϵ=)+Dij3]_[Oo" ɩ 8jd+S,R]JX"9qSCn #c`^K <3R:"ێP- ƚoAP9X6sOQ5X_r* /\U5:)@t9Y#7/;$$WXM\٦۳ޚح]40Zz*<\r"`;XY44s(ƍMR 
d҇4r)JW%ěuC1T&iVR$+Z;4iQ9;Q3OٚP5JbqЖqZVۆqU#;XUջ+zRr2\y*5~s@"365m3SUzTB>bsG2Ab@(OG'繩2=(AbRhv_֌/JkkB#'Jo-ڦmʘvaTW3 CMuR? P4N<gIsN9bO)8+X(Rx#JC1=3Y{Oo%V$Q@ r*cU粈5f1'JVۄY{8SjNeN[$#TOK"Qjw]J@Jݭ^$!eS`$sY2 o85$d s$A68A䌕%UݙFYqL>HJmI.̒C, ee,S]  20*5dl枓i ")bd%,!*u 7N)zZKCͷNJ<22). "ՙL6㊷!%(DLRSQ@fZ_`q׽Rh 16m`JUTڼvJk&ڠ13[5QwU:HS$Z$aOY]g4Lb6P95l(ϭMJAc 16=i,Bjo *Akm“Y2bx+JuyGm)SnsbHcn Uɱcj^znya8%859`=cȡyQ2>ܬ 7 X 98ByUR?Y:h8WͷI'nꢤ{oCF*.aͨ6>UZNwS:ajD'I?H|~L0p\\,oLҩHV^FEDø̉a1RyRm~1Vj}=dS Uf>aǭ7d$َKc&NkjkM<ُ*8,&%Y؞a.H[#G`vܮEP#} "&ש=ȦmNDmN&n0T-<-L{%+]H$Zq.M³uJPV. QCr4C0I8^JS }vT]"eRâ֢h5mv}Vm8sJ\V11m0Syd DuLN&|M(t #u^at+HM?qթ~HSym6Dn,k7OT}0ϏY;bb1\W3HuS:tVg6>$gq9fNs⣗anhsO>.A/eRlj c' ï"䧫£$~5F`uJgo]ñ޵iu4Tn9}|ŕZM7mgOFVRǖ(yW+z)kۓJSC_#sŠf7QPɀ7 ǹJrN<㩩Ld `b!Wc5M[*jiŢ[qT+TjԒS$mQ3'(ϭk󚴑c"(.Prҥ$EFsvSsN7wi(nb9eqэyȤ,>4ij\HFj\Q"K~M.=ivhZ1Op2 R,'far qҜ#cRsCbƀ:Լd#Y1URMgC T:R1ܻ\."Z#LxJNu6A>H+v;xSŒ1TV7"UZ&fvǖj&Oq ֏}CmJmܾDgbQi>pE_?{?4~fP C IȻS"^ i[u8!*|鋐k(eATB|AnD g*tPA/[{95ED7Z-M2|Nԓ|F,n:*] ƏZ9 ְ!K85&+ZcYel4jx~[!֩bdeM+Zoi`)EbR%}+Iʜ)b3F[RfaM\x9)_e!CV< i ?h.Q[y@1@3g֣O{"k #TӁ_͸-;l[(E _#j;`bzlIMh)C WͪH4m*1<j6cXb u0T-YT /EQsGq~3if \񏈓iue7aC+.ne˻1bǩ'w`ߖ&+,w&'.v$:yco{qI+#ȓm݈[ QCb+ۘX9N9iBQ?II/tRM⎩mp2OBvr?*7ڼ+%O[#>$V]h׋5fr=뚦J饋tCIgNr'5kv06Jz];UF,# L>Zrݞ88J+L909'Y槊+Gppjnk9D2;-3ev9C]vM*@CNWּXe>WN~GL+5KѬȍd9ʼF ٠ # WIQ 8qNyp_ {cJP9wLm*"s{VcӚjƥ7i#6{;҅J`w=ɬV$Q?x$APޞ'YI??J;2}I4d|g*?ݭ}2AGEau7vrխI-JsQӱE>kM6\s뚯>j]yӲ96v(SwӏhM:tt?Qh8u!52Ɠ)8*OƶR4??S#Zii0ަ`}?:i8l wFMZ FWc֠x,?7( pc6&QhiDaQ@huidgt9 ZHFϰ|RjkQr%מ٫RvESVSwbQHh9} .R_ʧ~5"{Ѹti `4߅7@yaJO 7Rzjꚕyrbg{TqOJ@lbnºY՚OccN.G9z K9W<" MDkdx3=hJ-6L(( }WWrMIzvo½$YbY+CsS^^esə`RlM;tuaky_ Gw+<)EPOP P)L1Ri+ifN*taUW5*Vm@ݏ;5TDnsV-JqΘI+xUNz*?OTJUx,=>GLdήk9#}6ҵM2u䇮&7]"+aiJ[cp:4.:8׎J7 e<.ǭrpUW9q9ljFzLtMr I\wͲr{3<ʢ]>ӘqҴ+Ȳ!r+ =Mhi^ $f#}ߕ>Z֦YHU=W#Ҋ~|USo)/ҷD2 י:U)IX5-i@fԔH(4њ|°= ?y4nR0<ҏߞUG|}Jo/N$ 꽣Q6݅8&;ѺsKt$v0X0Zi +&r<֗L~]=;ǥxwxR:s09b+v#z\>JjצSGM=Q摉4bMi٦1h4S(KCuk$yn U?*ͧ } Hw+ARv5x+P:mXdy,?b*eI'cՍVNF73wJq3@8*vܖ?Ńr!FYEUuUn:iҸtk'TI柃ZhrgW!<r߄< MF[4Du6G1VS@?m'(EFիBya/J=͢jd]psR SsxH‰lF 
lGG"/$RPtmt2kVqXgj4H*oyn t(OYuܿh)-T'5՝u'%?fץrgǃT i@EF-J"asFLA#WS^5~/ዣ9xA= zE|ӓÖG?lT:syWIVm+TåwؑZiM +6g쪭Q=ퟎajxiw'oKɲoOO#X;ft//,m\Ra-H5+f +/ >mvhv@cȴ"japVW783M7e '#v ,ہ i/{Y)ޗEJ,2$TԿcDʸf?:>΀ IϦj~iy?:Q*γ'S;ey]_Luaauīp5 pqԞ:vnz8p+0 HUclcSՑ=6Qý)(E%(@QEE ᮤbٰ&DKּ'A:vop\g>^ѫ(ʑAztҞ8O ɿq@3<ƴLgfX-An 1ǘ+ک,%V"Oֵ# daj'aJA ?!-<ܕroӒs+/OK8v3rgԹ;$G!#_cؑ^[xAesH8I.?P"N8\J+\6"s~*/8sI6m蟭7iSgx4HW}+  pEJ1Q cJ@踮J7&gҫqjHRq?#\s4d#p*rOƠ,RmJ(22"4N}0f`sGZ?MgȘi&793/L75[ A{gGЊoa]ݣ72bz0sgU|dACp'?TZ,=I7s7"MD⟙Ei.a>ҧrG>hER |bf?GIYs8 t\bNibSaV{W}oUbOӊGXaAihQȦLぞZЗVF'lHHz[B)=p+攽A/&[y6:B^\kõ4rCnjBi[M'R@ @4 (ٯ\Ɓn'VH+sVDL$T&T]d'.#)#W.JUb瑖5暾OY<60+lHskJjC`Mg_['VO?ƏW]: &: ȤFI^BʗcD˲b6FFbTH9j d¹/ Z8%qCfflgޢN]5RӼAu[Nwe8}8?]ԳƖX^QN̰q|s[ˤYǞ>5stb`>5W?9u dtc z6xRgɈƸ*Uï:# R_l,imDpzꜾ*%aP#9}W"tKtrfF8>cCG/D_IC9M)ruPhуU.#sLkPuU>>j6qO95!eJZd!sgQ"FAԬj}Oo|5O["#2t߉7VƘnmlKX?P x̒kc`#EP=Cr$ {f[WWV1#wT+ȮGPhengHV)` 4%Ьx!-1$J$ "9e)YH}BWib/UI{it0GMmxC4nP:\I^F:l~푒P6toΪ$gUNߙG{w,I" ly TrvEyխۓo%Ʈa<޹Z+H|K?)*1qJ5)n_ToŗVV7Cly:]lP4Z_"}_YIԊzFj3[ZFYwD:s3Dۤ[jE}8VhS(PH:SMƪM=h}v?dí y/4y.\qv1\ȎGZϗs]oj&NTo/RI~g+d".?WV[F?v]e!Z0vqiJJ_fcsx?1US2B=rnt;Yzݝ#[‰#|@Ⳗ"yTFT|"R[yv>ܜVWZI0 o+mUrN;q Id*Dn&e3aY%.lQ1^j3Ēnz:+l-9%ۻ#?5".u'pN?M~xo%gG^~Ѳ@Gyͳ[I G*94aaMg{uhkw8$;~FXt:tǚk[CHWZџ\eyʻ8μb\M(Md~򜏭zݦo5$X`nc{כE>Uz׭S .Zfs^:2Z#qc/D8JFkotyǏĪ)$;[p ۋsmzOCq;qۑX O8n+EU{n-rN$7ghK{y1TE,H.Y]mFaWt,3/;TfU(^͜]*Ěja*6n=о0|-;d?Iu$ĺ)h 8FFWdOj1U-C MNKe9tO?"䕃9z(Ϡ"9fQ^-i)=s\Q934T=rз˚9 v q\ԤmmQCU/E/R%T+V#VY&"aj2)K(DzZA֗44S(E(4bxHkĥЛb/#9 8eG9oFsxÖQYcO7sە].ki/P1u?jc4}Ѿܧ\J"DbL KgzoWe*0a^So&R:KR[Ocs h+9#k~.הz@5XF(9UԯWSK>yMgV9(N6t=I"#ly1!HGIVux=Y;C+do<8+&;Qdbpyz"\]J9I_I|fV,b,Ecb;~=i~Y>aʖjM)Aug, n1?0`;~Ԝs Y䅏+̋ x8:R| -]Іb@p8أW^ Ѩ 8Ζkm^>E,~juOj1Kj&E^x9֊uLTqUӠ#wwlm gܚ|!ukjdbIXc^ƻ;(p qf8?eY+#jVwqN#`Do2ջ} b]̲yتzSJHs = ApFK)B9*FqDZFXP4XW}GRŴFs7Z7ɐAv`m.An#4ؼ׭{?$NKyGO<46P+$n]JhW'e$UrfJ}V[3H,1Vs {t@ sך_FV]Fێ}jmRNpTuJ H݆qmň攍aBD3W]0}&*0nby>¶D񢀬8;^o_߇c>#ٵm I?֫Cz0<@Y+r'*})]WqVDzjnܟ:ԹɷOOPGOQawGժ%թma->uO JZC 0V!ԗVXnLDvZ~$ze+v3T$]>W<Qwh`ġN3-|5YTFYQHd[ ЃJЖx2m ZNxM niXMޏH]m!Ulp*Ln9XZ\k=FM^emo{ 
4θr4Uu?I`e,wg<)Og<y8'lv aobj+$F ;7znX\6մ$g}xB 0)jݿJ27]7"`ܶYN~"Rw35iD,۫*Jt|9]gm cP օ`=k'drSW^8ޕfs֫7ZqQA+C0`%Q@jJZJSE}u_->r~GO&vrdsZw m:5m@}BltK:Dj;ॹZ/$L<G@)HBՐI u'@?DˊIhI ۱;|{SQ=P~U9泱5"  ?&MnBs5e%Ir͎P8B=s|%αg@859F TYӵ . xKH 2~o@y-) ?^ ZH m g% JsWva PrxIF>Qst枮.}1EČ~V`\y{9Eip 8T𪉹?ÚE,w 3J'nIa9|=~[$}j0đS{(;@xF)ؗ!(h]Wj]`.Z1] ru9mT\n^=UG_M֟).Ec$TST *v]mCt?M<5H62@b]{H搮s{jTiqqbT|˟RZb ⳯cكBaq~54m\~. B+nF MQr)g;b .wV)SIܥ3T̀KqFUxWt=+J2ҩ@13?] TccXi~|aF-hIގDKE@ JZC֐ ڊ=))S)ÁHI1ePNr"I䲐DacT"(ëVґQ~p$>.MNLBCH'c*+`9N*yC(.Oh,Z-Amƌ0P}>]rӎ)>lR%|m `֜yPcR;}[;@.ȁqR#G+f*8y#HR@rqgNsR)o3?9^$ck8R>a9WRIN F1J7mb8tG_^|.CܮEQTz uFWVG}ҕ$ 3iW%3ܓ<=;mQמ*U1@ʕ'[I5|D#w%=\F/+ǖ(qMq,OsKm|ebZNJ yHIET#9G Jù$Np)*"E^9##Fᐆ=U`@E#Wkqjʝ04巁%/'ЯFXs(X*,3UXЁQ<hA큚Q/roc*n'"wu;?GJEc˼C!󧲙 PhW"A =IO$ iriɕ(1ƒ7!Iߴm*RSh"g | Gd'>hqP+_4u|i'"M&ƑNXG%wƚ:C,v*J$؊ Zrg4i,g, {wVXZ 嫪Y<Zs)XsTR͖usl1urw&t+\^; $ڧT9zXi,OAQgtYLơs㰫 DVj9ɡj)E4IZw!h!E)HQI^)i -wi#rP3qZ| JA$ʠ& @EE~H[b:Ni[cc$}yIVBW?Q܉cv8A; i0#'M7(Qܞ/#--طFbFi.a쮣Z6Wf'J<"Pna5#2y^M5mx],B(< 48dSʫrm[X}|*ޛQHn=*+EE;3\;nIӌŦ:޴[{x)-y nW(A^ԗdnv`BG0(lTg}In$lMU9ػdi%aJWaJ ֒u)(L&h=iE!(=AQE ;άczΦ@BɄTFҰNn%E##?x|"\fb6})#89b(E-dE%ȫ6=sQ*Eer$UiL*2Pq*b,_5;cR/[PE!$’ έ g@Ia6OL #|Luh*2ہ$Ʌ< 陷 T.۝ɱTyj$Te\@ˏ1NtXwf۔V >*|;&սN bܹ2nߡ*_9 _FF(W.RBc4D \sY;9K Q5"FRQL@h@8Si€2ii)i@iHq@RJJZ(Ӏoԧ$dwd*x(tm$-?2T G ,;ҫ$̩ cKnrrwۚC C1=1i3\vqO"'qEV>$EYI\uuF|]P͐?!T|gFo\gԌcArjx(Y z X[՟< q! [o'$vv&{ȤC4dL#iG,3. TrܟõL -cr?Z ̗рH,Iư#Y {`eg$*9-qRXoXTvLgPDv ^ۋP^Y p2(s~n ˱p6sS$+H0%5! xKC%U\$@IʎK93d,iŗ-H)A `@,.޿uh۹a$ =@  >Jr9,àٚ'܀a>y(VDCր-lcZvϠh,|x"[ـLl3`dƐ#8Vg>đ6|c%hQa*1v<Rܫ+7 b0FGv'늞\źD#Ƙ.%T/Spo#'TÅzP6[pi3! 
ry֚OW I\~EVO)EXsڣZRc=]ITNkm3ƳX K#\bUۥ[H幪 4PzR (J:R4hPx(iLF(P8APIE)!"$uɊ2yQKo6'r EK aM!wzPҗ;-~Oǎ) BcM>VT*hDꩌUvn&MxbO'Aa@&*eESyE]dtykpq8D0W}jPG;g4.L =GS4R25,9IڞV;g5ٮTT"EA|syoʌLSw^HXrBTlZM :$ؑ_`zOQ=PmOR½O8AI&1儜b{?ZO.>}:KS{u1Tbx+"J.; 7~THO1^N I#4%亹%^FCڙm2HOSD$cGzn)'8wv?:O ˴'_q)vxtv23aJȻ6+ݺ\j_!Dc?F`I^ll|}2I@0 XIkq.6a\C"zՅ=b_@RDNO7 < vDT|9LEvR AѣNu-c^LU~q1vČ =Ԍ,D\1Y6?${Um Gvx擰7?'kBAl" @'ŸˑV4Svpˆ;Qh6M}'(osjXyf+Xw584.:1& %qvftMeݮB +1[ӕ)RIX"( /Zm(MIAGzZC֖zERf֊)][B73jh>38x KIPy=BZĠ4v2?+º\A%{I68Ox=eeU*Jd6\J~YeAAҥG,UǗLݷ|Ƒl=&\̆Ή!7r?h yp8=12|)\jZluTTTruڹbdsJùYR@tV=E!sv*x-QV;y9e1 0Hqɦ5rTwP\;>WBKp?-O"Tz?@Xedsр?Ƥ[Y|D/`1~ ,G'幍rOKQIdѓΉ, ^vvbxN}')֋l1Ǵ!(?ZsEc\cQMpҚgy$()>a$=hwaXINHRWapjLOBۘ[<7"8,ߧLymVfqҝq#@7ީG#}O4Cg QR/Y1QcRu#4Hop1ƥmRV#bGJ^LQװa/I i|}jM8߃b QH6f`s+[7~ioDѱU;DQ(&lLR=2Yx|P 61 ÓNS]@ҙ:llUxƛO>OKB$SO|@<~c$'޲b@~W 枂-0Vbќߺm"}jE^#j&45LhI Sc?0 +7 ֱ$z}pd+j9`p2Is\洽faH6)bxXcMrlj2`qQ!E8S@ \Zi ()M%-3KEv;M9x%R0ݱc략1Bb\qN¸ ̍J$ j;Xzxwe8L]E#qMEOjmq) Ī٩YPzGr#5 Dw҆YQ"f`ǚ8_ RXhn&2 RnV"kh#?ʳU=χ(i`,[lӯzk 40۰znV?GM?wԂ* 1IHFx =܏}UTʏcI9?45`5ϳTjWvs@z`P.SɂBmp&{YW*Xe8;v8XHeyOG񧴓F$`tʣs4J#&OBG͆F ͜~>{Z(6볓Ak=*X E*ݳX,-ߜ ^/j<ʡjҦ|_Ve cakʠpKkI_b@Mª$ĝǓW#>ԓK >?O2,VsR8n+fBV4Aь ̴2x5+q^9孊_jOvr}kY(l ҡO"5f?08@WjflBv%28Lr9+JTjtiьHOg54ܓӗݸW6D+ŖM?Ҳ栖&erT}#uqP5A(ܽgV8wʽih ZQI@'z8(4QIAl?޴rs@FnseEQ$gsc5sooҙ,fgqݽ)A*Y`$I>A?CМUŅ¥PҵӧcV/>Pp[h]Xp^Iu4l#kӚ›$s늷U#^!Hz ~r&qO+6d-&_Snlo-n70 +Lbe7}HK& my,+%H|~EFa}Mij`CC#X BG&/Ze]hy ȠfA?J_3[?Ү.a#e-$e'zӰĀ3+0ʦ2|`W32ޘN+ A~4!6Sy9H[F~kM0C$,?G[<5̪ BVF$FCMpG*P.7˅!AVUaсɄo38)!;G<v!4n埊!rĚ#?xid$kĩvmm#`#W粉cJI&Iyo0*ЫsP[cBІ( 3ǘ2ҁ~}ORZsc̜MW̢%C7Sqdմc|0'nG֤1%Kw8Ƴ.@2q5^}c5/icJ2~ВB_bA]rjjpy\5)OjT^S uN,ăM\s7>T#*EFÊG"j_nHEDA5dXĕ3piɨ9YOZ_B-Ƒfڐb6\qb!0kCPDNh'*Qyd$s\ͬφv5:nSH2%SOZz)ndvBQG|VbdI3U9!6m5tݸ OG=+GBHJiIV{cO^]|1)lInc]Mhj ֎jWM+o3d'ii`YsvPbME$䂫!4Щ﹁f2IT~2#N)]4ܝſZjvYmlHQ>Nno^\ HWt+Lq=mKLPCǦO&+* 8A N#n )|c֘ʗlcU-*XT~b6EF/6 U.: &4g\ŷQAi%ԟ$Rl-֯a@d]CnBh$qcx;#5JVKS,šXgBKz2GQSIo9n$??ʣP$A-"+5V')[kdu^eψt_7Ui%_=L?]U0>dv# p"n{R*?#LHGE&I‘ӝBK`3#{W%p ?ҠQlyǶzU!3= 
R$[zjW(79/sm8o4q$0@s(UIz:.N*F;%y=MleV(SIyInrIVbY_UCRjs9eGnȣE{)%gV%rֱ` TH~#89H=sQ٧2D1>^vHгE@_>B΄#eIOA ]qPv;*rHZQr TDۣHa@U䏽?*v M*к2xF?4ҜgOwR8?n˱GZZȃ2~G5r$cLfؤaϭVdTZ} ]MKEd Ǘ;ƣͳBF軤)+6"ҒHZD0j[xR3S*SWɋI0V~Zm:'RSQ1Z#6H#&)åk q2܌)1V1Svѡ,MJҞ)O\RQK@rlLڐөK2H,;'q:BU!p@V0IP}=c{6?9UVn媑~>^HrU" YA0=:eHd3݇5a2=Re"wBdU+܁*wT2aDl!CR8]N,kŒ E) pzZM.+&?Eel6qըs) &kjaa$=(fF\w8 ¤ UFʼdIqus&̫qg 1GxI㹓ZO9&sQuT|֫I~A&(&=ڛ @3i@'$Œ6v#`7_)Ҙ"`#7{W^FO_N {Y'% T]1ô,r?Zԛ6RXg_r)7q-&QCcWd|K"]mLqwHXHE>TQĪ tٝiaw9V̒7+[ĀƠ7r 8ak"*Ud?\0jF'XLl`ISe12i!sӊZ9OR)ӱL~tq| h,VN3lcxSkM9,W--MQ'=5ŏ1eLYIzYdO=dcSЛ "øQ)iz-c*{"˒qr֡ =ppQ*y*U%nbf'yR) {99W:Xx$U;e,yZL VvAK+NWw"JMD؝n)2iƑIy4ڴH%NYJ3KL('ZyqHc;43)XҮG>@Y@ ^"&\eoQV!U̠ ŵmI4  "=s2P4(=GZg~x1rO:v3M$(BhIݮ::2vbw9a F zu&$P}Hɦ n#7&&E [HH_c@IS] {k͏lJI^;~630뿁S!-Us}b?2Bs,)PPPɅx M;&9?^^PyqKėʨ*Gb51ZA'ZΑeOU9rẅ{9IIVECN{⤶|$k'R c ?vsL{1j?z^4َ<^徦dprp&Px XmCFp?TSngr4x(5gLcBUG@ïҳvJe4iaMqpV6Rφtb803P5[j\UZ-݋KDDsJ,;؟L§>3M8XWQOdjԻ/yz#H;;~*`c'P1Lz)% n8GJa\:v3"4l4j,6Ї2lb4w->ҬRQEUpJ) iө:V HḔd$>:)Dr~gbO^a5̑qM V,-}sf?3NҎ[v7Ưj7,8psTKhB{b?sqgQ&d(TC5S#Ȕzdҹ6gm1HІy rs-!YɑA9msc`GQÅRW>=OZ$Eʂs('gO3aAF]1[)e?yM.To\d=*yE{4f2*zs6v@˜Vv1gj8iKx[rD+^CK@wsTaYqʏEdvOD\֠]ϻιCQ~2jTE:vձf,R'ҪxŲsi^9bQf|ȖQ+Z"tcUm8q1ܪ T`cfhƶLq< uܠYXz{O܇ڏ2P/ޣ"PPqQg&zԂNJ*INQ JQIڎ1=i>RQkylin-ai-data-management-service-1.2.0.0/test/CMakeLists.txt000066400000000000000000000053421520577635400236050ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.5) project(UnitTest) set(TEST_TARGET UnitTest) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) file(GLOB_RECURSE SOURCE_LIST ../src/aiIndex/*.cpp ../src/controller/*.cpp ../src/dao/*.cpp ../src/service/*.cpp) find_package(PkgConfig REQUIRED) pkg_check_modules(BUSINESS_FRAMEWORK REQUIRED IMPORTED_TARGET kyai-business-framework) pkg_check_modules(DOCUMENT_SERVICE REQUIRED IMPORTED_TARGET kylin-ai-document-service) pkg_check_modules(GIO REQUIRED gio-unix-2.0) pkg_check_modules(UUID REQUIRED IMPORTED_TARGET uuid) pkg_check_modules(NLP REQUIRED IMPORTED_TARGET kysdk-genai-nlp) 
find_package(GTest REQUIRED) find_package(nlohmann_json REQUIRED) find_package(SQLite3 REQUIRED) if(SQLite3_VERSION VERSION_GREATER_EQUAL "3.42.0") message(STATUS "SQLite3 version ${SQLite3_VERSION} >= 3.42.0 - Enabling database recovery features") set(SQLITE_RECOVERY_SOURCES ../src/sqliteRecover/dbdata.c ../src/sqliteRecover/sqlite3recover.c ../src/sqliteRecover/recoverdb.cpp) else() message(WARNING "SQLite3 version ${SQLite3_VERSION} is too old (need >= 3.42.0) - Disabling database recovery features") set(SQLITE_RECOVERY_SOURCES "") endif() include_directories( ../src ../src/service ../src/dao ../src/aiIndex ../src/sqliteRecover ${GIO_INCLUDE_DIRS} ${NLP_INCLUDE_DIRS} ) add_executable(${TEST_TARGET} ${SOURCE_LIST} ${SQLITE_RECOVERY_SOURCES} dao/testVector.cpp dao/testEmbedding.cpp dao/testDocumentParse.cpp dao/testFileDatabase.cpp service/testTagService.cpp service/testImageService.cpp service/testfileInfoService.cpp testController.cpp aiIndex/testTextEmbeddingTask.cpp aiIndex/testBaseTask.cpp aiIndex/testTagTask.cpp aiIndex/testSummaryTask.cpp aiIndex/testAIIndex.cpp ) if(SQLite3_VERSION VERSION_GREATER_EQUAL "3.42.0") target_compile_definitions(${TEST_TARGET} PRIVATE ENABLE_SQLITE_DB_RECOVERY) endif() target_link_libraries(${TEST_TARGET} PRIVATE GTest::GTest GTest::Main PRIVATE PkgConfig::BUSINESS_FRAMEWORK PRIVATE PkgConfig::DOCUMENT_SERVICE PRIVATE nlohmann_json::nlohmann_json PRIVATE PkgConfig::UUID PRIVATE PkgConfig::NLP PRIVATE ${GIO_LIBRARIES} PRIVATE pthread PRIVATE ${SQLite3_LIBRARIES} ) # 解决龙芯架构不支持 fstream 标准库问题 if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9) target_link_libraries(${TEST_TARGET} PRIVATE stdc++fs) endif () endif () add_test(NAME ${TEST_TARGET} COMMAND 
${TEST_TARGET})kylin-ai-data-management-service-1.2.0.0/test/aiIndex/000077500000000000000000000000001520577635400224225ustar00rootroot00000000000000kylin-ai-data-management-service-1.2.0.0/test/aiIndex/testAIIndex.cpp000066400000000000000000000045041520577635400253120ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include #include "aiIndex.h" #include "systemCallback.h" class AIIndexTest : public testing::Test { public: // 所有案例执行之前 static void SetUpTestSuite(); // 所有案例执行之后 static void TearDownTestSuite(); protected: // 每个案例之前执行 void SetUp() override { std::cout << "AIIndexTest SetUp..." << std::endl; } // 每个案例之后执行 void TearDown() override { std::cout << "AIIndexTest TearDown..." << std::endl; } private: }; void AIIndexTest::SetUpTestSuite() { std::cout << "AIIndexTest SetUpTestSuite..." << std::endl; } void AIIndexTest::TearDownTestSuite() { std::cout << "AIIndexTest TearDownTestSuite..." 
<< std::endl; } TEST_F(AIIndexTest, test01) { // 测试AIIndex 启动 EXPECT_EQ(DataManagement::AIIndex::getInstance().exec(), 0); EXPECT_FALSE(DataManagement::AIIndex::getInstance().isIdle()); // 由于当前在测试肯定为非空闲 } TEST_F(AIIndexTest, test02) { // 测试状态机 /* * system-signal idle-status * 0 0 忙碌 * 3 1 空闲 * 在收到系统空闲信号后,等待一分钟进行真正的空闲 idle-status 被设置成2 * 状态机的切换逻辑:单向 */ // 由于获得不了 IdleState的状态,所以这个函数先不测 DataManagement::AIIndex::getInstance().setIdleState(DataManagement::IdleState::IDLE_STATE_BUSY); } TEST_F(AIIndexTest, test03) { // 测试索引开关 DataManagement::AIIndex::getInstance().isEnable(); DataManagement::AIIndex::getInstance().setAIIndexEnable(false); } kylin-ai-data-management-service-1.2.0.0/test/aiIndex/testBaseTask.cpp000066400000000000000000000062641520577635400255330ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include #include "baseTask.h" #include "nlpBaseTask.h" #include "summaryTask.h" #include "tagTask.h" #include "textEmbeddingTask.h" class BaseTaskTest : public testing::Test { public: // 所有案例执行之前 static void SetUpTestSuite(); // 所有案例执行之后 static void TearDownTestSuite(); protected: // 每个案例之前执行 void SetUp() override { std::cout << "BaseTaskTest SetUp..." << std::endl; } // 每个案例之后执行 void TearDown() override { std::cout << "BaseTaskTest TearDown..." 
<< std::endl; } private: }; void BaseTaskTest::SetUpTestSuite() { std::cout << "BaseTaskTest SetUpTestSuite..." << std::endl; } void BaseTaskTest::TearDownTestSuite() { std::cout << "BaseTaskTest TearDownTestSuite..." << std::endl; } // 测试子类继承 BaseTask namespace { class TestClass : public DataManagement::BaseTask { public: explicit TestClass() : DataManagement::BaseTask("test01") {} void handlerTask() override { std::cout << "TestClass handlerTask..." << std::endl; } void stopTask() override { std::cout << "TestClass stopTask..." << std::endl; } bool haveTask() override { return true; } bool isReadyToStart() override { return true; } bool isReadyToStop() override { return true; } }; } // namespace name // 测试基类中的虚函数 TEST_F(BaseTaskTest, test01) { // 测试开启任务 std::shared_ptr base = std::make_shared(); EXPECT_TRUE(base->haveTask()); EXPECT_TRUE(base->isReadyToStart()); EXPECT_TRUE(base->isReadyToStop()); base->handlerTask(); base->stopTask(); } // 通过基类测试所有子类的公共部分 TEST_F(BaseTaskTest, test02) { // 测试开启任务 using namespace DataManagement; const std::map> taskTypeMap = { {TASK_TYPE_TEXT_EMBEDDING, std::make_shared()}, {TASK_TYPE_TAG, std::make_shared()}, {TASK_TYPE_SUMMARY, std::make_shared()}, }; std::vector> taskLists; std::vector taskType = {1, 2, 3}; // 1 向量化 2 标签 3 摘要 for (const auto& item : taskType) { const std::map>::const_iterator& it = taskTypeMap.find(item); if (it == taskTypeMap.end()) { continue; } taskLists.push_back(it->second); } for (const auto& worker : taskLists) { worker->start(); } }kylin-ai-data-management-service-1.2.0.0/test/aiIndex/testSummaryTask.cpp000066400000000000000000000030371520577635400263110ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. 
* * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include #include "summaryTask.h" class testSummaryTask : public testing::Test { public: // 所有案例执行之前 static void SetUpTestSuite(); // 所有案例执行之后 static void TearDownTestSuite(); protected: // 每个案例之前执行 void SetUp() override { std::cout << "testSummaryTask SetUp..." << std::endl; } // 每个案例之后执行 void TearDown() override { std::cout << "testSummaryTask TearDown..." << std::endl; } private: }; void testSummaryTask::SetUpTestSuite() { std::cout << "testSummaryTask SetUpTestSuite..." << std::endl; } void testSummaryTask::TearDownTestSuite() { std::cout << "testSummaryTask TearDownTestSuite..." << std::endl; } TEST_F(testSummaryTask, SummaryTask) { DataManagement::SummaryTask task; // task.setPrompt(); // task.worker(); }kylin-ai-data-management-service-1.2.0.0/test/aiIndex/testTagTask.cpp000066400000000000000000000027631520577635400253740ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include #include #include "tagTask.h" class testTagTask : public testing::Test { public: // 所有案例执行之前 static void SetUpTestSuite(); // 所有案例执行之后 static void TearDownTestSuite(); protected: // 每个案例之前执行 void SetUp() override { std::cout << "testTagTask SetUp..." << std::endl; } // 每个案例之后执行 void TearDown() override { std::cout << "testTagTask TearDown..." << std::endl; } private: }; void testTagTask::SetUpTestSuite() { std::cout << "testTagTask SetUpTestSuite..." << std::endl; } void testTagTask::TearDownTestSuite() { std::cout << "testTagTask TearDownTestSuite..." << std::endl; } TEST_F(testTagTask, TagTask) { DataManagement::TagTask task; // task.setPrompt(); // task.worker(); }kylin-ai-data-management-service-1.2.0.0/test/aiIndex/testTextEmbeddingTask.cpp000066400000000000000000000032021520577635400273710ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include #include "textEmbeddingTask.h" class testTextEmbeddingTask : public testing::Test { public: // 所有案例执行之前 static void SetUpTestSuite(); // 所有案例执行之后 static void TearDownTestSuite(); protected: // 每个案例之前执行 void SetUp() override { std::cout << "testTextEmbeddingTask SetUp..." << std::endl; } // 每个案例之后执行 void TearDown() override { std::cout << "testTextEmbeddingTask TearDown..." 
<< std::endl; } private: }; void testTextEmbeddingTask::SetUpTestSuite() { std::cout << "testTextEmbeddingTask SetUpTestSuite..." << std::endl; } void testTextEmbeddingTask::TearDownTestSuite() { std::cout << "testTextEmbeddingTask TearDownTestSuite..." << std::endl; } TEST_F(testTextEmbeddingTask, TextEmbeddingTask) { DataManagement::TextEmbeddingTask task; // task.haveTask(); // task.handlerTask(); // task.stopTask(); }kylin-ai-data-management-service-1.2.0.0/test/dao/000077500000000000000000000000001520577635400216045ustar00rootroot00000000000000kylin-ai-data-management-service-1.2.0.0/test/dao/testDocumentParse.cpp000066400000000000000000000074451520577635400257730ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include #include #include #include "documentParse.h" class DocumentParseTest : public testing::Test { public: static void SetUpTestSuite() { std::cout << "DocumentParseTest SetUpTestSuite..." << std::endl; } static void TearDownTestSuite() { std::cout << "DocumentParseTest TearDownTestSuite..." << std::endl; } protected: void SetUp() override { std::cout << "DocumentParseTest SetUp..." << std::endl; namespace fs = std::filesystem; // 获取测试文件路径 fs::path dir = fs::path(__FILE__).parent_path().parent_path(); testFilePath_ = (dir / "hand.docx").string(); } void TearDown() override { std::cout << "DocumentParseTest TearDown..." 
<< std::endl; } std::string testFilePath_; }; // TEST_F(DocumentParseTest, test01) { // // 测试使用默认分块大小 // DocumentParse documentParse; // nlohmann::json ret = documentParse.syncChunkDocument({testFilePath_}); // EXPECT_FALSE(ret.empty()); // EXPECT_TRUE(ret.is_array()); // // 检查返回的数据结构是否正确 // for (const auto& item : ret) { // EXPECT_TRUE(item.contains("filePath")); // EXPECT_TRUE(item.contains("chunks")); // EXPECT_TRUE(item["chunks"].is_array()); // } // } // TEST_F(DocumentParseTest, test02) { // // 测试使用自定义分块大小 // DocumentParse documentParse; // nlohmann::json ret = documentParse.syncChunkDocument({testFilePath_}, 256); // EXPECT_FALSE(ret.empty()); // EXPECT_TRUE(ret.is_array()); // // 检查返回的数据结构是否正确 // for (const auto& item : ret) { // EXPECT_TRUE(item.contains("filePath")); // EXPECT_TRUE(item.contains("chunks")); // EXPECT_TRUE(item["chunks"].is_array()); // } // } // TEST_F(DocumentParseTest, test03) { // // 测试处理无效文件路径 // DocumentParse documentParse; // nlohmann::json ret = documentParse.syncChunkDocument({"/invalid/path/file.txt"}); // std::cout << ret << std::endl; // if (!ret.empty()) { // // 检查返回的数据结构是否正确 // for (const auto& item : ret) { // EXPECT_TRUE(item.contains("filePath")); // EXPECT_TRUE(item.contains("chunks")); // EXPECT_TRUE(item["chunks"].is_array()); // EXPECT_TRUE(item["chunks"].empty()); // 无效路径 虽然会有返回值,但chunks // } // } // } // TEST_F(DocumentParseTest, test04) { // // 测试处理多个文件的情况 // DocumentParse documentParse; // const std::vector& filePaths = {testFilePath_, testFilePath_, testFilePath_, testFilePath_, // testFilePath_, testFilePath_, testFilePath_, testFilePath_}; // nlohmann::json ret = documentParse.syncChunkDocument(filePaths); // EXPECT_FALSE(ret.empty()); // EXPECT_EQ(ret.size(), filePaths.size()); // for (const auto& item : ret) { // EXPECT_TRUE(item.contains("filePath")); // EXPECT_TRUE(item.contains("chunks")); // } // 
}kylin-ai-data-management-service-1.2.0.0/test/dao/testEmbedding.cpp000066400000000000000000000143751520577635400251000ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include #include #include #include #include "constant.h" #include "embedding.h" // 辅助函数(保持不变) std::vector readFile(const std::string &filePath) { std::ifstream file(filePath, std::ios::binary | std::ios::ate); if (!file.is_open()) { throw std::runtime_error("Failed to open file"); } std::streamsize size = file.tellg(); file.seekg(0, std::ios::beg); std::vector buffer(size); if (!file.read(reinterpret_cast(buffer.data()), size)) { throw std::runtime_error("Failed to read file"); } return buffer; } const std::string base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789+/"; std::string base64Encode(const std::vector &buffer) { std::string encodedData; int i = 0; uint8_t char_array_3[3]; uint8_t char_array_4[4]; while (i < buffer.size()) { char_array_3[0] = buffer[i++]; char_array_3[1] = (i < buffer.size()) ? buffer[i++] : 0; char_array_3[2] = (i < buffer.size()) ? 
buffer[i++] : 0; char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); char_array_4[3] = (char_array_3[2] & 0x3f); for (int j = 0; j < 4; ++j) { encodedData += base64_chars[char_array_4[j]]; } } while ((encodedData.size() % 4) != 0) { encodedData += '='; } return encodedData; } class EmbeddingTest : public testing::Test { protected: void SetUp() override { // 准备测试图片路径 namespace fs = std::filesystem; fs::path dir = fs::path(__FILE__).parent_path().parent_path(); testImagePath_ = (dir / "20250626.jpg").string(); // 检查测试文件是否存在 if (!fs::exists(testImagePath_)) { throw std::runtime_error("Test image file not found: " + testImagePath_); } } std::string testImagePath_; }; // TEST_F(EmbeddingTest, test01) { // // 测试有效数据 // Embedding embedder(DataManagementEmbeddingDataType::Text, scene::TaskPriority::HIGH); // std::string testText = "This is a test sentence for embedding."; // auto result = embedder.getEmbedding(testText); // EXPECT_FALSE(result.empty()) << "Text embedding result should not be empty"; // EXPECT_GT(result.size(), 0) << "Text embedding vector should have positive size"; // } // TEST_F(EmbeddingTest, test02) { // // 测试空字符串 // Embedding embedder(DataManagementEmbeddingDataType::Text, scene::TaskPriority::HIGH); // auto result = embedder.getEmbedding(""); // EXPECT_TRUE(result.empty()) << "Empty input should return empty embedding"; // } // TEST_F(EmbeddingTest, test03) { // // 测试有效数据 // Embedding embedder(DataManagementEmbeddingDataType::ImageText, scene::TaskPriority::HIGH); // std::string testText = "Text description of an image"; // auto result = embedder.getEmbedding(testText); // // 虽然实际可能调用文本处理器,但测试接口行为 // EXPECT_FALSE(result.empty()) << "ImageText embedding result should not be empty"; // } // TEST_F(EmbeddingTest, test04) { // // 测试有效数据 // Embedding 
embedder(DataManagementEmbeddingDataType::ImageFlie, scene::TaskPriority::HIGH); // auto result = embedder.getEmbedding(testImagePath_); // EXPECT_FALSE(result.empty()) << "Image file embedding result should not be empty"; // EXPECT_GT(result.size(), 0) << "Image embedding vector should have positive size"; // } // TEST_F(EmbeddingTest, test05) { // // 测试无效数据 // Embedding embedder(DataManagementEmbeddingDataType::ImageFlie, scene::TaskPriority::HIGH); // std::string invalidPath = "/nonexistent/path/to/image.jpg"; // auto result = embedder.getEmbedding(invalidPath); // EXPECT_TRUE(result.empty()) << "Invalid image path should return empty embedding"; // } // TEST_F(EmbeddingTest, test06) { // // data-management目前没用到 ImageBase64,虽然接口可以传入这个函数,但是调用的却是ImageFlie // Embedding embedder(DataManagementEmbeddingDataType::ImageBase64, scene::TaskPriority::HIGH); // auto imageData = readFile(testImagePath_); // std::string base64Data = base64Encode(imageData); // auto result = embedder.getEmbedding(base64Data); // // 这里返回值为空,以后要是实现了base64,这里要改成 EXPECT_FALSE // EXPECT_TRUE(result.empty()); // } // TEST_F(EmbeddingTest, test07) { // // 传入一些没有意义的枚举值 // auto invalidType = static_cast(99); // Embedding embedder(invalidType, scene::TaskPriority::HIGH); // auto result = embedder.getEmbedding("any data"); // EXPECT_TRUE(result.empty()) << "Unknown data type should return empty embedding"; // } // TEST_F(EmbeddingTest, test08) { // // 测试不同优先级是否会影响结果 // Embedding highPriority(DataManagementEmbeddingDataType::Text, scene::TaskPriority::HIGH); // Embedding lowPriority(DataManagementEmbeddingDataType::Text, scene::TaskPriority::LOW); // std::string testText = "Testing priority handling"; // auto highResult = highPriority.getEmbedding(testText); // auto lowResult = lowPriority.getEmbedding(testText); // EXPECT_EQ(highResult.size(), lowResult.size()) << "Same input should produce same output size"; // 
}kylin-ai-data-management-service-1.2.0.0/test/dao/testFileDatabase.cpp000066400000000000000000000162741520577635400255260ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include #include #include #include #include #include #include #include "constant.h" #include "fileDatabase.h" std::string joinIds(const std::unordered_set& ids) { std::string result = std::accumulate(ids.begin(), ids.end(), std::string(), [](const std::string& params, int64_t id) { return params.empty() ? std::to_string(id) : params + ", " + std::to_string(id); }); return result; } class FileDatabaseTest : public testing::Test { protected: static void SetUpTestSuite() { std::cout << "FileDatabaseTest SetUpTestSuite..." << std::endl; std::string dbPath = FileDatabase::getDbFilePath(); if (!dbPath.empty()) { std::filesystem::remove(dbPath); } } static void TearDownTestSuite() { std::cout << "FileDatabaseTest TearDownTestSuite..." << std::endl; } void SetUp() override { std::cout << "FileDatabaseTest SetUp..." << std::endl; db = std::make_unique(FileDatabase::createDatabase()); } void TearDown() override { std::cout << "FileDatabaseTest TearDown..." 
<< std::endl; } std::unique_ptr db; }; TEST_F(FileDatabaseTest, test01) { // 测试表是否存在 std::string dbPath = FileDatabase::getDbFilePath(); EXPECT_FALSE(dbPath.empty()); EXPECT_TRUE(std::filesystem::exists(dbPath)); EXPECT_TRUE(db->tableExists("t_file_info")); EXPECT_TRUE(db->tableExists("t_tag_info")); EXPECT_TRUE(db->tableExists("t_file_tag_info")); EXPECT_TRUE(db->tableExists("t_ai_index_task")); } TEST_F(FileDatabaseTest, test02) { // 测试插入文件 FileInfoDO fileInfo; fileInfo.filePath = "/test/path1"; fileInfo.enable = 1; fileInfo.modifyTime = "2025-06-30 12:00:00"; EXPECT_TRUE(FileDatabase::insertFileInfo(*db, fileInfo)); EXPECT_GT(fileInfo.fileId, 0); FileInfoDO retrieved; EXPECT_TRUE(FileDatabase::getSingleFileInfo(*db, "/test/path1", retrieved)); EXPECT_EQ(retrieved.filePath, "/test/path1"); EXPECT_EQ(retrieved.fileSummary, ""); // 一开始是空的,因为是闲时任务,insertFileInfo时不会去插入摘要 FileInfoDO updated = retrieved; updated.fileSummary = "updated_summary"; EXPECT_TRUE(FileDatabase::updateFileInfo(*db, updated)); FileInfoDO verified; EXPECT_TRUE(FileDatabase::getSingleFileInfo(*db, "/test/path1", verified)); EXPECT_EQ(verified.fileSummary, "updated_summary"); auto allFiles = FileDatabase::getAllFileInfos(*db); EXPECT_GT(allFiles.size(), 0); std::string path = FileDatabase::getFilePathById(*db, fileInfo.fileId); EXPECT_EQ(path, "/test/path1"); } TEST_F(FileDatabaseTest, test03) { // 测试插入标签 FileInfoDO fileInfo; fileInfo.filePath = "/test/tag_file"; fileInfo.enable = 1; fileInfo.modifyTime = "2025-06-30 12:00:00"; EXPECT_TRUE(FileDatabase::insertFileInfo(*db, fileInfo)); EXPECT_GT(fileInfo.fileId, 0); int64_t tagId; EXPECT_TRUE(FileDatabase::insertNewTagName(*db, fileInfo.fileId, "test_tag", tagId)); EXPECT_GT(tagId, 0); int64_t retrievedTagId; EXPECT_TRUE(FileDatabase::getTagIdByTagName(*db, "test_tag", retrievedTagId)); EXPECT_EQ(retrievedTagId, tagId); auto tags = FileDatabase::getFileTags(*db, "/test/tag_file"); EXPECT_EQ(tags.size(), 1); EXPECT_EQ(tags[0], "test_tag"); auto 
allTags = FileDatabase::getAllTags(*db); EXPECT_GE(allTags.size(), 1); EXPECT_TRUE(FileDatabase::isTagInUse(*db, tagId)); EXPECT_TRUE(FileDatabase::deleteFileTagRelation(*db, fileInfo.fileId, tagId)); EXPECT_TRUE(FileDatabase::deleteTagInfo(*db, tagId)); EXPECT_FALSE(FileDatabase::isTagInUse(*db, tagId)); } TEST_F(FileDatabaseTest, test04) { // 测试文件和标签关系 FileInfoDO fileInfo; fileInfo.filePath = "/test/relation_file"; fileInfo.enable = 1; fileInfo.modifyTime = "2025-06-30 12:00:00"; EXPECT_TRUE(FileDatabase::insertFileInfo(*db, fileInfo)); int64_t tagId1, tagId2; EXPECT_TRUE(FileDatabase::insertNewTagName(*db, fileInfo.fileId, "tag1", tagId1)); EXPECT_TRUE(FileDatabase::insertNewTagName(*db, fileInfo.fileId, "tag2", tagId2)); auto tagIds = FileDatabase::getTagIdsByFileId(*db, fileInfo.fileId); EXPECT_EQ(tagIds.size(), 2); std::unordered_set ids{tagId1, tagId2}; auto filePaths = FileDatabase::getFilePathsByTagIds(*db, ids); EXPECT_EQ(filePaths.size(), 2); EXPECT_EQ(filePaths[0], "/test/relation_file"); auto tagNames = FileDatabase::getTagNamesByTagIds(*db, ids); EXPECT_EQ(tagNames.size(), 2); EXPECT_TRUE(std::find(tagNames.begin(), tagNames.end(), "tag1") != tagNames.end()); EXPECT_TRUE(std::find(tagNames.begin(), tagNames.end(), "tag2") != tagNames.end()); } TEST_F(FileDatabaseTest, test05) { // 测试删除文件 FileInfoDO fileInfo1; fileInfo1.filePath = "/test/transaction1"; fileInfo1.enable = 1; fileInfo1.modifyTime = "2025-06-30 12:00:00"; FileInfoDO fileInfo2; fileInfo2.filePath = "/test/transaction2"; fileInfo2.enable = 1; fileInfo2.modifyTime = "2025-06-30 12:00:00"; EXPECT_TRUE(FileDatabase::insertFileInfo(*db, fileInfo1)); EXPECT_TRUE(FileDatabase::insertFileInfo(*db, fileInfo2)); auto filesBefore = FileDatabase::getAllFileInfos(*db); EXPECT_GE(filesBefore.size(), 2); std::unordered_set idsToDelete; for (const auto& f : filesBefore) { if (f.filePath == "/test/transaction1" || f.filePath == "/test/transaction2") { idsToDelete.insert(f.fileId); } } std::string 
expression = joinIds(idsToDelete); EXPECT_TRUE(FileDatabase::deleteByFileIds(*db, expression)); auto filesAfter = FileDatabase::getAllFileInfos(*db); for (const auto& id : idsToDelete) { EXPECT_TRUE(FileDatabase::getFilePathById(*db, id).empty()); // 删除成功,这里为空 } } TEST_F(FileDatabaseTest, test06) { // 测试不存在的数据 // 测试查询不存在的文件 FileInfoDO nonExistent; EXPECT_FALSE(FileDatabase::getSingleFileInfo(*db, "/non/existent/path", nonExistent)); // 测试获取不存在的文件路径 EXPECT_TRUE(FileDatabase::getFilePathById(*db, 999999).empty()); // 测试获取不存在的标签 int64_t dummyTagId; EXPECT_FALSE(FileDatabase::getTagIdByTagName(*db, "non_existent_tag", dummyTagId)); // 测试检查未使用的标签 EXPECT_FALSE(FileDatabase::isTagInUse(*db, 999999)); } kylin-ai-data-management-service-1.2.0.0/test/dao/testVector.cpp000066400000000000000000000210551520577635400244550ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include #include #include #include #include #include "constant.h" #include "dataManagementVectorDatabase.h" #include "embedding.h" static std::string generateUUID() { uuid_t uuid; char uuidStr[37]; uuid_generate_random(uuid); uuid_unparse(uuid, uuidStr); return uuidStr; } void replaceDashWithUnderscore(std::string& str) { std::replace(str.begin(), str.end(), '-', '_'); } class DataManagementVectorDatabaseTest : public testing::Test { public: static void SetUpTestSuite() { std::cout << "DataManagementVectorDatabaseTest SetUpTestSuite..." << std::endl; } static void TearDownTestSuite() { std::cout << "DataManagementVectorDatabaseTest TearDownTestSuite..." << std::endl; } protected: void SetUp() override { std::cout << "DataManagementVectorDatabaseTest SetUp..." << std::endl; // 使用不同的集合名避免测试间干扰 collectionName = "test_collection_" + generateUUID(); replaceDashWithUnderscore(collectionName); // 向量集合名之间要用下划线 vectorDB = std::make_unique(collectionName); // 准备测试嵌入向量 testEmbedding = Embedding(DataManagementEmbeddingDataType::Text, scene::TaskPriority::HIGH) .getEmbedding("This is a test sentence for vector database."); if (testEmbedding.empty()) { FAIL() << "Failed to generate test embedding"; } } void TearDown() override { std::cout << "DataManagementVectorDatabaseTest TearDown..." 
<< std::endl; } std::unique_ptr vectorDB; std::string collectionName; std::vector testEmbedding; }; // TEST_F(DataManagementVectorDatabaseTest, test01) { // // 测试创建集合 // EXPECT_TRUE(vectorDB != nullptr); // // 尝试插入数据验证集合是否存在 // std::string uuid = generateUUID(); // nlohmann::json metadata; // metadata["test_key"] = "test_value"; // bool insertResult = vectorDB->insertVectorData({DataManagementVectorInfo{uuid, testEmbedding, metadata}}); // EXPECT_TRUE(insertResult) << "Insert failed, collection might not exist"; // // 清理测试数据 // EXPECT_TRUE(vectorDB->deleteVectorData("id in ['" + uuid + "']")); // } // TEST_F(DataManagementVectorDatabaseTest, test02) { // // 测试插入与搜索功能 // std::vector testData; // const int testDataSize = 5; // for (int i = 0; i < testDataSize; ++i) { // std::string uuid = generateUUID(); // nlohmann::json metadata; // metadata["index"] = i; // metadata["content"] = "Test content " + std::to_string(i); // // 对每条数据做轻微修改以增加多样性 // std::vector modifiedEmbedding = testEmbedding; // if (i > 0) { // modifiedEmbedding[0] += static_cast(i) * 0.01f; // } // testData.emplace_back(DataManagementVectorInfo{uuid, modifiedEmbedding, metadata}); // } // // 插入数据 // EXPECT_TRUE(vectorDB->insertVectorData(testData)); // // 搜索测试 - 使用第一条数据的嵌入向量作为查询 // auto searchResult = vectorDB->searchMetadata(testData[0].embedding, testDataSize); // // 验证搜索结果 // EXPECT_GE(searchResult.size(), 1) << "Should find at least the exact match"; // // 验证第一条结果是否匹配(相似度应该很高,这里限制的是大于等于0.8) // EXPECT_GE(searchResult[0].similarity, 0.8f) << "Exact match similarity should be 0.8"; // EXPECT_EQ(searchResult[0].metadata["index"], 0) << "Metadata should match inserted data"; // // 验证结果顺序(相似度应该从高到低排序) // for (size_t i = 1; i < searchResult.size(); ++i) { // EXPECT_LE(searchResult[i].similarity, searchResult[i - 1].similarity) // << "Results should be sorted by similarity descending"; // } // // 清理测试数据 // std::string expression = "id in ["; // for (const auto& data : testData) { // expression += "'" + 
data.id + "',"; // } // expression.back() = ']'; // 替换最后一个逗号 // EXPECT_TRUE(vectorDB->deleteVectorData(expression)); // } // TEST_F(DataManagementVectorDatabaseTest, test03) { // // 测试搜索阈值 // std::string uuid1 = generateUUID(); // std::string uuid2 = generateUUID(); // nlohmann::json metadata1; // metadata1["type"] = "type1"; // nlohmann::json metadata2; // metadata2["type"] = "type2"; // // 第二条数据的嵌入向量做较大修改 // std::vector embedding2 = testEmbedding; // for (auto& val : embedding2) { // val += 0.5f; // 增加较大偏移 // } // EXPECT_TRUE(vectorDB->insertVectorData({DataManagementVectorInfo{uuid1, testEmbedding, metadata1}, // DataManagementVectorInfo{uuid2, embedding2, metadata2}})); // // 使用高阈值搜索 - 应该只返回非常相似的结果 // auto highThresholdResult = vectorDB->searchMetadata(testEmbedding, 10, 0.8f); // EXPECT_EQ(highThresholdResult.size(), 1) << "High threshold should filter out dissimilar results"; // EXPECT_EQ(highThresholdResult[0].id, uuid1) << "Should return only the exact match"; // // 使用低阈值搜索 - 应该返回所有结果 // auto lowThresholdResult = vectorDB->searchMetadata(testEmbedding, 10, 0.0f); // EXPECT_GE(lowThresholdResult.size(), 2) << "Low threshold should return all results"; // // 清理 // EXPECT_TRUE(vectorDB->deleteVectorData("id in ['" + uuid1 + "', '" + uuid2 + "']")); // } // TEST_F(DataManagementVectorDatabaseTest, test04) { // // 测试删除操作 // std::string uuid1 = generateUUID(); // std::string uuid2 = generateUUID(); // nlohmann::json metadata; // metadata["test"] = "delete_test"; // EXPECT_TRUE(vectorDB->insertVectorData({DataManagementVectorInfo{uuid1, testEmbedding, metadata}, // DataManagementVectorInfo{uuid2, testEmbedding, metadata}})); // // 验证数据已插入 // auto beforeDelete = vectorDB->searchMetadata(testEmbedding); // EXPECT_GE(beforeDelete.size(), 2); // // 删除第一条数据 // EXPECT_TRUE(vectorDB->deleteVectorData("id == '" + uuid1 + "'")); // // 验证删除结果 // auto afterDelete = vectorDB->searchMetadata(testEmbedding); // bool foundRemaining = false; // bool foundDeleted = false; // for 
(const auto& result : afterDelete) { // if (result.id == uuid1) foundDeleted = true; // if (result.id == uuid2) foundRemaining = true; // } // EXPECT_FALSE(foundDeleted) << "Deleted item should not appear in search results"; // EXPECT_TRUE(foundRemaining) << "Other items should still exist"; // // 清理剩余数据 // EXPECT_TRUE(vectorDB->deleteVectorData("id == '" + uuid2 + "'")); // } // TEST_F(DataManagementVectorDatabaseTest, test05) { // // 测试插入 搜索空数据 // auto emptyResult = vectorDB->searchMetadata({}); // EXPECT_TRUE(emptyResult.empty()) << "Search with empty vector should return empty result"; // // 测试删除不存在的数据 // EXPECT_TRUE(vectorDB->deleteVectorData("id == 'non_existent_id'")) << "Delete non-existent data should not fail"; // // 测试插入空向量 // EXPECT_FALSE(vectorDB->insertVectorData({DataManagementVectorInfo{"", std::vector{}, nlohmann::json{}}})) // << "Insert empty data should fail"; // // 测试插入不匹配维度的向量 // std::vector wrongDimVector(10, 0.0f); // EXPECT_FALSE( // vectorDB->insertVectorData({DataManagementVectorInfo{generateUUID(), wrongDimVector, nlohmann::json{}}})) // << "Insert vector with wrong dimension should fail"; // }kylin-ai-data-management-service-1.2.0.0/test/hand.docx000066400000000000000000000270121520577635400226340ustar00rootroot00000000000000PK N@ docProps/PKN@JK\pdocProps/app.xmlQo0ߗ?ޡ)5Ӳ4͠mj߯E9=ڃΠ "0=L\ԙ/6W0<>ୖ `<!L7֪B5QpJZ'udUqkN(!X%ClZJ0t{ӆF7oi  >H]a4 8o̺͑o\ij~̑…-x;F[.hkѧ*亇5GlnlS|RZ79lwO'8aq2;nI9 VqQhEIh];`'X?5KPKN@m8C}docProps/core.xmlAO &{ ̢M%jvr3omdB=x><U'ƊF(M@ /vߢ:8%Uu}U0i4'Fޤltc M<|kΏf5eGQhTr6)gjioցC2#pgݜÉmv1Syz T u#/]vd71Yb|BjLuk 1HM Ma(5ITT_PKN@ߑ& docProps/custom.xmlN0EHCGȫJR5q+! Qۑ*Ŀ b;:sfX18 cJ@B /v3Xת%8 U`$ xg$eLUIx̲C\H>$f{2NY."ܯ`/TrYA"P2\r) O}ڇ*!:x/<@Q,y xKhWsbR 9́iGA,+δym[^ZC 8q6!.C:C'ĠI_{n4;9GNy;G. $EyhXаa C%( KP 4,A9 !+6|B<9rJ7=*"/j4vՈnu̴;z;M\x ,Zx#z, Aƿh_pC#||VD"^Rs\Oْ'-B4T+k j dM)ĽnERDFTGr%hܳ4_ n]6<+멜^S,2WT|_w~8dIA;*j0˄Ann5=2Ŗ+O?/2|HY 4YGѨE$'S~!W@tvtO=̊yN#935 % SBA3jܾ1%H$ڿfŰ/cLi0- F&_2䚇v1U.W3"MX&v]MX:>cS?DĞ6K54? 
7[LN5klZwa `$76d0mjufOΩ(f,N)'5R H?!(yOzĥ5x@!5BpRS"lkbkkiJ03SZ3Jt3=A{hg ,ߍdnݠA'wN,O=X@{> Y!Y*]GZN_ cÒ+ebUM%r8r[Oދ*f5+'z-S E׮VȚ:lòT^2X/~oXa7XrϞcva[y K=|y]j$ n(\"as;gO,9AK-%),p,&Y؂C"CڡD{Pw(2ra,X"g97YD$ro,Mo,MxclIE}Cb Юxc>ra,X"g97YD$ro, F\6mh4-X"<4DmF7򹬣xcYlo,-:}ˆ7YD%ro,xcI&Xxclhj[D6yh$roGƲ+X[t˅o,xcK,X"g97M$rٴԶxclI.YehW,7 #X.g97YD%ro$xcI0iCm!j!ۻ`>R6~Zo(zjSyïfy}^A ya΀V՚x4vU|ˣr쌯1"k=XhW0^߿;Wz|scGsү (TwyR_L{~:S՛,XA ٩2ێIc([֜XS7V HyTfsţ聙e &_GFSWUiS~Fn8_"i=- jw>]Z-hLP1]q]$:Pp&-:GbvB-ڹ&OSj`tm$85Tyj)ӲK.ۧdzi2ESəVoWfa~n\jJTV捍hW(: b'E{C6Ag I•&:R۩"l֟FϭxFC׼4`B^xث:-uQ#'^T-kTJ&[ ڳ9r Fƞ͒3PX{Pr'$RѮhѭnF;PJkJ&#$T4 }. 7 et͏#@G%dx'F(*nm3Eb9^ӄFG`Q+2a~ }kA7y\ŰP^.uK%x^Y#ؓ` zf+ >K_oh[5߸ohPM ookw(b^^ ۍT uXw„XᇶKcB3_߆4Պʍ Jr U}(8_=P|op@Yk4wVU4X\jl<|aeXOOB.[dW9? '_ %Y; Pɼ4O\E~4SedNa%1~Re'{V܁َyJA4h5Zg |fEFm"j)Z/X(Lto5 m`݅M fDqͼO=x=<%‘HiF]An>N"dF=_q1' -EOíbn{ G'uPK N@ word/theme/PKN@Ѯ$word/theme/theme1.xmlYMo7X콱d#2">vDJ)-ˈ\݊X@Ѵȡ^z(HhkHS CjEJT>E}3|!WWޏw ,iK%ɐ$ ~˾'$JDY և\A21>GRkkbH\b)NوIõ#ӵR#^bp{c4"C?f;HPC{7L46BLErѦ쨏KߣHHxK_ۺ6s#*Wv]x]A1iRԶ @2S:ŸVq1}Vw;j5@GvQ%Uo5(_Yw-נ _]W*VkP-vn5($/KFk2bt oT+z|j(KM1b\Uk1x H$')!q Q2'a$4h#y64KCjFO 9Ie$E1^߾g~=~/#j%i/y<ڍ&ϟ?Wn l9Wnb3nb; J## }}(rv;D6gE|"^[(쩹0'I螜OL-]sPb3IA=e+͛%8Scb 9l$A>X47%1e"bspaԵ6>+ucjH\QL̀#H|h:BBCL .k$}ĝ:m$d3m6nE(N]I"C"&.wy@t!JjpӤ4/dU)!R$'w6 7Ho8x_TĹgvznQ[sM6rz/ߋ|EM1^a;f zgksVleAX)ɸ@x4{.Tph} A"r-UA_2KGkUb۲Bn,{5I1)&Y-k@tz5=&`h6r0CN.,kXYz+ش1BxRm w>H.KxZ@x[Ld0Yr 9O? 
;m|v}AM B_ "zYd+f7Pt~)"A& A.2 d, {c,)È'zEapBe},70 ~K1 Ȕ ̐Jo]`x(iʑm tUDUTvwOB[T?֭"_ f0DN(|>[Cv^BzRPJ3ZckG SgpfBI 5*jn#O-ar%Q+o \'Jzɋ'o KQ5{zNp[R"ص921OP,d_LhM ,)y׿] ER & lOo!{U6 O^ox-xU4'T#aNv _îhظ~t`ʧI!Qh3p̚YDe&gQ䘟%ŏ_7*ۑA3pdㅨ$ᖕI,60c'\ˉCfPOs(XZP42Iilj2˙2o"ܓQTBn𤺀 5Y\Cei;SafR.r҂ԴÚ~:@g){4dE%Xs凔a#AIz`eb_T 9O%͍!WD("}!e&*Ң!@΁pq9+K4)) ZxUmC{!p26#އVRK^AJ+>$ջ@Dw )D حuy95k6ptqO8_?2]Jhȍ]8៩2&v;P)܆^H-)ܡeL?OrEk[Ƙ4ヲSbJO|ŢIK@p|MW7?_PKN@,d]word/fontTable.xmlWn@#ui -&ΤD3N~+V K$v5B5mE;3v&6@(s=s=>>{4q,Y{7gJ8MKw}{FT:0 M ϓS$38炢}"j4\9tIB#/d>&GTN #gr@26^$ۘPK L".L"Jb%p3YRG4q73.P7Ae9CblS!b\!|*G (r=) !qz17>$9ʣB$y fH'F7]x$~U&4DԖEBXـzEDu=$t@l^g(44(I%8{Ǔ`Gfh(KF)>%,VU&XME&X pViUzCt)vVNvBLV|Ol@mI#@gםvBti=*Mtc"ex!jm3 [F> LO"ZmBzR+R ;Z<{{ 2TFX )ӪUl`efD`RkwNk͕ Ԙř#AP6n&~?cU02(af. q1Z>)vU[!ĪWձ?7,3zyK^%+ u nP\6R \Y2)?Ƃ& j:IP/G]FST_YjZAoU*Mh3;E+@bf ڙCoZQ`\*fb69@{*Vv+rdeB@ȡ |u/^CIv]/B[3^LR(r1bh/R6B3YS=_xa4C[kPK N@_rels/PKN@"" _rels/.relsJ1!}7*"loDH}! L}{r2g|szstxm U Naq "3zCD62{(.>+㝔Y0W!/6$\ɈzU]ikANl;tr(Θ:b!i> rfu>JG1ے7Pay,]1<h|T|ڷ}I]7:|k"t7ã#8 !}~M,R/oua&!p\CVE1A>PKN@|I~b[Content_Types].xmln0EUb袪*>- 3~3PNaJQ7Xf Hh/E RO^{!)_)!f9Rp3w6ۘ_¿B]]u5k@lMv0NԇC;lOJ <^ mI<}/n鐲ӆPKN@|I~b ([Content_Types].xmlPK N@%_rels/PKN@""  !&_rels/.relsPK N@ docProps/PKN@JK\p 'docProps/app.xmlPKN@m8C} docProps/core.xmlPKN@ߑ&  #docProps/custom.xmlPK N@zword/PK N@ G'word/_rels/PKN@P p'word/_rels/document.xml.relsPKN@l word/document.xmlPKN@,d] "word/fontTable.xmlPKN@iՅ  lword/settings.xmlPKN@] e word/styles.xmlPK N@ word/theme/PKN@Ѯ$ word/theme/theme1.xmlPK$*kylin-ai-data-management-service-1.2.0.0/test/service/000077500000000000000000000000001520577635400225015ustar00rootroot00000000000000kylin-ai-data-management-service-1.2.0.0/test/service/testImageService.cpp000066400000000000000000000042531520577635400264540ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. 
* * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include #include #include "imageService.h" class ImageServiceTest : public testing::Test { public: // 所有案例执行之前 static void SetUpTestSuite(); // 所有案例执行之后 static void TearDownTestSuite(); protected: // 每个案例之前执行 void SetUp() override { std::cout << "ImageServiceTest SetUp..." << std::endl; } // 每个案例之后执行 void TearDown() override { std::cout << "ImageServiceTest TearDown..." << std::endl; } private: }; void ImageServiceTest::SetUpTestSuite() { std::cout << "ImageServiceTest SetUpTestSuite..." << std::endl; } void ImageServiceTest::TearDownTestSuite() { std::cout << "ImageServiceTest TearDownTestSuite..." 
<< std::endl; } // TEST_F(ImageServiceTest, test01) { // namespace fs = std::filesystem; // fs::path dir = fs::path(__FILE__).parent_path().parent_path(); // fs::path path = dir / "20250626.jpg"; // ImageService imageService; // imageService.embedding({path.string()}); // } // TEST_F(ImageServiceTest, test02) { // namespace fs = std::filesystem; // fs::path dir = fs::path(__FILE__).parent_path().parent_path(); // fs::path path = dir / "20250626.jpg"; // ImageService imageService; // imageService.embedding({path.string(), path.string(), path.string(), path.string(), path.string(), path.string()}); // } // TEST_F(ImageServiceTest, test03) { // ImageService imageService; // imageService.embedding({}); // }kylin-ai-data-management-service-1.2.0.0/test/service/testTagService.cpp000066400000000000000000000077461520577635400261570ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include #include #include #include "tagService.h" static int64_t testFileId = []() { // 用当前时间戳 + 随机数生成唯一 ID return std::chrono::system_clock::now().time_since_epoch().count() + rand(); }(); static const std::vector testTags = {"test_tag1", "test_tag2", "test_tag3"}; class TagServiceTest : public testing::Test { public: static void SetUpTestSuite() { std::cout << "TagServiceTest SetUpTestSuite..." 
<< std::endl; // 可以在这里初始化测试数据库等 } static void TearDownTestSuite() { std::cout << "TagServiceTest TearDownTestSuite..." << std::endl; // 清理测试环境 } protected: void SetUp() override { std::cout << "TagServiceTest SetUp..." << std::endl; tagService = std::make_unique(); } void TearDown() override { std::cout << "TagServiceTest TearDown..." << std::endl; // 清理单个测试用例的资源 tagService.reset(); } std::unique_ptr tagService; }; // TEST_F(TagServiceTest, test01) { // // 测试添加新标签 // tagService->updateFileTags(testFileId, {testTags[0], testTags[1]}); // auto similarTags = tagService->searchSimilarTagNames({testTags[0]}); // EXPECT_FALSE(similarTags.empty()); // EXPECT_NE(similarTags.end(), std::find(similarTags.begin(), similarTags.end(), testTags[0])); // } // TEST_F(TagServiceTest, test02) { // // 测试添加重复标签 // tagService->updateFileTags(testFileId, {testTags[0]}); // size_t beforeCount = tagService->searchSimilarTagNames({testTags[0]}).size(); // // 再次添加相同标签 // tagService->updateFileTags(testFileId, {testTags[0]}); // size_t afterCount = tagService->searchSimilarTagNames({testTags[0]}).size(); // // 验证标签数量没有增加(去重) // EXPECT_EQ(beforeCount, afterCount); // } // TEST_F(TagServiceTest, test03) { // // 先添加一些标签 // tagService->updateFileTags(testFileId, {testTags[0], testTags[1]}); // // 验证标签存在 // auto beforeTags = tagService->searchSimilarTagNames({testTags[0]}); // EXPECT_FALSE(beforeTags.empty()); // // 删除标签 // std::unordered_set fileIds = {testFileId}; // tagService->deleteFileTags(fileIds); // // 验证标签已被删除 这里当时设计的时候,只去删向量数据库了,sql数据库没有去删 // auto afterTags = tagService->searchSimilarTagNames({testTags[0]}); // // EXPECT_TRUE(afterTags.empty()); // } // TEST_F(TagServiceTest, test04) { // // 添加测试标签 // tagService->updateFileTags(testFileId, {testTags[0]}); // // 搜索相似标签 // auto results = tagService->searchSimilarTagNames({testTags[0]}); // // 验证结果包含我们添加的标签 // EXPECT_FALSE(results.empty()); // EXPECT_NE(results.end(), std::find(results.begin(), results.end(), testTags[0])); // } // 
TEST_F(TagServiceTest, test05) { // // 添加带标签的文件 // tagService->updateFileTags(testFileId, {testTags[0]}); // // 搜索具有相同标签的文件 // auto results = tagService->searchSimilarFilePaths({testTags[0]}); // // EXPECT_FALSE(results.empty()); // } kylin-ai-data-management-service-1.2.0.0/test/service/testfileInfoService.cpp000066400000000000000000000176301520577635400271700ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . */ #include #include #include #include #include #include "fileInfoService.h" class FileInfoServiceTest : public testing::Test { public: static void SetUpTestSuite() { std::cout << "FileInfoServiceTest SetUpTestSuite..." << std::endl; // 先把本地的向量数据库删了 std::string dbPath = FileDatabase::getDbFilePath(); if (!dbPath.empty()) { std::filesystem::remove(dbPath); } } static void TearDownTestSuite() { std::cout << "FileInfoServiceTest TearDownTestSuite..." << std::endl; } protected: void SetUp() override { std::cout << "FileInfoServiceTest SetUp..." 
<< std::endl; fileInfoService = std::make_unique(); // 创建临时目录 当前测试用例跑完后,会在TearDownTestSuite时删除该目录 testDir = std::filesystem::current_path() / "FileInfoServiceTest"; std::filesystem::create_directories(testDir); // 创建测试文件 testFilePath1 = testDir / "test1.txt"; testFilePath2 = testDir / "test2.txt"; std::ofstream(testFilePath1) << "This is a test file content for similarity search."; std::ofstream(testFilePath2) << "Another test file with different content."; } void TearDown() override { std::cout << "FileInfoServiceTest TearDown..." << std::endl; fileInfoService.reset(); // 清理测试环境 std::filesystem::remove_all(testDir); } static inline std::filesystem::path testDir; static inline std::filesystem::path testFilePath1; static inline std::filesystem::path testFilePath2; std::unique_ptr fileInfoService; }; TEST_F(FileInfoServiceTest, test01) { // 测试区分文件函数 std::unordered_set filePaths = {testFilePath1.string(), testFilePath2.string()}; std::unordered_set newFilePaths; std::vector updateFileInfos; // 第一次分类 当前插入的都是新文件 fileInfoService->classifyFile(filePaths, newFilePaths, updateFileInfos); EXPECT_EQ(newFilePaths.size(), 2); EXPECT_TRUE(updateFileInfos.empty()); // 第二次分类,刚才插入的文件,现在应该都是更新文件 newFilePaths.clear(); fileInfoService->insertTask(filePaths); // 先插入数据库 fileInfoService->classifyFile(filePaths, newFilePaths, updateFileInfos); EXPECT_TRUE(newFilePaths.empty()); EXPECT_EQ(updateFileInfos.size(), 2); } /* *** debuild的时候获得不到这个hand.docx,所以这个测试用例就注释掉了 *** 如果手动生成文件,读取时间是一直变化的,所以当时才用的本地文件 *** 就导致了测试结果和我的预期不一样 */ // TEST_F(FileInfoServiceTest, test02) { // namespace fs = std::filesystem; // fs::path dir = fs::path(__FILE__).parent_path(); // fs::path path = dir / "hand.docx"; // // 测试插入和更新 // std::unordered_set filePaths = {path.string()}; // fileInfoService->insertTask(filePaths); // // 验证文件是否已插入 // FileInfoDO fileInfo; // SQLite::Database db = fileInfoService->getDatabase(); // bool success = FileDatabase::getSingleFileInfo(db, path.string(), fileInfo); // EXPECT_TRUE(success); 
// EXPECT_EQ(fileInfo.filePath, path.string()); // std::cout << fileInfo.fileId << " " << fileInfo.filePath << " " << fileInfo.enable << " " << fileInfo.modifyTime // << " " << fileInfo.fileSummary << std::endl; // // 测试更新任务 // // 由于前面把db给拿出来了,所以之前的FileInfoService中私有成员db用不了,需要重新创建FileInfoService对象 // fileInfoService.reset(); // std::unique_ptr fileInfoServiceTmp = std::make_unique(); // std::vector fileInfos = {fileInfo}; // auto updateIds = fileInfoServiceTmp->updateTask(fileInfos); // EXPECT_EQ(updateIds.size(), 0); // 这里是0 如果文件没有修改,不会更新,这里应该为0 // for (auto& fileInfo : fileInfos) { // std::cout << fileInfo.fileId << " " << fileInfo.filePath << " " << fileInfo.enable << " " << fileInfo.modifyTime // << " " << fileInfo.fileSummary << std::endl; // } // // 修改文件内容,在插入,这个时候updateIds.size为1 // std::ofstream(path, std::ios::app) << "This is an updated test file content."; // updateIds = fileInfoServiceTmp->updateTask(fileInfos); // EXPECT_EQ(updateIds.size(), 1); // } TEST_F(FileInfoServiceTest, test03) { // 测试获取文件最后修改时间 auto modifyTime = fileInfoService->getLastModifyTime(testFilePath1.string()); EXPECT_NE(modifyTime, "0000-00-00 00:00:00"); // 测试不存在的文件 auto nonExistTime = fileInfoService->getLastModifyTime((testDir / "nonexist.txt").string()); EXPECT_EQ(nonExistTime, "0000-00-00 00:00:00"); } // TEST_F(FileInfoServiceTest, test04) { // // 测试搜索文件 // std::unordered_set filePaths = {testFilePath1.string(), testFilePath2.string()}; // fileInfoService->insertTask(filePaths); // // 由于索引是闲时创建,所以这里暂时只判断接口是否成功 // SimilaritySearchResult result; // auto ret = fileInfoService->searchFilesByText("test", result); // EXPECT_EQ(ret, SUCCESS); // // 对于空搜索 // SimilaritySearchResult emptyResult; // ret = fileInfoService->searchFilesByText("", emptyResult); // EXPECT_EQ(ret, EMBEDDING_FAILED); // 会返回向量化错误码 // EXPECT_TRUE(emptyResult.empty()); // } TEST_F(FileInfoServiceTest, test05) { // 测试 DeleteTextFileVectorInfo std::unordered_set filePaths = {testFilePath1.string()}; 
fileInfoService->insertTask(filePaths); SQLite::Database db = fileInfoService->getDatabase(); FileInfoDO fileInfo; FileDatabase::getSingleFileInfo(db, testFilePath1.string(), fileInfo); std::unordered_set fileIds = {fileInfo.fileId}; // 测试删除向量信息 // insertTask闲时后 服务才会去插入向量信息 测试用例虽然没有实际插入,但是删除不存的向量,要返回true bool success = FileInfoService::deleteTextFileVectorInfo(fileIds); EXPECT_TRUE(success); // 测试空输入 success = FileInfoService::deleteTextFileVectorInfo({}); EXPECT_TRUE(success); } TEST_F(FileInfoServiceTest, test06) { // 先插入测试文件 fileInfoService->insertTask({testFilePath1.string()}); SQLite::Database db = fileInfoService->getDatabase(); FileInfoDO fileInfo; FileDatabase::getSingleFileInfo(db, testFilePath1.string(), fileInfo); // 测试删除文件 fileInfoService.reset(); std::unique_ptr fileInfoServiceTmp = std::make_unique(); bool success = fileInfoServiceTmp->deleteFiles({fileInfo.fileId}); EXPECT_TRUE(success); // 验证文件是否已删除 FileInfoDO deletedInfo; success = FileDatabase::getSingleFileInfo(db, testFilePath1.string(), deletedInfo); EXPECT_FALSE(success); // 测试空输入 success = fileInfoServiceTmp->deleteFiles({}); EXPECT_TRUE(success); }kylin-ai-data-management-service-1.2.0.0/test/testController.cpp000066400000000000000000000026461520577635400246000ustar00rootroot00000000000000/* * Copyright (C) 2024 KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program. If not, see . 
*/ #include #include class testController : public testing::Test { public: // 所有案例执行之前 static void SetUpTestSuite(); // 所有案例执行之后 static void TearDownTestSuite(); protected: // 每个案例之前执行 void SetUp() override { std::cout << "testController SetUp..." << std::endl; } // 每个案例之后执行 void TearDown() override { std::cout << "testController TearDown..." << std::endl; } private: }; void testController::SetUpTestSuite() { std::cout << "testController SetUpTestSuite..." << std::endl; } void testController::TearDownTestSuite() { std::cout << "testController TearDownTestSuite..." << std::endl; } TEST_F(testController, Controller) {}